From 468e00132f76a6d057da1520873e7a468ccae422 Mon Sep 17 00:00:00 2001 From: Martin Casado Date: Tue, 4 Mar 2008 13:12:53 -0800 Subject: [PATCH] Initial import --- COPYING | 367 ++++ ChangeLog | 55 + INSTALL | 156 ++ Make.vars | 17 + Makefile.am | 2 + README | 199 ++ acinclude.m4 | 42 + configure.ac | 55 + controller/.gitignore | 3 + controller/Makefile.am | 6 + controller/controller.c | 687 +++++++ datapath/.gitignore | 2 + datapath/Makefile.am | 35 + datapath/README | 0 datapath/chain.c | 161 ++ datapath/chain.h | 31 + datapath/compat.h | 17 + datapath/crc32.c | 40 + datapath/crc32.h | 22 + datapath/crc_t.c | 47 + datapath/datapath.c | 1624 +++++++++++++++++ datapath/datapath.h | 72 + datapath/datapath_t.c | 118 ++ datapath/datapath_t.h | 12 + datapath/dp_dev.c | 78 + datapath/flow.c | 311 ++++ datapath/flow.h | 131 ++ datapath/forward.c | 585 ++++++ datapath/forward.h | 33 + datapath/forward_t.c | 581 ++++++ datapath/linux-2.4/.gitignore | 19 + datapath/linux-2.4/Makefile.in | 100 + datapath/linux-2.4/README | 6 + datapath/linux-2.4/compat-2.4/TODO | 2 + datapath/linux-2.4/compat-2.4/attr.c | 436 +++++ datapath/linux-2.4/compat-2.4/compat24.c | 27 + datapath/linux-2.4/compat-2.4/compat24.h | 11 + datapath/linux-2.4/compat-2.4/genetlink.c | 810 ++++++++ .../compat-2.4/include-arm/asm/atomic.h | 56 + .../compat-2.4/include-i386/asm/atomic.h | 10 + .../compat-2.4/include-mips/asm/atomic.h | 11 + .../compat-2.4/include-mips/asm/barrier.h | 29 + .../compat-2.4/include-mips/asm/system.h | 268 +++ .../linux-2.4/compat-2.4/include/asm/system.h | 18 + .../compat-2.4/include/linux/compiler.h | 8 + .../compat-2.4/include/linux/delay.h | 59 + .../compat-2.4/include/linux/etherdevice.h | 71 + .../compat-2.4/include/linux/genetlink.h | 82 + .../linux-2.4/compat-2.4/include/linux/gfp.h | 6 + .../compat-2.4/include/linux/if_ether.h | 15 + .../compat-2.4/include/linux/if_vlan.h | 71 + .../linux-2.4/compat-2.4/include/linux/ip.h | 15 + 
.../linux-2.4/compat-2.4/include/linux/ipv6.h | 11 + .../compat-2.4/include/linux/jiffies.h | 10 + .../compat-2.4/include/linux/kernel.h | 43 + .../linux-2.4/compat-2.4/include/linux/list.h | 510 ++++++ .../compat-2.4/include/linux/module.h | 19 + .../compat-2.4/include/linux/mutex.h | 58 + .../compat-2.4/include/linux/netdevice.h | 31 + .../compat-2.4/include/linux/netlink.h | 78 + .../compat-2.4/include/linux/random.h | 11 + .../compat-2.4/include/linux/rcupdate.h | 205 +++ .../compat-2.4/include/linux/skbuff.h | 130 ++ .../linux-2.4/compat-2.4/include/linux/slab.h | 44 + .../compat-2.4/include/linux/sockios.h | 12 + .../compat-2.4/include/linux/spinlock.h | 8 + .../compat-2.4/include/linux/string.h | 10 + .../linux-2.4/compat-2.4/include/linux/tcp.h | 25 + .../compat-2.4/include/linux/timer.h | 96 + .../compat-2.4/include/linux/types.h | 49 + .../linux-2.4/compat-2.4/include/linux/udp.h | 15 + .../compat-2.4/include/net/checksum.h | 11 + .../compat-2.4/include/net/genetlink.h | 252 +++ .../compat-2.4/include/net/llc_pdu.h | 11 + .../compat-2.4/include/net/netlink.h | 1014 ++++++++++ datapath/linux-2.4/compat-2.4/kernel.c | 27 + datapath/linux-2.4/compat-2.4/netlink.c | 116 ++ datapath/linux-2.4/compat-2.4/random32.c | 142 ++ datapath/linux-2.4/compat-2.4/rcupdate.c | 145 ++ datapath/linux-2.4/compat-2.4/string.c | 30 + .../linux-2.4/config/config-linux-2.4.35-kvm | 600 ++++++ datapath/linux-2.4/kbuild.inc | 246 +++ datapath/linux-2.4/kernel-src.inc.in | 1 + datapath/linux-2.6-uml/.gitignore | 15 + datapath/linux-2.6-uml/Makefile.in | 51 + .../config/config-linux-2.6.23-rc5-kvm | 896 +++++++++ datapath/linux-2.6-uml/kbuild.inc | 210 +++ datapath/linux-2.6/.gitignore | 18 + datapath/linux-2.6/Makefile.in | 65 + datapath/linux-2.6/compat-2.6/compat26.h | 25 + datapath/linux-2.6/compat-2.6/genetlink.c | 15 + .../linux-2.6/compat-2.6/include/linux/ip.h | 20 + .../linux-2.6/compat-2.6/include/linux/ipv6.h | 20 + .../compat-2.6/include/linux/lockdep.h | 450 +++++ 
.../compat-2.6/include/linux/mutex.h | 59 + .../compat-2.6/include/linux/netlink.h | 22 + .../compat-2.6/include/linux/random.h | 17 + .../compat-2.6/include/linux/skbuff.h | 63 + .../linux-2.6/compat-2.6/include/linux/tcp.h | 18 + .../compat-2.6/include/linux/timer.h | 90 + .../compat-2.6/include/linux/types.h | 14 + .../linux-2.6/compat-2.6/include/linux/udp.h | 19 + .../compat-2.6/include/net/checksum.h | 16 + .../compat-2.6/include/net/genetlink.h | 123 ++ datapath/linux-2.6/compat-2.6/random32.c | 146 ++ .../config/config-linux-2.6.23-rc9-kvm | 1408 ++++++++++++++ datapath/linux-2.6/kbuild.inc | 214 +++ datapath/linux-2.6/kernel-src.inc.in | 1 + datapath/run-unit-tests | 64 + datapath/snap.h | 32 + datapath/t/.gitignore | 4 + datapath/table-hash.c | 466 +++++ datapath/table-linear.c | 202 ++ datapath/table-mac.c | 272 +++ datapath/table.h | 74 + datapath/table_t.c | 879 +++++++++ datapath/tests/.gitignore | 4 + datapath/tests/Makefile.am | 7 + datapath/tests/example.pcap | Bin 0 -> 82 bytes datapath/tests/gen_forward_t.pl | 80 + datapath/tests/ofp_pcap.c | 97 + datapath/tests/ofp_pcap.h | 64 + datapath/unit-exports.c | 26 + datapath/unit.c | 100 + datapath/unit.h | 21 + include/.gitignore | 2 + include/Makefile.am | 25 + include/buffer.h | 63 + include/command-line.h | 30 + include/compiler.h | 32 + include/dpif.h | 55 + include/dynamic-string.h | 45 + include/fatal-signal.h | 28 + include/fault.h | 28 + include/flow.h | 33 + include/hash.h | 12 + include/ip.h | 11 + include/list.h | 53 + include/mac.h | 41 + include/netlink.h | 148 ++ include/ofp-print.h | 48 + include/openflow-netlink.h | 83 + include/openflow.h | 388 ++++ include/packets.h | 136 ++ include/socket-util.h | 31 + include/util.h | 84 + include/vconn.h | 154 ++ include/vlog-socket.h | 42 + include/vlog.h | 100 + include/xtoxll.h | 19 + lib/.gitignore | 2 + lib/Makefile.am | 27 + lib/buffer.c | 192 ++ lib/command-line.c | 53 + lib/dpif.c | 519 ++++++ lib/dynamic-string.c | 98 + lib/fatal-signal.c 
| 181 ++ lib/fault.c | 77 + lib/flow.c | 122 ++ lib/hash.c | 13 + lib/list.c | 123 ++ lib/netlink.c | 908 +++++++++ lib/ofp-print.c | 471 +++++ lib/socket-util.c | 65 + lib/util.c | 195 ++ lib/vconn-netlink.c | 126 ++ lib/vconn-tcp.c | 370 ++++ lib/vconn.c | 289 +++ lib/vlog-socket.c | 504 +++++ lib/vlog.c | 309 ++++ man/.gitignore | 2 + man/Makefile.am | 1 + man/man1/.gitignore | 2 + man/man8/.gitignore | 2 + man/man8/Makefile.am | 1 + man/man8/controller.8 | 69 + man/man8/dpctl.8 | 141 ++ man/man8/secchan.8 | 47 + man/man8/vlogconf.8 | 45 + secchan/.gitignore | 6 + secchan/Makefile.am | 6 + secchan/secchan.c | 256 +++ third-party/.gitignore | 2 + third-party/Makefile.am | 1 + third-party/README | 35 + third-party/ofp-tcpdump.patch | 119 ++ utilities/.gitignore | 4 + utilities/Makefile.am | 12 + utilities/dpctl.c | 519 ++++++ utilities/vlogconf.c | 185 ++ 190 files changed, 25756 insertions(+) create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Make.vars create mode 100644 Makefile.am create mode 100644 acinclude.m4 create mode 100644 configure.ac create mode 100644 controller/.gitignore create mode 100644 controller/Makefile.am create mode 100644 controller/controller.c create mode 100644 datapath/.gitignore create mode 100644 datapath/Makefile.am create mode 100644 datapath/README create mode 100644 datapath/chain.c create mode 100644 datapath/chain.h create mode 100644 datapath/compat.h create mode 100644 datapath/crc32.c create mode 100644 datapath/crc32.h create mode 100644 datapath/crc_t.c create mode 100644 datapath/datapath.c create mode 100644 datapath/datapath.h create mode 100644 datapath/datapath_t.c create mode 100644 datapath/datapath_t.h create mode 100644 datapath/dp_dev.c create mode 100644 datapath/flow.c create mode 100644 datapath/flow.h create mode 100644 datapath/forward.c create mode 100644 datapath/forward.h create mode 100644 datapath/forward_t.c create mode 100644 
datapath/linux-2.4/.gitignore create mode 100644 datapath/linux-2.4/Makefile.in create mode 100644 datapath/linux-2.4/README create mode 100644 datapath/linux-2.4/compat-2.4/TODO create mode 100644 datapath/linux-2.4/compat-2.4/attr.c create mode 100644 datapath/linux-2.4/compat-2.4/compat24.c create mode 100644 datapath/linux-2.4/compat-2.4/compat24.h create mode 100644 datapath/linux-2.4/compat-2.4/genetlink.c create mode 100644 datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h create mode 100644 datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h create mode 100644 datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h create mode 100644 datapath/linux-2.4/compat-2.4/include-mips/asm/barrier.h create mode 100644 datapath/linux-2.4/compat-2.4/include-mips/asm/system.h create mode 100644 datapath/linux-2.4/compat-2.4/include/asm/system.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/compiler.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/delay.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/etherdevice.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/genetlink.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/gfp.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/if_ether.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/ip.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/ipv6.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/jiffies.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/kernel.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/list.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/module.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/mutex.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/netdevice.h create mode 100644 
datapath/linux-2.4/compat-2.4/include/linux/netlink.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/random.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/skbuff.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/slab.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/sockios.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/spinlock.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/string.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/tcp.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/timer.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/types.h create mode 100644 datapath/linux-2.4/compat-2.4/include/linux/udp.h create mode 100644 datapath/linux-2.4/compat-2.4/include/net/checksum.h create mode 100644 datapath/linux-2.4/compat-2.4/include/net/genetlink.h create mode 100644 datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h create mode 100644 datapath/linux-2.4/compat-2.4/include/net/netlink.h create mode 100644 datapath/linux-2.4/compat-2.4/kernel.c create mode 100644 datapath/linux-2.4/compat-2.4/netlink.c create mode 100644 datapath/linux-2.4/compat-2.4/random32.c create mode 100644 datapath/linux-2.4/compat-2.4/rcupdate.c create mode 100644 datapath/linux-2.4/compat-2.4/string.c create mode 100644 datapath/linux-2.4/config/config-linux-2.4.35-kvm create mode 100644 datapath/linux-2.4/kbuild.inc create mode 100644 datapath/linux-2.4/kernel-src.inc.in create mode 100644 datapath/linux-2.6-uml/.gitignore create mode 100644 datapath/linux-2.6-uml/Makefile.in create mode 100644 datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm create mode 100644 datapath/linux-2.6-uml/kbuild.inc create mode 100644 datapath/linux-2.6/.gitignore create mode 100644 datapath/linux-2.6/Makefile.in create mode 100644 datapath/linux-2.6/compat-2.6/compat26.h create 
mode 100644 datapath/linux-2.6/compat-2.6/genetlink.c create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/ip.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/ipv6.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/lockdep.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/mutex.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/netlink.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/random.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/skbuff.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/tcp.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/timer.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/types.h create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/udp.h create mode 100644 datapath/linux-2.6/compat-2.6/include/net/checksum.h create mode 100644 datapath/linux-2.6/compat-2.6/include/net/genetlink.h create mode 100644 datapath/linux-2.6/compat-2.6/random32.c create mode 100644 datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm create mode 100644 datapath/linux-2.6/kbuild.inc create mode 100644 datapath/linux-2.6/kernel-src.inc.in create mode 100755 datapath/run-unit-tests create mode 100644 datapath/snap.h create mode 100644 datapath/t/.gitignore create mode 100644 datapath/table-hash.c create mode 100644 datapath/table-linear.c create mode 100644 datapath/table-mac.c create mode 100644 datapath/table.h create mode 100644 datapath/table_t.c create mode 100644 datapath/tests/.gitignore create mode 100644 datapath/tests/Makefile.am create mode 100644 datapath/tests/example.pcap create mode 100755 datapath/tests/gen_forward_t.pl create mode 100644 datapath/tests/ofp_pcap.c create mode 100644 datapath/tests/ofp_pcap.h create mode 100644 datapath/unit-exports.c create mode 100644 datapath/unit.c create mode 100644 datapath/unit.h create mode 100644 include/.gitignore create mode 100644 
include/Makefile.am create mode 100644 include/buffer.h create mode 100644 include/command-line.h create mode 100644 include/compiler.h create mode 100644 include/dpif.h create mode 100644 include/dynamic-string.h create mode 100644 include/fatal-signal.h create mode 100644 include/fault.h create mode 100644 include/flow.h create mode 100644 include/hash.h create mode 100644 include/ip.h create mode 100644 include/list.h create mode 100644 include/mac.h create mode 100644 include/netlink.h create mode 100644 include/ofp-print.h create mode 100644 include/openflow-netlink.h create mode 100644 include/openflow.h create mode 100644 include/packets.h create mode 100644 include/socket-util.h create mode 100644 include/util.h create mode 100644 include/vconn.h create mode 100644 include/vlog-socket.h create mode 100644 include/vlog.h create mode 100644 include/xtoxll.h create mode 100644 lib/.gitignore create mode 100644 lib/Makefile.am create mode 100644 lib/buffer.c create mode 100644 lib/command-line.c create mode 100644 lib/dpif.c create mode 100644 lib/dynamic-string.c create mode 100644 lib/fatal-signal.c create mode 100644 lib/fault.c create mode 100644 lib/flow.c create mode 100644 lib/hash.c create mode 100644 lib/list.c create mode 100644 lib/netlink.c create mode 100644 lib/ofp-print.c create mode 100644 lib/socket-util.c create mode 100644 lib/util.c create mode 100644 lib/vconn-netlink.c create mode 100644 lib/vconn-tcp.c create mode 100644 lib/vconn.c create mode 100644 lib/vlog-socket.c create mode 100644 lib/vlog.c create mode 100644 man/.gitignore create mode 100644 man/Makefile.am create mode 100644 man/man1/.gitignore create mode 100644 man/man8/.gitignore create mode 100644 man/man8/Makefile.am create mode 100644 man/man8/controller.8 create mode 100644 man/man8/dpctl.8 create mode 100644 man/man8/secchan.8 create mode 100644 man/man8/vlogconf.8 create mode 100644 secchan/.gitignore create mode 100644 secchan/Makefile.am create mode 100644 
secchan/secchan.c create mode 100644 third-party/.gitignore create mode 100644 third-party/Makefile.am create mode 100644 third-party/README create mode 100644 third-party/ofp-tcpdump.patch create mode 100644 utilities/.gitignore create mode 100644 utilities/Makefile.am create mode 100644 utilities/dpctl.c create mode 100644 utilities/vlogconf.c diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..76ecbb77 --- /dev/null +++ b/COPYING @@ -0,0 +1,367 @@ +All source files are Copyright (C) 2007 Board of Trustees, Leland +Stanford Jr. University and covered by the following licence. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +Files in the datapath/ and its sub-directories are covered under the GNU +General Public License Version 2. Included below: + + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..b8bcaa3d --- /dev/null +++ b/ChangeLog @@ -0,0 +1,55 @@ +v0.1.8 - 03 Mar 2008 +-------------------- + - Added support for cross-compilation. 
+ - Various bug fixes and tweaks + +v0.1.7 - 07 Feb 2008 +-------------------- + - Allow permanent flow entries to be set + - Added patch for tcpdump that allows parsing of OpenFlow messages + - Various bug fixes and tweaks + +v0.1.6 - 05 Feb 2008 +-------------------- + - Added support for Linux 2.6.24 + - Set nwsrc/nwdst fields in flow structs on ARP packets + - Various bug fixes and tweaks + +v0.1.5 - 17 Jan 2008 +-------------------- + - Added support for Linux 2.4.20 + - Added support for GCC 2.95 + +v0.1.4 - 15 Jan 2008 +-------------------- + - Decode and print port_status messages + - Fixed build problems on big-endian systems + - Fixed compatibility for older 2.6 kernels + - Various bug fixes and tweaks + +v0.1.3 - 08 Jan 2008 +-------------------- + - Added support for flow expiration messages + - Decode and print all datapath-generated messages in dpctl's "monitor" + - Added "--noflow" option to controller + - Various bug fixes and tweaks + +v0.1.2 - 07 Jan 2008 +-------------------- + - Fixed distribution to include ofp_pcap.h + - Removed autoconf C++ checks + +v0.1.1 - 18 Dec 2007 +-------------------- + - Fixed support for Linux 2.4.35 and 2.6.22 + - Added support for Linux 2.6.15 + - Added "vlogconf" utility to modify logging configuration + - Added better support for SNAP headers + - Improved printing of flow information in dpctl + - Made kernel code consistently use tabs instead of spaces + - Removed libpcap requirement for building + - Various bug fixes and tweaks + +v0.1.0 - 30 Nov 2007 +-------------------- + - Initial release diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..e10aec69 --- /dev/null +++ b/INSTALL @@ -0,0 +1,156 @@ + Installation Instructions for OpenFlow Reference Release v0.1.5 + +This document describes how to build, install, and execute the v0.1.5 +reference implementation of OpenFlow. 
Please send any comments to: + + + +Setting up the Kernel Build Environment +--------------------------------------- + +The datapath kernel module must be compiled against a kernel build +directory for the Linux version the module is to run on. The datapath +module has been mainly tested on Linux 2.6.23. Support for Linux 2.4 +is also in place, although it has only been lightly tested under 2.4.35. + +For example, if compiling on Debian or Ubuntu, the Linux headers +and image packages must be installed (apt-get install +linux-headers- linux-image-). + +Note: the OpenFlow datapath requires that bridging support has been +configured in the kernel, but not enabled or in use. If the bridge +module is running (check with "lsmod | grep bridge"), you must remove +it ("rmmod bridge") before starting the datapath. + +Building the Code +----------------- + +1. In the top source directory, configure the package, passing the + location of the kernel build directory as an argument. Use + --with-l26 for Linux 2.6, --with-l24 for Linux 2.4: + + For example, if compiling for a running instance of Linux 2.6: + % ./configure --with-l26=/lib/modules/`uname -r`/build + + Or if compiling for a running instance of Linux 2.4: + % ./configure --with-l24=/lib/modules/`uname -r`/build + + To use a specific C compiler for compiling OpenFlow user programs, + also specify it on the configure command line, like so: + % ./configure CC=gcc-4.2 + +2. Run make in the top source directory: + + % make + + The following binaries will be built: + + Datapath kernel module: + ./datapath/linux-2.6/openflow_mod.ko (If compiling for Linux 2.6) + ./datapath/linux-2.4/openflow_mod.o (If compiling for Linux 2.4) + + Secure channel executable: + ./secchan/secchan + + Controller executable: + ./controller/controller + + Datapath administration utility: + ./utilities/dpctl + +3. (Optional) Run "make install" to install the executables and + manpages into the running system, by default under /usr/local. 
+ +Installing the datapath +----------------------- + +To run the module, simply insmod it: + + (Linux 2.6) + % insmod datapath/linux-2.6/openflow_mod.ko + + (Linux 2.4) + % insmod datapath/linux-2.4/compat24_mod.o + % insmod datapath/linux-2.4/openflow_mod.o + + +Testing the datapath +-------------------- + +Once the OpenFlow datapath has been installed (you can verify that it is +running if it appears in lsmod's listing), you can configure it using +the dpctl command line utility. + +1. Create a datapath instance. The command below creates a datapath with + ID 0 (see dpctl(8) for more detailed usage information). + + % dpctl adddp 0 + + (note, while in principle openflow_mod supports multiple datapaths + within the same host, this is rarely useful in practice) + +2. Use dpctl to attach the datapath to physical interfaces on the + machine. Say, for example, you want to create a trivial 2-port + switch using interfaces eth1 and eth2, you would issue the following + commands: + + % dpctl addif 0 eth1 + % dpctl addif 0 eth2 + + You can verify that the interfaces were successfully added by asking + dpctl to print the current status of datapath 0: + + % dpctl show 0 + +3. (Optional) You can manually add flows to the datapath to test using + dpctl add-flows and view them using dpctl dump-flows. See dpctl(8) + for more details. + +4. The simplest way to test the datapath is to run the provided sample + controller on the host machine to manage the datapath directly using + netlink. + + % controller -v nl:0 + + Once the controller is running, the datapath should operate like a + learning Ethernet switch. You may monitor the flows in the datapath + flow table using "dpctl dump-flows" command. + +Running the datapath with a remote controller +--------------------------------------------- + +1. Start the datapath and attach it to two or more physical ports as + described in the previous section. 
+ + Note: The current version of the secure channel and controller + require at least one interface not be connected to the datapath + to be functional. This interface will be used for communication + between the secure channel and the controller. Future releases will + support in-band control communication. + +2. Run the controller in passive tcp mode on the host which will act as + the controller. In the example below, the controller will bind to + port 975 (the default) awaiting connections from secure channels. + + % controller -v ptcp: + + (See controller(8) for more details) + + Make sure the machine hosting the controller is reachable by the switch. + +3. Run secchan on the datapath host to start the secure channel + connecting the datapath to a remote controller. (See secchan(8) + for usage details). The channel should be configured to connect to + the controller's IP address on the port configured in step 2. + + If the controller is running on host 192.168.1.2 port 975 (the + default port) and the datapath ID is 0, the secchan invocation + would look like: + + % secchan -v nl:0 tcp:192.168.1.2 + +Bug Reporting +------------- + +Please report problems to: +info@openflowswitch.org diff --git a/Make.vars b/Make.vars new file mode 100644 index 00000000..9b8b342f --- /dev/null +++ b/Make.vars @@ -0,0 +1,17 @@ +# -*- makefile -*- + +if HAVE_NETLINK +AM_CPPFLAGS = -DHAVE_NETLINK=1 +endif + +COMMON_FLAGS = -DVERSION=\"$(VERSION)\" +if NDEBUG +COMMON_FLAGS += -DNDEBUG -fomit-frame-pointer +endif + +AM_CFLAGS = $(COMMON_FLAGS) +AM_CFLAGS += -Wstrict-prototypes -I $(top_srcdir)/include + +if !NDEBUG +AM_LDFLAGS = -export-dynamic +endif diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000..aee1a9dc --- /dev/null +++ b/Makefile.am @@ -0,0 +1,2 @@ +AUTOMAKE_OPTIONS=foreign +SUBDIRS = lib datapath secchan controller utilities man include third-party diff --git a/README b/README index e69de29b..b5268917 100644 --- a/README +++ b/README @@ -0,0 +1,199 
@@ + OpenFlow Reference Release v0.1.5 + +What is OpenFlow? +----------------- + +OpenFlow is a flow-based switch specification designed to enable +researchers to run experiments in live networks. OpenFlow is based on a +simple Ethernet flow switch that exposes a standardized interface for +adding and removing flow entries. + +An OpenFlow Switch consists of three parts: (1) A Flow Table in which +each flow entry is associated with an action telling the switch how to +process the flow, (2) A Secure Channel connecting the switch to a remote +process (a controller), allowing commands and packets to be sent between +the controller and the switch, and (3) An OpenFlow Protocol +implementation, providing an open and standard way for a controller to +talk to the switch. + +An OpenFlow Switch can thus serve as a simple datapath element that +forwards packets between ports according to flow actions defined by +the controller using OpenFlow commands. Example actions are: + + - Forward this flow's packets to the given port(s) + - Drop this flow's packets + - Encapsulate and forward this flow's packets to the controller. + +The OpenFlow Switch is defined in detail in the OpenFlow Switch +Specification [2]. + +What's here? +------------ + +This software is a reference implementation of an OpenFlow Switch kernel +module for the Linux operating system, a secure channel implementation, +and an example controller that performs switching with MAC learning. + +The rest of this file contains the following sections: + + - Description of the directory hierarchy + - Platform support + - Quickstart build and install instructions + - Shortcomings + - References + +Directory Hierarchy +------------------- + + Source: + + datapath/ Linux kernel module implementing an OpenFlow Flow Table + that incoming packets are checked against. 
The + kernel module uses netlink (a socket protocol for + user-kernel communication, described in RFC 3549) to + pass OpenFlow messages with the secure channel to be + relayed to the controller. + + secchan/ A Secure Channel that connects to a kernel datapath + via netlink and a remote controller via TCP, + relaying OpenFlow packets received on one end to the + other. (The current implementation does not + support SSL, but this will be added in future releases.) + + controller/ A simple controller that connects to a datapath via + a Secure Channel, commanding the datapath to act as + a regular MAC learning switch. + + utilities/ Contains the source for "dpctl", a command-line utility + for controlling the OpenFlow datapath kernel module. + With it, you can add physical ports to the datapath, + add flows, monitor received packets, and query the + datapath state. + + include/ Header files common to the datapath, secure channel, + and controller components. + + lib/ Implementation files common to the datapath, secure + channel, and controller components. + + third-party/ Contains third-party software that may be useful for + debugging. Currently, it only contains a patch to + allow tcpdump to parse OpenFlow messages. + + Documentation: + + README Text file describing this OpenFlow implementation, + aka this document. + + INSTALL Detailed configure, build, and installation + instructions + + man/ Man pages describing how to administer datapath, + secure channel, and controller. + +Platform support +---------------- + + The datapath kernel module supports Linux 2.6.15 and above; however, + testing has focused on Linux 2.6.23. Support for Linux 2.4.20 and + above is also in place, although testing has focused on Linux 2.6. + + Components have been built and tested on Debian and Ubuntu.
+ + If you are able to build/run the code on platforms not mentioned + here, or have problems with a supported system, please report your + experiences to: + + + + GCC is required for compilation. + + +Building and Installing (Quick Start) +------------------------------------- + + Building the datapath module requires that the source for the + currently running Linux kernel be installed on the machine and + be configured. + + The following instructions assume the Linux 2.6 source is located in + /usr/src/linux-2.6.23 and Linux 2.4 in /usr/src/linux-2.4.35 + + 1. ./configure the package, passing the location of one or more + kernel source trees on the command line: + + For example, if compiling for Linux 2.6: + % ./configure --with-l26=/usr/src/linux-2.6.23 + + Or compiling for Linux 2.4: + % ./configure --with-l24=/usr/src/linux-2.4.35 + + 2. Run make: + + % make + + The following binaries should be built. + + Datapath kernel module: + ./datapath/linux-2.6/openflow_mod.ko (If compiling for Linux 2.6) + ./datapath/linux-2.4/openflow_mod.o (If compiling for Linux 2.4) + + Secure channel executable: + ./secchan/secchan + + Controller executable: + ./controller/controller + + dpctl utility: + ./utilities/dpctl + + 3. Optionally you can "make install" to install binaries and the + man pages (/usr/local/ is the default prefix). If you just want + access to the man pages without installing, set your MANPATH to + include the openflow/ source root. + + 4. Insert the datapath kernel module into the running Linux instance. + + (Linux 2.6) + % insmod datapath/linux-2.6/openflow_mod.ko + + (Linux 2.4) + % insmod datapath/linux-2.4/openflow_mod.o + + 5. Create datapaths by running dpctl on the Linux host (see man + dpctl(8)). Start the controller on a remote host with + controller (see man controller(8)). Start the Secure Channel + on the datapath host to connect the datapath to the controller + with secchan (see man secchan(8)).
+ + For more detailed installation instructions, refer to [3]. + + +Bugs/Shortcomings +----------------- + +- The current flowtable does not support all statistics messages + mentioned in the Type 0 OpenFlow spec +- The secure channel and sample controller don't support SSL +- The flowtable does not support the "normal processing" action +- Configure/build system does not support a separate build directory for + the datapath. ./configure must be run from the source root. +- dpctl dump-flows may freeze when large numbers of flows are in the + flow table. This has no effect on the datapath + +References +---------- + + [1] OpenFlow: Enabling Innovation in College Networks. Whitepaper. + + + [2] OpenFlow Switch Specification. + + + [3] Installation Instructions: INSTALL + +Contact +------- + +e-mail: info@openflowswitch.org +www: http://openflowswitch.org/alpha/ diff --git a/acinclude.m4 b/acinclude.m4 new file mode 100644 index 00000000..1cca91a7 --- /dev/null +++ b/acinclude.m4 @@ -0,0 +1,42 @@ +dnl ================================================================================= +dnl Distributed under the terms of the GNU GPL version 2.
+dnl Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University +dnl ================================================================================= + + +dnl -- +dnl CHECK_LINUX(OPTION, VERSION, NAME, VARIABLE, CONDITIONAL) +dnl +dnl Configure linux kernel source tree +dnl -- +AC_DEFUN([CHECK_LINUX], [ + AC_ARG_WITH([$1], + [AC_HELP_STRING([--with-$1=/path/to/linux-$3], + [Specify the linux $3 kernel sources])], + [path="$withval"], [path=])dnl + if test -n "$path"; then + path=`eval echo "$path"` + + AC_MSG_CHECKING([for $path directory]) + if test -d "$path"; then + AC_MSG_RESULT([yes]) + $4=$path + AC_SUBST($4) + else + AC_MSG_RESULT([no]) + AC_ERROR([source dir $path doesn't exist]) + fi + + AC_MSG_CHECKING([for $path kernel version]) + version=`grep '^PATCHLEVEL = ' "$path/Makefile" | sed 's/PATCHLEVEL = '//` + AC_MSG_RESULT([2.$version]) + if test "2.$version" != '$2'; then + AC_ERROR([Linux kernel source in $path is not version $2]) + fi + if ! test -e "$path"/include/linux/version.h || \ + !
test -e "$path"/include/linux/autoconf.h; then + AC_MSG_ERROR([Linux kernel source in $path is not configured]) + fi + fi + AM_CONDITIONAL($5, test -n "$path") +]) diff --git a/configure.ac b/configure.ac new file mode 100644 index 00000000..4ec1972a --- /dev/null +++ b/configure.ac @@ -0,0 +1,55 @@ +AC_PREREQ(2.59) +AC_INIT(openflow, v0.1.8, info@openflowswitch.org) +AM_INIT_AUTOMAKE + +AC_PROG_CC +AC_PROG_CPP + +AC_USE_SYSTEM_EXTENSIONS + +AC_PROG_LIBTOOL + +AC_ARG_ENABLE( + [ndebug], + [AC_HELP_STRING([--enable-ndebug], + [Disable debugging features for max performance])], + [case "${enableval}" in # ( + yes) ndebug=true ;; # ( + no) ndebug=false ;; # ( + *) AC_MSG_ERROR([bad value ${enableval} for --enable-ndebug]) ;; + esac], + [ndebug=false]) +AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue]) + +CHECK_LINUX(l26, 2.6, 2.6, KSRC26, L26_ENABLED) +CHECK_LINUX(uml, 2.6, 2.6-uml, KSRCUML, UML_ENABLED) +CHECK_LINUX(l24, 2.4, 2.4, KSRC24, L24_ENABLED) + +AC_CHECK_HEADER([linux/netlink.h], + [HAVE_NETLINK=yes], + [HAVE_NETLINK=no], + [#include ]) +AM_CONDITIONAL([HAVE_NETLINK], [test "$HAVE_NETLINK" = yes]) + +AC_CHECK_LIB([socket], [connect]) +AC_CHECK_LIB([resolv], [gethostbyname]) +AC_CHECK_LIB([dl], [dladdr]) + +CFLAGS="$CFLAGS -Wall -Wno-sign-compare" + +AC_CONFIG_FILES([Makefile +man/Makefile +man/man8/Makefile +datapath/Makefile +lib/Makefile +include/Makefile +controller/Makefile +utilities/Makefile +secchan/Makefile +datapath/tests/Makefile +third-party/Makefile +datapath/linux-2.6/Makefile +datapath/linux-2.6-uml/Makefile +datapath/linux-2.4/Makefile]) + +AC_OUTPUT diff --git a/controller/.gitignore b/controller/.gitignore new file mode 100644 index 00000000..14ba0495 --- /dev/null +++ b/controller/.gitignore @@ -0,0 +1,3 @@ +/Makefile +/Makefile.in +/controller diff --git a/controller/Makefile.am b/controller/Makefile.am new file mode 100644 index 00000000..1a22d525 --- /dev/null +++ b/controller/Makefile.am @@ -0,0 +1,6 @@ +include ../Make.vars + 
+bin_PROGRAMS = controller + +controller_SOURCES = controller.c +controller_LDADD = ../lib/libopenflow.la diff --git a/controller/controller.c b/controller/controller.c new file mode 100644 index 00000000..8e22feab --- /dev/null +++ b/controller/controller.c @@ -0,0 +1,687 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buffer.h" +#include "command-line.h" +#include "compiler.h" +#include "fault.h" +#include "flow.h" +#include "hash.h" +#include "list.h" +#include "mac.h" +#include "ofp-print.h" +#include "openflow.h" +#include "time.h" +#include "util.h" +#include "vconn.h" +#include "vlog-socket.h" +#include "xtoxll.h" + +#include "vlog.h" +#define THIS_MODULE VLM_controller + +#define MAX_SWITCHES 16 +#define MAX_TXQ 128 + +struct switch_ { + char *name; + struct vconn *vconn; + struct pollfd *pollfd; + + uint64_t datapath_id; + time_t last_control_hello; + + int n_txq; + struct buffer *txq, *tx_tail; +}; + +/* -H, --hub: Use dumb hub instead of learning switch? */ +static bool hub = false; + +/* -n, --noflow: Pass traffic, but don't setup flows in switch */ +static bool noflow = false; + +static void parse_options(int argc, char *argv[]); +static void usage(void) NO_RETURN; + +static struct switch_ *connect_switch(const char *name); +static struct switch_ *new_switch(const char *name, struct vconn *); +static void close_switch(struct switch_ *); + +static void queue_tx(struct switch_ *, struct buffer *); + +static void send_control_hello(struct switch_ *); + +static int do_switch_recv(struct switch_ *this); +static int do_switch_send(struct switch_ *this); + +static void process_packet(struct switch_ *, struct buffer *); +static void process_hub(struct switch_ *, struct ofp_packet_in *); +static void process_noflow(struct switch_ *, struct ofp_packet_in *); + +static void switch_init(void); +static void process_switch(struct switch_ *, struct ofp_packet_in *); + +int +main(int argc, char *argv[]) +{ + struct switch_ *switches[MAX_SWITCHES]; + struct pollfd pollfds[MAX_SWITCHES + 1]; + struct vlog_server *vlog_server; + int n_switches; + int retval; + int i; + + set_program_name(argv[0]); + register_fault_handlers(); + vlog_init(); + parse_options(argc, argv); + + if 
(!hub && !noflow) { + switch_init(); + } + + if (argc - optind < 1) { + fatal(0, "at least one vconn argument required; use --help for usage"); + } + + retval = vlog_server_listen(NULL, &vlog_server); + if (retval) { + fatal(retval, "Could not listen for vlog connections"); + } + + n_switches = 0; + for (i = 0; i < argc - optind; i++) { + struct switch_ *this = connect_switch(argv[optind + i]); + if (this) { + if (n_switches >= MAX_SWITCHES) { + fatal(0, "max %d switch connections", n_switches); + } + switches[n_switches++] = this; + } + } + if (n_switches == 0) { + fatal(0, "could not connect to any switches"); + } + + while (n_switches > 0) { + int retval; + + /* Wait until there's something to do. */ + for (i = 0; i < n_switches; i++) { + struct switch_ *this = switches[i]; + int want; + + if (vconn_is_passive(this->vconn)) { + want = n_switches < MAX_SWITCHES ? WANT_ACCEPT : 0; + } else { + want = WANT_RECV; + if (this->n_txq) { + want |= WANT_SEND; + } + } + + this->pollfd = &pollfds[i]; + this->pollfd->fd = -1; + this->pollfd->events = 0; + vconn_prepoll(this->vconn, want, this->pollfd); + } + if (vlog_server) { + pollfds[n_switches].fd = vlog_server_get_fd(vlog_server); + pollfds[n_switches].events = POLLIN; + } + do { + retval = poll(pollfds, n_switches + (vlog_server != NULL), -1); + } while (retval < 0 && errno == EINTR); + if (retval <= 0) { + fatal(retval < 0 ? errno : 0, "poll"); + } + + /* Let each connection deal with any pending operations. 
*/ + for (i = 0; i < n_switches; i++) { + struct switch_ *this = switches[i]; + vconn_postpoll(this->vconn, &this->pollfd->revents); + if (this->pollfd->revents & POLLERR) { + this->pollfd->revents |= POLLIN | POLLOUT; + } + } + if (vlog_server && pollfds[n_switches].revents) { + vlog_server_poll(vlog_server); + } + + for (i = 0; i < n_switches; ) { + struct switch_ *this = switches[i]; + + if (this->pollfd) { + retval = 0; + if (vconn_is_passive(this->vconn)) { + if (this->pollfd->revents & POLLIN) { + struct vconn *new_vconn; + while (n_switches < MAX_SWITCHES + && (retval = vconn_accept(this->vconn, + &new_vconn)) == 0) { + switches[n_switches++] = new_switch("tcp", + new_vconn); + } + } + } else { + bool may_read = this->pollfd->revents & POLLIN; + bool may_write = this->pollfd->revents & POLLOUT; + if (may_read) { + retval = do_switch_recv(this); + if (!retval || retval == EAGAIN) { + retval = 0; + + /* Enable writing to avoid round trip through poll + * in common case. */ + may_write = true; + } + } + while ((!retval || retval == EAGAIN) && may_write) { + retval = do_switch_send(this); + may_write = !retval; + } + } + + if (retval && retval != EAGAIN) { + close_switch(this); + switches[i] = switches[--n_switches]; + continue; + } + } else { + /* New switch that hasn't been polled yet. 
*/ + } + i++; + } + } + + return 0; +} + +static int +do_switch_recv(struct switch_ *this) +{ + struct buffer *msg; + int retval; + + retval = vconn_recv(this->vconn, &msg); + if (!retval) { + process_packet(this, msg); + buffer_delete(msg); + } + return retval; +} + +static int +do_switch_send(struct switch_ *this) +{ + int retval = 0; + if (this->n_txq) { + struct buffer *next = this->txq->next; + + retval = vconn_send(this->vconn, this->txq); + if (retval) { + return retval; + } + + this->txq = next; + if (this->txq == NULL) { + this->tx_tail = NULL; + } + this->n_txq--; + return 0; + } + return EAGAIN; +} + +struct switch_ * +connect_switch(const char *name) +{ + struct vconn *vconn; + int retval; + + retval = vconn_open(name, &vconn); + if (retval) { + VLOG_ERR("%s: connect: %s", name, strerror(retval)); + return NULL; + } + + return new_switch(name, vconn); +} + +static struct switch_ * +new_switch(const char *name, struct vconn *vconn) +{ + struct switch_ *this = xmalloc(sizeof *this); + memset(this, 0, sizeof *this); + this->name = xstrdup(name); + this->vconn = vconn; + this->pollfd = NULL; + this->n_txq = 0; + this->txq = NULL; + this->tx_tail = NULL; + this->last_control_hello = 0; + if (!vconn_is_passive(vconn)) { + send_control_hello(this); + } + return this; +} + +static void +close_switch(struct switch_ *this) +{ + if (this) { + struct buffer *cur, *next; + + free(this->name); + vconn_close(this->vconn); + for (cur = this->txq; cur != NULL; cur = next) { + next = cur->next; + buffer_delete(cur); + } + free(this); + } +} + +static void +send_control_hello(struct switch_ *this) +{ + time_t now = time(0); + if (now >= this->last_control_hello + 1) { + struct buffer *b; + struct ofp_control_hello *och; + + b = buffer_new(0); + och = buffer_put_uninit(b, sizeof *och); + memset(och, 0, sizeof *och); + och->header.version = OFP_VERSION; + och->header.length = htons(sizeof *och); + + och->version = htonl(OFP_VERSION); + och->flags = 
htons(OFP_CHELLO_SEND_FLOW_EXP); + och->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); + queue_tx(this, b); + + this->last_control_hello = now; + } +} + +static void +check_txq(struct switch_ *this UNUSED) +{ +#if 0 + struct buffer *iter; + size_t n; + + assert(this->n_txq == 0 + ? this->txq == NULL && this->tx_tail == NULL + : this->txq != NULL && this->tx_tail != NULL); + + n = 0; + for (iter = this->txq; iter != NULL; iter = iter->next) { + n++; + assert((iter->next != NULL) == (iter != this->tx_tail)); + } + assert(n == this->n_txq); +#endif +} + +static void +queue_tx(struct switch_ *this, struct buffer *b) +{ + check_txq(this); + + b->next = NULL; + if (this->n_txq++) { + this->tx_tail->next = b; + } else { + this->txq = b; + } + this->tx_tail = b; + + check_txq(this); +} + +static void +process_packet(struct switch_ *sw, struct buffer *msg) +{ + static const size_t min_size[UINT8_MAX + 1] = { + [0 ... UINT8_MAX] = SIZE_MAX, + [OFPT_CONTROL_HELLO] = sizeof (struct ofp_control_hello), + [OFPT_DATA_HELLO] = sizeof (struct ofp_data_hello), + [OFPT_PACKET_IN] = offsetof (struct ofp_packet_in, data), + [OFPT_PACKET_OUT] = sizeof (struct ofp_packet_out), + [OFPT_FLOW_MOD] = sizeof (struct ofp_flow_mod), + [OFPT_FLOW_EXPIRED] = sizeof (struct ofp_flow_expired), + [OFPT_TABLE] = sizeof (struct ofp_table), + [OFPT_PORT_MOD] = sizeof (struct ofp_port_mod), + [OFPT_PORT_STATUS] = sizeof (struct ofp_port_status), + [OFPT_FLOW_STAT_REQUEST] = sizeof (struct ofp_flow_stat_request), + [OFPT_FLOW_STAT_REPLY] = sizeof (struct ofp_flow_stat_reply), + }; + struct ofp_header *oh; + + oh = msg->data; + if (msg->size < min_size[oh->type]) { + VLOG_WARN("%s: too short (%zu bytes) for type %"PRIu8" (min %zu)", + sw->name, msg->size, oh->type, min_size[oh->type]); + return; + } + + if (oh->type == OFPT_DATA_HELLO) { + struct ofp_data_hello *odh = msg->data; + sw->datapath_id = odh->datapath_id; + } else if (sw->datapath_id == 0) { + send_control_hello(sw); + return; + } + + if 
(oh->type == OFPT_PACKET_IN) { + if (sw->n_txq >= MAX_TXQ) { + VLOG_WARN("%s: tx queue overflow", sw->name); + } else if (noflow) { + process_noflow(sw, msg->data); + } else if (hub) { + process_hub(sw, msg->data); + } else { + process_switch(sw, msg->data); + } + return; + } + + ofp_print(stdout, msg->data, msg->size, 2); +} + +static void +process_hub(struct switch_ *sw, struct ofp_packet_in *opi) +{ + size_t pkt_ofs, pkt_len; + struct buffer pkt; + struct flow flow; + + /* Extract flow data from 'opi' into 'flow'. */ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + flow_extract(&pkt, ntohs(opi->in_port), &flow); + + /* Add new flow. */ + queue_tx(sw, make_add_simple_flow(&flow, ntohl(opi->buffer_id), + OFPP_FLOOD)); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), + OFPP_FLOOD)); + } +} + +static void +process_noflow(struct switch_ *sw, struct ofp_packet_in *opi) +{ + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + size_t pkt_ofs, pkt_len; + struct buffer pkt; + + /* Extract flow data from 'opi' into 'flow'. 
*/ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(opi->in_port), + OFPP_FLOOD)); + } else { + queue_tx(sw, make_buffered_packet_out(ntohl(opi->buffer_id), + ntohs(opi->in_port), OFPP_FLOOD)); + } +} + + +#define MAC_HASH_BITS 10 +#define MAC_HASH_MASK (MAC_HASH_SIZE - 1) +#define MAC_HASH_SIZE (1u << MAC_HASH_BITS) + +#define MAC_MAX 1024 + +struct mac_source { + struct list hash_list; + struct list lru_list; + uint64_t datapath_id; + uint8_t mac[ETH_ADDR_LEN]; + uint16_t port; +}; + +static struct list mac_table[MAC_HASH_SIZE]; +static struct list lrus; +static size_t mac_count; + +static void +switch_init(void) +{ + int i; + + list_init(&lrus); + for (i = 0; i < MAC_HASH_SIZE; i++) { + list_init(&mac_table[i]); + } +} + +/* Returns the 'mac_table' bucket for 'mac' as seen on the switch identified + * by 'datapath_id'. */ +static struct list * +mac_table_bucket(uint64_t datapath_id, const uint8_t mac[ETH_ADDR_LEN]) +{ + uint32_t hash; + hash = hash_fnv(&datapath_id, sizeof datapath_id, HASH_FNV_BASIS); + hash = hash_fnv(mac, ETH_ADDR_LEN, hash); + /* Mask with MAC_HASH_MASK; MAC_HASH_BITS is the bit count (10), which + * would leave only buckets 0, 2, 8 and 10 in use. */ + return &mac_table[hash & MAC_HASH_MASK]; +} + +static void +process_switch(struct switch_ *sw, struct ofp_packet_in *opi) +{ + size_t pkt_ofs, pkt_len; + struct buffer pkt; + struct flow flow; + + uint16_t out_port; + + /* Extract flow data from 'opi' into 'flow'. */ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + flow_extract(&pkt, ntohs(opi->in_port), &flow); + + /* Learn the source. 
*/ + if (!mac_is_multicast(flow.dl_src)) { + struct mac_source *src; + struct list *bucket; + bool found; + + bucket = mac_table_bucket(sw->datapath_id, flow.dl_src); + found = false; + LIST_FOR_EACH (src, struct mac_source, hash_list, bucket) { + if (src->datapath_id == sw->datapath_id + && mac_equals(src->mac, flow.dl_src)) { + found = true; + break; + } + } + + if (!found) { + /* Learn a new address. */ + + if (mac_count >= MAC_MAX) { + /* Drop the least recently used mac source. */ + struct mac_source *lru; + lru = CONTAINER_OF(lrus.next, struct mac_source, lru_list); + list_remove(&lru->hash_list); + list_remove(&lru->lru_list); + free(lru); + } else { + mac_count++; + } + + /* Create new mac source */ + src = xmalloc(sizeof *src); + src->datapath_id = sw->datapath_id; + memcpy(src->mac, flow.dl_src, ETH_ADDR_LEN); + src->port = -1; + list_push_front(bucket, &src->hash_list); + list_push_back(&lrus, &src->lru_list); + } else { + /* Make 'src' most-recently-used. */ + list_remove(&src->lru_list); + list_push_back(&lrus, &src->lru_list); + } + + if (ntohs(flow.in_port) != src->port) { + src->port = ntohs(flow.in_port); + VLOG_DBG("learned that "MAC_FMT" is on datapath %"PRIx64" port %d", + MAC_ARGS(src->mac), ntohll(src->datapath_id), + src->port); + } + } else { + VLOG_DBG("multicast packet source "MAC_FMT, MAC_ARGS(flow.dl_src)); + } + + /* Figure out the destination. */ + out_port = OFPP_FLOOD; + if (!mac_is_multicast(flow.dl_dst)) { + struct mac_source *dst; + struct list *bucket; + + bucket = mac_table_bucket(sw->datapath_id, flow.dl_dst); + LIST_FOR_EACH (dst, struct mac_source, hash_list, bucket) { + if (dst->datapath_id == sw->datapath_id + && mac_equals(dst->mac, flow.dl_dst)) { + out_port = dst->port; + break; + } + } + } + + if (out_port != OFPP_FLOOD) { + /* The output port is known, so add a new flow. 
*/ + queue_tx(sw, make_add_simple_flow(&flow, ntohl(opi->buffer_id), + out_port)); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), + out_port)); + } + } else { + /* We don't know that MAC. Flood the packet. */ + struct buffer *b; + if (ntohl(opi->buffer_id) == UINT32_MAX) { + b = make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), out_port); + } else { + b = make_buffered_packet_out(ntohl(opi->buffer_id), + ntohs(flow.in_port), out_port); + } + queue_tx(sw, b); + } +} + +static void +parse_options(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"hub", no_argument, 0, 'H'}, + {"noflow", no_argument, 0, 'n'}, + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + break; + } + + switch (c) { + case 'H': + hub = true; + break; + + case 'n': + noflow = true; + break; + + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: OpenFlow controller\n" + "usage: %s [OPTIONS] VCONN\n" + "where VCONN is one of the following:\n" +#ifdef HAVE_NETLINK + " nl:DP_IDX via netlink to local datapath DP_IDX\n" +#endif + " ptcp:[PORT] listen to TCP PORT (default: %d)\n" + "\nOther options:\n" + " -H, --hub act as hub instead of learning switch\n" + " -n, --noflow pass traffic, but don't add flows\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " 
-V, --version display version information\n", + program_name, program_name, OFP_TCP_PORT); + exit(EXIT_SUCCESS); +} diff --git a/datapath/.gitignore b/datapath/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/datapath/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/datapath/Makefile.am b/datapath/Makefile.am new file mode 100644 index 00000000..99c09af4 --- /dev/null +++ b/datapath/Makefile.am @@ -0,0 +1,35 @@ +SUBDIRS = tests +if L26_ENABLED +SUBDIRS += linux-2.6 +endif +if UML_ENABLED +SUBDIRS += linux-2.6-uml +endif +if L24_ENABLED +SUBDIRS += linux-2.4 +endif + +EXTRA_DIST = linux-2.6 linux-2.4 linux-2.6-uml\ + datapath.c snap.h chain.c crc32.c crc_t.c\ + flow.h forward.h table-hash.c table-mac.c\ + unit.c unit.h datapath.h chain.h crc32.h\ + flow.c forward.c forward_t.c table.h\ + table-linear.c table_t.c unit-exports.c\ + datapath_t.c datapath_t.h compat.h\ + dp_dev.c + +# Do not include header and source files from the top of the linux-* +# directories, as these are just symbolic links to the files in +# "datapath". +dist-hook: + rm -rf `find $(distdir)/linux-* -name Module.symvers` + rm -rf `find $(distdir)/linux-* -name .*.cmd` + rm -rf `find $(distdir)/linux-* -name .*.swp` + rm -rf `find $(distdir)/linux-* -name .*.d` + rm -rf `find $(distdir)/linux-* -name .tmp_versions` + rm -rf `find $(distdir)/linux-* -name *.o` + rm -rf `find $(distdir)/linux-* -name *.ko` + rm -rf `find $(distdir)/linux-* -name Makefile` + rm -rf `find $(distdir)/linux-* -name .gitignore` + rm -f $(distdir)/linux-*/*.h + rm -f $(distdir)/linux-*/*.c diff --git a/datapath/README b/datapath/README new file mode 100644 index 00000000..e69de29b diff --git a/datapath/chain.c b/datapath/chain.c new file mode 100644 index 00000000..458e9e4f --- /dev/null +++ b/datapath/chain.c @@ -0,0 +1,161 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University + */ + +#include "chain.h" +#include "flow.h" +#include "table.h" +#include +#include + +/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or + * negative error. If 'table' is null it is assumed that table creation failed + * due to out-of-memory. */ +static int add_table(struct sw_chain *chain, struct sw_table *table) +{ + if (table == NULL) + return -ENOMEM; + if (chain->n_tables >= CHAIN_MAX_TABLES) { + printk("too many tables in chain\n"); + table->destroy(table); + return -ENOBUFS; + } + chain->tables[chain->n_tables++] = table; + return 0; +} + +/* Creates and returns a new chain associated with 'dp'. Returns NULL if the + * chain cannot be created. */ +struct sw_chain *chain_create(struct datapath *dp) +{ + struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL); + if (chain == NULL) + return NULL; + chain->dp = dp; + + if (add_table(chain, table_mac_create(TABLE_MAC_NUM_BUCKETS, + TABLE_MAC_MAX_FLOWS)) + || add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, + 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) + || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) { + chain_destroy(chain); + return NULL; + } + + return chain; +} + +/* Searches 'chain' for a flow matching 'key', which must not have any wildcard + * fields. Returns the flow if successful, otherwise a null pointer. + * + * Caller must hold rcu_read_lock, and not release it until it is done with the + * returned flow. */ +struct sw_flow *chain_lookup(struct sw_chain *chain, + const struct sw_flow_key *key) +{ + int i; + + BUG_ON(key->wildcards); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + struct sw_flow *flow = t->lookup(t, key); + if (flow) + return flow; + } + return NULL; +} + +/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if + * successful or a negative error. 
+ * + * If successful, 'flow' becomes owned by the chain, otherwise it is retained + * by the caller. + * + * Caller must hold rcu_read_lock. If insertion is successful, it must not + * release rcu_read_lock until it is done with the inserted flow. */ +int chain_insert(struct sw_chain *chain, struct sw_flow *flow) +{ + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + if (t->insert(t, flow)) + return 0; + } + + return -ENOBUFS; +} + +/* Deletes from 'chain' any and all flows that match 'key'. Returns the number + * of flows that were deleted. + * + * Expensive in the general case as currently implemented, since it requires + * iterating through the entire contents of each table for keys that contain + * wildcards. Relatively cheap for fully specified keys. + * + * The caller need not hold any locks. */ +int chain_delete(struct sw_chain *chain, const struct sw_flow_key *key, int strict) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + rcu_read_lock(); + count += t->delete(t, key, strict); + rcu_read_unlock(); + } + + return count; + +} + +/* Performs timeout processing on all the tables in 'chain'. Returns the + * number of flow entries deleted through expiration. + * + * Expensive as currently implemented, since it iterates through the entire + * contents of each table. + * + * The caller need not hold any locks. */ +int chain_timeout(struct sw_chain *chain) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + rcu_read_lock(); + count += t->timeout(chain->dp, t); + rcu_read_unlock(); + } + return count; +} + +/* Destroys 'chain', which must not have any users. 
*/ +void chain_destroy(struct sw_chain *chain) +{ + int i; + + synchronize_rcu(); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + t->destroy(t); + } + kfree(chain); +} + +/* Prints statistics for each of the tables in 'chain'. */ +void chain_print_stats(struct sw_chain *chain) +{ + int i; + + printk("\n"); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + struct sw_table_stats stats; + t->stats(t, &stats); + printk("%s: %lu/%lu flows\n", + stats.name, stats.n_flows, stats.max_flows); + } +} diff --git a/datapath/chain.h b/datapath/chain.h new file mode 100644 index 00000000..fc07f513 --- /dev/null +++ b/datapath/chain.h @@ -0,0 +1,31 @@ +#ifndef CHAIN_H +#define CHAIN_H 1 + +struct sw_flow; +struct sw_flow_key; +struct datapath; + + +#define TABLE_LINEAR_MAX_FLOWS 100 +#define TABLE_HASH_MAX_FLOWS 65536 +#define TABLE_MAC_MAX_FLOWS 1024 +#define TABLE_MAC_NUM_BUCKETS 1024 + +/* Set of tables chained together in sequence from cheap to expensive. 
*/ +#define CHAIN_MAX_TABLES 4 +struct sw_chain { + int n_tables; + struct sw_table *tables[CHAIN_MAX_TABLES]; + + struct datapath *dp; +}; + +struct sw_chain *chain_create(struct datapath *); +struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *); +int chain_insert(struct sw_chain *, struct sw_flow *); +int chain_delete(struct sw_chain *, const struct sw_flow_key *, int); +int chain_timeout(struct sw_chain *); +void chain_destroy(struct sw_chain *); +void chain_print_stats(struct sw_chain *); + +#endif /* chain.h */ diff --git a/datapath/compat.h b/datapath/compat.h new file mode 100644 index 00000000..12100ae3 --- /dev/null +++ b/datapath/compat.h @@ -0,0 +1,17 @@ +#ifndef COMPAT_H +#define COMPAT_H 1 + +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + +#include "compat26.h" + +#else + +#include "compat24.h" + +#endif + + +#endif /* compat.h */ diff --git a/datapath/crc32.c b/datapath/crc32.c new file mode 100644 index 00000000..a5210ad4 --- /dev/null +++ b/datapath/crc32.c @@ -0,0 +1,40 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "crc32.h" + +void crc32_init(struct crc32 *crc, unsigned int polynomial) +{ + int i; + + for (i = 0; i < CRC32_TABLE_SIZE; ++i) { + unsigned int reg = i << 24; + int j; + for (j = 0; j < CRC32_TABLE_BITS; j++) { + int topBit = (reg & 0x80000000) != 0; + reg <<= 1; + if (topBit) + reg ^= polynomial; + } + crc->table[i] = reg; + } +} + +unsigned int crc32_calculate(const struct crc32 *crc, + const void *data_, size_t n_bytes) +{ + // FIXME: this can be optimized by unrolling, see linux-2.6/lib/crc32.c. 
+ const uint8_t *data = data_; + unsigned int result = 0; + size_t i; + + for (i = 0; i < n_bytes; i++) { + unsigned int top = result >> 24; + top ^= data[i]; + result = (result << 8) ^ crc->table[top]; + } + return result; +} diff --git a/datapath/crc32.h b/datapath/crc32.h new file mode 100644 index 00000000..21a350a9 --- /dev/null +++ b/datapath/crc32.h @@ -0,0 +1,22 @@ +#ifndef CRC32_H +#define CRC32_H 1 + +#include +#ifndef __KERNEL__ +#include +#endif +#include + +#define CRC32_TABLE_BITS 8 +#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS) + +struct crc32 { + unsigned int table[CRC32_TABLE_SIZE]; +}; + +void crc32_init(struct crc32 *, unsigned int polynomial); +unsigned int crc32_calculate(const struct crc32 *, + const void *data_, size_t n_bytes); + + +#endif /* crc32.h */ diff --git a/datapath/crc_t.c b/datapath/crc_t.c new file mode 100644 index 00000000..e01768fc --- /dev/null +++ b/datapath/crc_t.c @@ -0,0 +1,47 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include + +#include "crc32.h" +#include "unit.h" + + +static void +print_error(unsigned int poly, char *data, + unsigned int expected, unsigned int calculated) +{ + unit_fail("crc error: poly=%x data=%s expected=%x calculated=%x\n", + poly, data, expected, calculated); +} + +void +run_crc_t(void) +{ + struct crc32 crc; + unsigned int val, i, j; + + char *data[3] = { "h3rei$@neX@mp13da7@sTr117G0fCH@r$", + "1324lkqasdf0-[LKJD0;asd,.cv;/asd0:\"'~`co29", + "6" }; + + unsigned int polys[2] = { 0x04C11DB7, + 0x1EDC6F41 }; + + unsigned int crc_values[2][3] = { + { 0xDE1040C3, 0x65343A0B, 0xCEB42022 }, + { 0x6C149FAE, 0x470A6B73, 0x4D3AA134 } }; + for (i = 0; i < 2; i++) { + crc32_init(&crc, polys[i]); + for (j = 0; j < 3; j++) { + val = crc32_calculate(&crc, data[j], strlen(data[j])); + if (val != crc_values[i][j]) { + print_error(polys[i], data[j], crc_values[i][j], val); + } + } + } +} diff --git a/datapath/datapath.c b/datapath/datapath.c new file mode 100644 index 00000000..1b5a2cd5 --- /dev/null +++ b/datapath/datapath.c @@ -0,0 +1,1624 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +/* Functions for managing the dp interface/device. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow-netlink.h" +#include "datapath.h" +#include "table.h" +#include "chain.h" +#include "forward.h" +#include "flow.h" +#include "datapath_t.h" + +#include "compat.h" + + +/* Number of seconds between runs of the flow expiration code. 
*/ +#define EXPIRE_SECS 1 + +#define BRIDGE_PORT_NO_FLOOD 0x00000001 + +#define UINT32_MAX 4294967295U + +struct net_bridge_port { + u16 port_no; + u32 flags; + struct datapath *dp; + struct net_device *dev; + struct list_head node; /* Element in datapath.ports. */ +}; + +static struct genl_family dp_genl_family; +static struct genl_multicast_group mc_group; + +int dp_dev_setup(struct net_device *dev); + +/* It's hard to imagine wanting more than one datapath, but... */ +#define DP_MAX 32 + +/* datapaths. Protected on the read side by rcu_read_lock, on the write side + * by dp_mutex. + * + * It is safe to access the datapath and net_bridge_port structures with just + * the dp_mutex, but to access the chain you need to take the rcu_read_lock + * also (because dp_mutex doesn't prevent flows from being destroyed). + */ +static struct datapath *dps[DP_MAX]; +static DEFINE_MUTEX(dp_mutex); + +static void dp_timer_handler(unsigned long arg); +static int send_port_status(struct net_bridge_port *p, uint8_t status); + + +/* nla_unreserve - reduce amount of space reserved by nla_reserve + * @skb: socket buffer from which to recover room + * @nla: netlink attribute to adjust + * @len: amount by which to reduce attribute payload + * + * Reduces amount of space reserved by a call to nla_reserve. + * + * No other attributes may be added between calling nla_reserve and this + * function, since it will create a hole in the message. + */ +void nla_unreserve(struct sk_buff *skb, struct nlattr *nla, int len) +{ + skb->tail -= len; + skb->len -= len; + + nla->nla_len -= len; +} + +/* Generates a unique datapath id. It incorporates the datapath index + * and a hardware address, if available. If not, it generates a random + * one. + */ +static +uint64_t gen_datapath_id(uint16_t dp_idx) +{ + uint64_t id; + int i; + struct net_device *dev; + + /* The top 16 bits are used to identify the datapath. The lower 48 bits + * use an interface address. 
*/ + id = (uint64_t)dp_idx << 48; + if ((dev = dev_get_by_name(&init_net, "ctl0")) + || (dev = dev_get_by_name(&init_net, "eth0"))) { + for (i=0; idev_addr[i] << (8*(ETH_ALEN-1 - i)); + } + dev_put(dev); + } else { + /* Randomly choose the lower 48 bits if we cannot find an + * address and mark the most significant bit to indicate that + * this was randomly generated. */ + uint8_t rand[ETH_ALEN]; + get_random_bytes(rand, ETH_ALEN); + id |= (uint64_t)1 << 63; + for (i=0; i= DP_MAX) + return -EINVAL; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&dp_mutex); + dp = rcu_dereference(dps[dp_idx]); + if (dp != NULL) { + err = -EEXIST; + goto err_unlock; + } + + err = -ENOMEM; + dp = kzalloc(sizeof *dp, GFP_KERNEL); + if (dp == NULL) + goto err_unlock; + + dp->dp_idx = dp_idx; + dp->id = gen_datapath_id(dp_idx); + dp->chain = chain_create(dp); + if (dp->chain == NULL) + goto err_free_dp; + INIT_LIST_HEAD(&dp->port_list); + +#if 0 + /* Setup our "of" device */ + dp->dev.priv = dp; + rtnl_lock(); + err = dp_dev_setup(&dp->dev); + rtnl_unlock(); + if (err != 0) + printk("datapath: problem setting up 'of' device\n"); +#endif + + dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; + + setup_timer(&dp->timer, dp_timer_handler, (unsigned long) dp); + mod_timer(&dp->timer, round_jiffies(jiffies + (EXPIRE_SECS * HZ))); + + rcu_assign_pointer(dps[dp_idx], dp); + mutex_unlock(&dp_mutex); + + return 0; + +err_free_dp: + kfree(dp); +err_unlock: + mutex_unlock(&dp_mutex); + module_put(THIS_MODULE); + return err; +} + +/* Find and return a free port number under 'dp'. Called under dp_mutex. 
*/ +static int find_portno(struct datapath *dp) +{ + int i; + for (i = 0; i < OFPP_MAX; i++) + if (dp->ports[i] == NULL) + return i; + return -EXFULL; +} + +static struct net_bridge_port *new_nbp(struct datapath *dp, + struct net_device *dev) +{ + struct net_bridge_port *p; + int port_no; + + port_no = find_portno(dp); + if (port_no < 0) + return ERR_PTR(port_no); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + + p->dp = dp; + dev_hold(dev); + p->dev = dev; + p->port_no = port_no; + + return p; +} + +/* Called with dp_mutex. */ +int add_switch_port(struct datapath *dp, struct net_device *dev) +{ + struct net_bridge_port *p; + + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) + return -EINVAL; + + if (dev->br_port != NULL) + return -EBUSY; + + p = new_nbp(dp, dev); + if (IS_ERR(p)) + return PTR_ERR(p); + + dev_hold(dev); + rcu_assign_pointer(dev->br_port, p); + rtnl_lock(); + dev_set_promiscuity(dev, 1); + rtnl_unlock(); + + rcu_assign_pointer(dp->ports[p->port_no], p); + list_add_rcu(&p->node, &dp->port_list); + + /* Notify the ctlpath that this port has been added */ + send_port_status(p, OFPPR_ADD); + + return 0; +} + +/* Delete 'p' from switch. + * Called with dp_mutex. */ +static int del_switch_port(struct net_bridge_port *p) +{ + /* First drop references to device. */ + rtnl_lock(); + dev_set_promiscuity(p->dev, -1); + rtnl_unlock(); + list_del_rcu(&p->node); + rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + rcu_assign_pointer(p->dev->br_port, NULL); + + /* Then wait until no one is still using it, and destroy it. */ + synchronize_rcu(); + + /* Notify the ctlpath that this port no longer exists */ + send_port_status(p, OFPPR_DELETE); + + dev_put(p->dev); + kfree(p); + + return 0; +} + +/* Called with dp_mutex. 
*/ +static void del_dp(struct datapath *dp) +{ + struct net_bridge_port *p, *n; + +#if 0 + /* Unregister the "of" device of this dp */ + rtnl_lock(); + unregister_netdevice(&dp->dev); + rtnl_unlock(); +#endif + + /* Drop references to DP. */ + list_for_each_entry_safe (p, n, &dp->port_list, node) + del_switch_port(p); + del_timer_sync(&dp->timer); + rcu_assign_pointer(dps[dp->dp_idx], NULL); + + /* Wait until no longer in use, then destroy it. */ + synchronize_rcu(); + chain_destroy(dp->chain); + kfree(dp); + module_put(THIS_MODULE); +} + +static void dp_timer_handler(unsigned long arg) +{ + struct datapath *dp = (struct datapath *) arg; +#if 1 + chain_timeout(dp->chain); +#else + int count = chain_timeout(dp->chain); + chain_print_stats(dp->chain); + if (count) + printk("%d flows timed out\n", count); +#endif + mod_timer(&dp->timer, round_jiffies(jiffies + (EXPIRE_SECS * HZ))); +} + +/* + * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on + * different set of devices!) Returns 0 if *pskb should be processed further, + * 1 if *pskb is handled. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +/* Called with rcu_read_lock. */ +static struct sk_buff *dp_frame_hook(struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct ethhdr *eh = eth_hdr(skb); + struct sk_buff *skb_local = NULL; + + + if (compare_ether_addr(eh->h_dest, skb->dev->dev_addr) == 0) + return skb; + + if (is_broadcast_ether_addr(eh->h_dest) + || is_multicast_ether_addr(eh->h_dest) + || is_local_ether_addr(eh->h_dest)) + skb_local = skb_clone(skb, GFP_ATOMIC); + + /* Push the Ethernet header back on. */ + if (skb->protocol == htons(ETH_P_8021Q)) + skb_push(skb, VLAN_ETH_HLEN); + else + skb_push(skb, ETH_HLEN); + + fwd_port_input(p->dp->chain, skb, p->port_no); + + return skb_local; +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) +{ + /* Push the Ethernet header back on. 
*/ + if ((*pskb)->protocol == htons(ETH_P_8021Q)) + skb_push(*pskb, VLAN_ETH_HLEN); + else + skb_push(*pskb, ETH_HLEN); + + fwd_port_input(p->dp->chain, *pskb, p->port_no); + return 1; +} +#else +/* NB: This has only been tested on 2.4.35 */ + +/* Called without any locks (?) */ +static void dp_frame_hook(struct sk_buff *skb) +{ + struct net_bridge_port *p = skb->dev->br_port; + + /* Push the Ethernet header back on. */ + if (skb->protocol == htons(ETH_P_8021Q)) + skb_push(skb, VLAN_ETH_HLEN); + else + skb_push(skb, ETH_HLEN); + + if (p) { + rcu_read_lock(); + fwd_port_input(p->dp->chain, skb, p->port_no); + rcu_read_unlock(); + } else + kfree_skb(skb); +} +#endif + +/* Forwarding output path. + * Based on net/bridge/br_forward.c. */ + +/* Don't forward packets to originating port or with flooding disabled */ +static inline int should_deliver(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + if ((skb->dev == p->dev) || (p->flags & BRIDGE_PORT_NO_FLOOD)) { + return 0; + } + + return 1; +} + +static inline unsigned packet_length(const struct sk_buff *skb) +{ + int length = skb->len - ETH_HLEN; + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + return length; +} + +static int +flood(struct datapath *dp, struct sk_buff *skb) +{ + struct net_bridge_port *p; + int prev_port; + + prev_port = -1; + list_for_each_entry_rcu (p, &dp->port_list, node) { + if (!should_deliver(p, skb)) + continue; + if (prev_port != -1) { + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + kfree_skb(skb); + return -ENOMEM; + } + dp_output_port(dp, clone, prev_port); + } + prev_port = p->port_no; + } + if (prev_port != -1) + dp_output_port(dp, skb, prev_port); + else + kfree_skb(skb); + + return 0; +} + +/* Marks 'skb' as having originated from 'in_port' in 'dp'. + FIXME: how are devices reference counted? 
*/ +int dp_set_origin(struct datapath *dp, uint16_t in_port, + struct sk_buff *skb) +{ + if (in_port < OFPP_MAX && dp->ports[in_port]) { + skb->dev = dp->ports[in_port]->dev; + return 0; + } + return -ENOENT; +} + +/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'. + */ +int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port) +{ + struct net_bridge_port *p; + int len = skb->len; + + BUG_ON(!skb); + if (out_port == OFPP_FLOOD) + return flood(dp, skb); + else if (out_port == OFPP_CONTROLLER) + return dp_output_control(dp, skb, fwd_save_skb(skb), 0, + OFPR_ACTION); + else if (out_port >= OFPP_MAX) + goto bad_port; + + p = dp->ports[out_port]; + if (p == NULL) + goto bad_port; + + skb->dev = p->dev; + if (packet_length(skb) > skb->dev->mtu) { + printk("dropped over-mtu packet: %d > %d\n", + packet_length(skb), skb->dev->mtu); + kfree_skb(skb); + return -E2BIG; + } + + dev_queue_xmit(skb); + + return len; + +bad_port: + kfree_skb(skb); + if (net_ratelimit()) + printk("can't forward to bad port %d\n", out_port); + return -ENOENT; +} + +/* Takes ownership of 'skb' and transmits it to 'dp''s control path. If + * 'buffer_id' != -1, then only the first 64 bytes of 'skb' are sent; + * otherwise, all of 'skb' is sent. 'reason' indicates why 'skb' is being + * sent. 'max_len' sets the maximum number of bytes that the caller + * wants to be sent; a value of 0 indicates the entire packet should be + * sent. */ +int +dp_output_control(struct datapath *dp, struct sk_buff *skb, + uint32_t buffer_id, size_t max_len, int reason) +{ + /* FIXME? packet_rcv_spkt in net/packet/af_packet.c does some stuff + that we should possibly be doing here too. */ + /* FIXME? Can we avoid creating a new skbuff in the case where we + * forward the whole packet? 
*/ + struct sk_buff *f_skb; + struct nlattr *attr; + struct ofp_packet_in *opi; + size_t opi_len; + size_t len, fwd_len; + void *data; + int err = -ENOMEM; + + fwd_len = skb->len; + if ((buffer_id != (uint32_t) -1) && max_len) + fwd_len = min(fwd_len, max_len); + + len = nla_total_size(offsetof(struct ofp_packet_in, data) + fwd_len) + + nla_total_size(sizeof(uint32_t)); + + f_skb = genlmsg_new(len, GFP_ATOMIC); + if (!f_skb) + goto error_free_skb; + + data = genlmsg_put(f_skb, 0, 0, &dp_genl_family, 0, + DP_GENL_C_OPENFLOW); + if (data == NULL) + goto error_free_f_skb; + + NLA_PUT_U32(f_skb, DP_GENL_A_DP_IDX, dp->dp_idx); + + opi_len = offsetof(struct ofp_packet_in, data) + fwd_len; + attr = nla_reserve(f_skb, DP_GENL_A_OPENFLOW, opi_len); + if (!attr) + goto error_free_f_skb; + opi = nla_data(attr); + opi->header.version = OFP_VERSION; + opi->header.type = OFPT_PACKET_IN; + opi->header.length = htons(opi_len); + opi->header.xid = htonl(0); + + opi->buffer_id = htonl(buffer_id); + opi->total_len = htons(skb->len); + opi->in_port = htons(skb->dev->br_port->port_no); + opi->reason = reason; + SKB_LINEAR_ASSERT(skb); + memcpy(opi->data, skb_mac_header(skb), fwd_len); + + err = genlmsg_end(f_skb, data); + if (err < 0) + goto error_free_f_skb; + + err = genlmsg_multicast(f_skb, 0, mc_group.id, GFP_ATOMIC); + if (err && net_ratelimit()) + printk(KERN_WARNING "dp_output_control: genlmsg_multicast failed: %d\n", err); + + kfree_skb(skb); + + return err; + +nla_put_failure: +error_free_f_skb: + nlmsg_free(f_skb); +error_free_skb: + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_ERR "dp_output_control: failed to send: %d\n", err); + return err; +} + +static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc) +{ + desc->port_no = htons(p->port_no); + strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN); + desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0'; + memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN); + desc->flags = htonl(p->flags); + 
desc->features = 0; + desc->speed = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24) + if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) { + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; + + if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) { + if (ecmd.supported & SUPPORTED_10baseT_Half) + desc->features |= OFPPF_10MB_HD; + if (ecmd.supported & SUPPORTED_10baseT_Full) + desc->features |= OFPPF_10MB_FD; + if (ecmd.supported & SUPPORTED_100baseT_Half) + desc->features |= OFPPF_100MB_HD; + if (ecmd.supported & SUPPORTED_100baseT_Full) + desc->features |= OFPPF_100MB_FD; + if (ecmd.supported & SUPPORTED_1000baseT_Half) + desc->features |= OFPPF_1GB_HD; + if (ecmd.supported & SUPPORTED_1000baseT_Full) + desc->features |= OFPPF_1GB_FD; + /* 10Gbps half-duplex doesn't exist... */ + if (ecmd.supported & SUPPORTED_10000baseT_Full) + desc->features |= OFPPF_10GB_FD; + + desc->features = htonl(desc->features); + desc->speed = htonl(ecmd.speed); + } + } +#endif +} + +static int +fill_data_hello(struct datapath *dp, struct ofp_data_hello *odh) +{ + struct net_bridge_port *p; + int port_count = 0; + + odh->header.version = OFP_VERSION; + odh->header.type = OFPT_DATA_HELLO; + odh->header.xid = htonl(0); + odh->datapath_id = cpu_to_be64(dp->id); + + odh->n_exact = htonl(2 * TABLE_HASH_MAX_FLOWS); + odh->n_mac_only = htonl(TABLE_MAC_MAX_FLOWS); + odh->n_compression = 0; /* Not supported */ + odh->n_general = htonl(TABLE_LINEAR_MAX_FLOWS); + odh->buffer_mb = htonl(UINT32_MAX); + odh->n_buffers = htonl(N_PKT_BUFFERS); + odh->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES); + odh->actions = htonl(OFP_SUPPORTED_ACTIONS); + odh->miss_send_len = htons(dp->miss_send_len); + + list_for_each_entry_rcu (p, &dp->port_list, node) { + fill_port_desc(p, &odh->ports[port_count]); + port_count++; + } + + return port_count; +} + +int +dp_send_hello(struct datapath *dp) +{ + struct sk_buff *skb; + struct nlattr *attr; + struct ofp_data_hello *odh; + size_t odh_max_len, 
odh_len, port_max_len, len; + void *data; + int err = -ENOMEM; + int port_count; + + + /* Overallocate, since we can't reliably determine the number of + * ports a priori. */ + port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX; + + len = nla_total_size(sizeof(*odh) + port_max_len) + + nla_total_size(sizeof(uint32_t)); + + skb = genlmsg_new(len, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + printk("dp_send_hello: genlmsg_new failed\n"); + goto error; + } + + data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0, + DP_GENL_C_OPENFLOW); + if (data == NULL) { + if (net_ratelimit()) + printk("dp_send_hello: genlmsg_put failed\n"); + goto error; + } + + NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx); + + odh_max_len = sizeof(*odh) + port_max_len; + attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, odh_max_len); + if (!attr) { + if (net_ratelimit()) + printk("dp_send_hello: nla_reserve failed\n"); + goto error; + } + odh = nla_data(attr); + port_count = fill_data_hello(dp, odh); + + /* Only now that we know how many ports we've added can we say + * say something about the length. 
*/
+	odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
+	odh->header.length = htons(odh_len);
+
+	/* Take back the unused part that was reserved */
+	nla_unreserve(skb, attr, (odh_max_len - odh_len));
+
+	err = genlmsg_end(skb, data);
+	if (err < 0) {
+		if (net_ratelimit())
+			printk("dp_send_hello: genlmsg_end failed\n");
+		goto error;
+	}
+
+	err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
+	if (err && net_ratelimit())
+		printk(KERN_WARNING "dp_send_hello: genlmsg_multicast failed: %d\n", err);
+
+	return err;
+
+nla_put_failure:
+error:
+	kfree_skb(skb);
+	if (net_ratelimit())
+		printk(KERN_ERR "dp_send_hello: failed to send: %d\n", err);
+	return err;
+}
+
+/* Applies the flags carried in the port description 'opp' to the matching
+ * port of 'dp'.
+ *
+ * 'opp' comes off the wire, so its fields are in network byte order and its
+ * port number cannot be trusted.  Returns 0 on success, -1 if the port number
+ * is out of range, the port does not exist, or its hardware address no longer
+ * matches the one in 'opp'. */
+int
+dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
+{
+	struct net_bridge_port *p;
+	uint16_t port_no = ntohs(opp->port_no);
+
+	/* dp->ports[] has OFPP_MAX entries; reject anything beyond it
+	 * instead of reading past the end of the array. */
+	if (port_no >= OFPP_MAX)
+		return -1;
+
+	p = dp->ports[port_no];
+
+	/* Make sure the port id hasn't changed since this was sent */
+	if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN) != 0)
+		return -1;
+
+	p->flags = ntohl(opp->flags);
+
+	return 0;
+}
+
+
+/* Multicasts an OFPT_PORT_STATUS message for port 'p'; 'status' is stored in
+ * the message's reason field. */
+static int
+send_port_status(struct net_bridge_port *p, uint8_t status)
+{
+	struct sk_buff *skb;
+	struct nlattr *attr;
+	struct ofp_port_status *ops;
+	void *data;
+	int err = -ENOMEM;
+
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb) {
+		if (net_ratelimit())
+			printk("send_port_status: genlmsg_new failed\n");
+		goto error;
+	}
+
+	data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
+			   DP_GENL_C_OPENFLOW);
+	if (data == NULL) {
+		if (net_ratelimit())
+			printk("send_port_status: genlmsg_put failed\n");
+		goto error;
+	}
+
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, p->dp->dp_idx);
+
+	attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ops));
+	if (!attr) {
+		if (net_ratelimit())
+			printk("send_port_status: nla_reserve failed\n");
+		goto error;
+	}
+
+	ops = nla_data(attr);
+	ops->header.version = OFP_VERSION;
+	ops->header.type = OFPT_PORT_STATUS;
+	ops->header.length = htons(sizeof(*ops));
+	ops->header.xid = htonl(0);
+
+
ops->reason = status;
+	fill_port_desc(p, &ops->desc);
+
+	err = genlmsg_end(skb, data);
+	if (err < 0) {
+		if (net_ratelimit())
+			printk("send_port_status: genlmsg_end failed\n");
+		goto error;
+	}
+
+	err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
+	if (err && net_ratelimit())
+		printk(KERN_WARNING "send_port_status: genlmsg_multicast failed: %d\n", err);
+
+	return err;
+
+nla_put_failure:
+error:
+	kfree_skb(skb);
+	if (net_ratelimit())
+		printk(KERN_ERR "send_port_status: failed to send: %d\n", err);
+	return err;
+}
+/* Multicasts an OFPT_FLOW_EXPIRED message for 'flow' of datapath 'dp',
+ * carrying the flow's match, its duration in seconds and its packet and byte
+ * counters.  Returns the result of genlmsg_multicast(), or a negative value
+ * if the message could not be built. */
+int
+dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
+{
+	struct sk_buff *skb;
+	struct nlattr *attr;
+	struct ofp_flow_expired *ofe;
+	void *data;
+	unsigned long duration_j;
+	int err = -ENOMEM;
+
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb) {
+		if (net_ratelimit())
+			printk("dp_send_flow_expired: genlmsg_new failed\n");
+		goto error;
+	}
+
+	data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
+			   DP_GENL_C_OPENFLOW);
+	if (data == NULL) {
+		if (net_ratelimit())
+			printk("dp_send_flow_expired: genlmsg_put failed\n");
+		goto error;
+	}
+
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);
+
+	attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ofe));
+	if (!attr) {
+		if (net_ratelimit())
+			printk("dp_send_flow_expired: nla_reserve failed\n");
+		goto error;
+	}
+
+	ofe = nla_data(attr);
+	ofe->header.version = OFP_VERSION;
+	ofe->header.type = OFPT_FLOW_EXPIRED;
+	ofe->header.length = htons(sizeof(*ofe));
+	ofe->header.xid = htonl(0);
+	/* duration_j is in jiffies: (timeout - HZ * max_idle) is presumably
+	 * the time the flow was last used -- NOTE(review): confirm against
+	 * where flow->timeout is updated. */
+	flow_fill_match(&ofe->match, &flow->key);
+	duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
+	ofe->duration = htonl(duration_j / HZ);
+	ofe->packet_count = cpu_to_be64(flow->packet_count);
+	ofe->byte_count = cpu_to_be64(flow->byte_count);
+
+	err = genlmsg_end(skb, data);
+	if (err < 0) {
+		if (net_ratelimit())
+			printk("dp_send_flow_expired: genlmsg_end failed\n");
+		goto error;
+	}
+
+	err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
+	if
(err && net_ratelimit())
+		printk(KERN_WARNING "send_flow_expired: genlmsg_multicast failed: %d\n", err);
+
+	return err;
+
+nla_put_failure:
+error:
+	kfree_skb(skb);
+	if (net_ratelimit())
+		printk(KERN_ERR "send_flow_expired: failed to send: %d\n", err);
+	return err;
+}
+
+/* Generic Netlink interface.
+ *
+ * See netlink(7) for an introduction to netlink.  See
+ * http://linux-net.osdl.org/index.php/Netlink for more information and
+ * pointers on how to work with netlink and Generic Netlink in the kernel and
+ * in userspace. */
+
+static struct genl_family dp_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = DP_GENL_FAMILY_NAME,
+	.version = 1,
+	.maxattr = DP_GENL_A_MAX,
+};
+
+/* Attribute policy: what each attribute may contain.  Shared by the
+ * add/delete/query/show datapath and add/delete port operations. */
+static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
+	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
+	[DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },
+	[DP_GENL_A_PORTNAME] = { .type = NLA_STRING }
+};
+/* DP_GENL_C_ADD_DP handler: creates the datapath whose index is given in
+ * DP_GENL_A_DP_IDX.  Returns -EINVAL if the attribute is missing, otherwise
+ * the result of new_dp(). */
+static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
+{
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
+}
+
+static struct genl_ops dp_genl_ops_add_dp = {
+	.cmd = DP_GENL_C_ADD_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege.
*/
+	.policy = dp_genl_policy,
+	.doit = dp_genl_add,
+	.dumpit = NULL,
+};
+
+/* Returns the datapath registered at index 'dp_idx', or NULL if the index is
+ * out of range or no datapath is registered there.  The pointer is read with
+ * rcu_dereference(). */
+struct datapath *dp_get(int dp_idx)
+{
+	/* DP_MAX itself must be rejected as well.  This assumes dps[] is
+	 * declared with DP_MAX entries, so its last valid index is
+	 * DP_MAX - 1 -- NOTE(review): confirm against the definition of
+	 * dps[]. */
+	if (dp_idx < 0 || dp_idx >= DP_MAX)
+		return NULL;
+	return rcu_dereference(dps[dp_idx]);
+}
+
+/* DP_GENL_C_DEL_DP handler: deletes the datapath named by DP_GENL_A_DP_IDX
+ * while holding dp_mutex.  Returns -EINVAL if the attribute is missing and
+ * -ENOENT if there is no such datapath. */
+static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	int err;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	mutex_lock(&dp_mutex);
+	dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
+	if (!dp)
+		err = -ENOENT;
+	else {
+		del_dp(dp);
+		err = 0;
+	}
+	mutex_unlock(&dp_mutex);
+	return err;
+}
+
+static struct genl_ops dp_genl_ops_del_dp = {
+	.cmd = DP_GENL_C_DEL_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_policy,
+	.doit = dp_genl_del,
+	.dumpit = NULL,
+};
+
+/* Queries a datapath for related information.  Currently the only relevant
+ * information is the datapath's multicast group ID.  Really we want one
+ * multicast group per datapath, but because of locking issues[*] we can't
+ * easily get one.  Thus, every datapath will currently return the same
+ * global multicast group ID, but in the future it would be nice to fix that.
+ *
+ * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
+ *     mutex, and genl_register_mc_group, called to acquire a new multicast
+ *     group ID, also acquires genl_lock, thus deadlock.
+ */
+static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	struct sk_buff *ans_skb = NULL;
+	int dp_idx;
+	int err = -ENOMEM;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	rcu_read_lock();
+	dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]));
+	dp = dp_get(dp_idx);
+	if (!dp)
+		err = -ENOENT;
+	else {
+		void *data;
+		/* We are inside an RCU read-side critical section, so the
+		 * allocation must not sleep: GFP_ATOMIC, not GFP_KERNEL. */
+		ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+		if (!ans_skb) {
+			err = -ENOMEM;
+			goto err;
+		}
+		data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
+					 0, DP_GENL_C_QUERY_DP);
+		if (data == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		/* NLA_PUT_U32 jumps to nla_put_failure if the attribute
+		 * cannot be added. */
+		NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx);
+		NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
+
+		genlmsg_end(ans_skb, data);
+		err = genlmsg_reply(ans_skb, info);
+		/* genlmsg_reply() consumes the skb whether or not it
+		 * succeeds; freeing it again below would be a double free. */
+		ans_skb = NULL;
+	}
+err:
+nla_put_failure:
+	if (ans_skb)
+		kfree_skb(ans_skb);
+	rcu_read_unlock();
+	return err;
+}
+
+/*
+ * Fill flow entry for nl flow query.  Called with rcu_lock
+ *
+ * Writes the header, the match fields, group_id, max_idle and the first
+ * action of the flow 'iter' currently points at into 'ofm'.  Always
+ * returns 0.
+ */
+static
+int
+dp_fill_flow(struct ofp_flow_mod* ofm, struct swt_iterator* iter)
+{
+	ofm->header.version = OFP_VERSION;
+	ofm->header.type = OFPT_FLOW_MOD;
+	ofm->header.length = htons(sizeof(struct ofp_flow_mod)
+				+ sizeof(ofm->actions[0]));
+	ofm->header.xid = htonl(0);
+
+	ofm->match.wildcards = htons(iter->flow->key.wildcards);
+	ofm->match.in_port = iter->flow->key.in_port;
+	ofm->match.dl_vlan = iter->flow->key.dl_vlan;
+	memcpy(ofm->match.dl_src, iter->flow->key.dl_src, ETH_ALEN);
+	memcpy(ofm->match.dl_dst, iter->flow->key.dl_dst, ETH_ALEN);
+	ofm->match.dl_type = iter->flow->key.dl_type;
+	ofm->match.nw_src = iter->flow->key.nw_src;
+	ofm->match.nw_dst = iter->flow->key.nw_dst;
+	ofm->match.nw_proto = iter->flow->key.nw_proto;
+	ofm->match.tp_src = iter->flow->key.tp_src;
+	ofm->match.tp_dst = iter->flow->key.tp_dst;
+	ofm->group_id = iter->flow->group_id;
+	ofm->max_idle = iter->flow->max_idle;
+	/* TODO support multiple actions  */
+	ofm->actions[0] =
iter->flow->actions[0];
+
+	return 0;
+}
+
+/* DP_GENL_C_SHOW_DP handler: replies with an OFPT_DATA_HELLO description of
+ * the datapath named by DP_GENL_A_DP_IDX, carried in a DP_GENL_A_DP_INFO
+ * attribute.
+ *
+ * Returns -EINVAL if the index attribute is missing, -ENOENT if there is no
+ * such datapath, -ENOMEM if the reply cannot be built, otherwise the result
+ * of genlmsg_reply(). */
+static int dp_genl_show(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	int err = -ENOMEM;
+	struct sk_buff *ans_skb = NULL;
+	void *data;
+	struct nlattr *attr;
+	struct ofp_data_hello *odh;
+	size_t odh_max_len, odh_len, port_max_len, len;
+	int port_count;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	mutex_lock(&dp_mutex);
+	dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
+	if (!dp) {
+		/* An unknown datapath is not an allocation failure. */
+		err = -ENOENT;
+		goto error;
+	}
+
+	/* Overallocate, since we can't reliably determine the number of
+	 * ports a priori. */
+	port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;
+
+	len = nla_total_size(sizeof(*odh) + port_max_len)
+			+ nla_total_size(sizeof(uint32_t));
+
+	ans_skb = nlmsg_new(len, GFP_KERNEL);
+	if (!ans_skb)
+		goto error;
+
+	data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
+				 0, DP_GENL_C_SHOW_DP);
+	if (data == NULL)
+		goto error;
+
+	/* NLA_PUT_U32 jumps to nla_put_failure if the attribute cannot be
+	 * added. */
+	NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
+
+	odh_max_len = sizeof(*odh) + port_max_len;
+	attr = nla_reserve(ans_skb, DP_GENL_A_DP_INFO, odh_max_len);
+	if (!attr)
+		goto error;
+	odh = nla_data(attr);
+	port_count = fill_data_hello(dp, odh);
+
+	/* Only now that we know how many ports we've added can we say
+	 * something about the length. */
+	odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
+	odh->header.length = htons(odh_len);
+
+	/* Take back the unused part that was reserved */
+	nla_unreserve(ans_skb, attr, (odh_max_len - odh_len));
+
+	genlmsg_end(ans_skb, data);
+	err = genlmsg_reply(ans_skb, info);
+	/* genlmsg_reply() consumes the skb whether or not it succeeds;
+	 * freeing it again below would be a double free. */
+	ans_skb = NULL;
+
+error:
+nla_put_failure:
+	if (ans_skb)
+		kfree_skb(ans_skb);
+	mutex_unlock(&dp_mutex);
+	return err;
+}
+
+static struct genl_ops dp_genl_ops_show_dp = {
+	.cmd = DP_GENL_C_SHOW_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege.
*/
+	.policy = dp_genl_policy,
+	.doit = dp_genl_show,
+	.dumpit = NULL,
+};
+
+/* Convenience function: starts a DP_GENL_C_QUERY_FLOW reply in 'skb' and adds
+ * the DP_GENL_A_DP_IDX and DP_GENL_A_TABLEIDX attributes.  Returns the
+ * pointer obtained from genlmsg_put_reply(), or NULL on failure; 'skb' is not
+ * freed here. */
+static
+void*
+dp_init_nl_flow_msg(uint32_t dp_idx, uint16_t table_idx,
+		struct genl_info *info, struct sk_buff* skb)
+{
+	void* data;
+
+	data = genlmsg_put_reply(skb, info, &dp_genl_family, 0,
+				DP_GENL_C_QUERY_FLOW);
+	if (data == NULL)
+		return NULL;
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp_idx);
+	NLA_PUT_U16(skb, DP_GENL_A_TABLEIDX, table_idx);
+
+	return data;
+
+nla_put_failure:
+	return NULL;
+}
+
+/*  Iterate through the specified table and send all flow entries over
+ *  netlink to userspace.  Each flow message has the following format:
+ *
+ *  32bit dpix
+ *  16bit tabletype
+ *  32bit number of flows
+ *  openflow-flow-entries
+ *
+ *  The full table may require multiple messages.  A message with 0 flows
+ *  signifies end of message.
+ *
+ *  Only flows matching 'matchme->match' are reported.
+ */
+
+static
+int
+dp_dump_table(struct datapath *dp, uint16_t table_idx, struct genl_info *info, struct ofp_flow_mod* matchme)
+{
+	struct sk_buff *skb = 0;
+	struct sw_table *table = 0;
+	struct swt_iterator iter;
+	struct sw_flow_key in_flow;
+	struct nlattr *attr;
+	int count = 0, sum_count = 0;
+	void *data;
+	uint8_t* ofm_ptr = 0;
+	struct nlattr *num_attr;
+	int err = -ENOMEM;
+
+	table = dp->chain->tables[table_idx];
+	if ( table == NULL ) {
+		dprintk("dp::dp_dump_table error, non-existant table at position %d\n", table_idx);
+		return -EINVAL;
+	}
+
+	if (!table->iterator(table, &iter)) {
+		dprintk("dp::dp_dump_table couldn't initialize empty table iterator\n");
+		return -ENOMEM;
+	}
+
+	while (iter.flow) {
+
+		/* verify that we can fit all NL_FLOWS_PER_MESSAGE in a single
+		 * sk_buff.  The test only involves compile-time constants.
+		 * NOTE(review): the DP_GENL_A_FLOW reservation made further
+		 * down also counts a struct ofp_action per flow, which this
+		 * estimate leaves out. */
+		if( (sizeof(dp_genl_family) + sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) +
+					(NL_FLOWS_PER_MESSAGE * sizeof(struct ofp_flow_mod))) > (8192 - 64)){
+			dprintk("dp::dp_dump_table NL_FLOWS_PER_MESSAGE may cause overrun in skbuf\n");
+			return -ENOMEM;
+		}
+
+		skb = nlmsg_new(8192 - 64, GFP_ATOMIC);
+		if (skb == NULL) {
+
return -ENOMEM;
+		}
+
+		data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
+		if (data == NULL){
+			err= -ENOMEM;
+			goto error_free_skb;
+		}
+
+		/* reserve space to put the number of flows for this message, to
+		 * be filled after the loop*/
+		num_attr = nla_reserve(skb, DP_GENL_A_NUMFLOWS, sizeof(uint32_t));
+		if(!num_attr){
+			err = -ENOMEM;
+			goto error_free_skb;
+		}
+
+		/* Only load NL_FLOWS_PER_MESSAGE flows at a time */
+		attr = nla_reserve(skb, DP_GENL_A_FLOW,
+				(sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action)) * NL_FLOWS_PER_MESSAGE);
+		if (!attr){
+			err = -ENOMEM;
+			goto error_free_skb;
+		}
+
+		/* internal loop to fill NL_FLOWS_PER_MESSAGE flows */
+		ofm_ptr = nla_data(attr);
+		/* nla_reserve() does not clear the payload, only 'count' of
+		 * the reserved slots are filled in below, and dp_fill_flow()
+		 * does not write every field of a slot.  Zero the whole area
+		 * so that no stale kernel memory is sent to userspace. */
+		memset(ofm_ptr, 0, nla_len(attr));
+		flow_extract_match(&in_flow, &matchme->match);
+		while (iter.flow && count < NL_FLOWS_PER_MESSAGE) {
+			if(flow_matches(&in_flow, &iter.flow->key)){
+				if((err = dp_fill_flow((struct ofp_flow_mod*)ofm_ptr, &iter)))
+					goto error_free_skb;
+				count++;
+				/* TODO support multiple actions  */
+				ofm_ptr += sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action);
+			}
+			table->iterator_next(&iter);
+		}
+
+		*((uint32_t*)nla_data(num_attr)) = count;
+		genlmsg_end(skb, data);
+
+		sum_count += count;
+		count = 0;
+
+		/* The skb is handed over here; forget our pointer so the
+		 * cleanup path does not free it again. */
+		err = genlmsg_unicast(skb, info->snd_pid);
+		skb = 0;
+	}
+
+	/* send a sentinel message saying we're done */
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (skb == NULL) {
+		return -ENOMEM;
+	}
+	data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
+	if (data == NULL){
+		err= -ENOMEM;
+		goto error_free_skb;
+	}
+
+	NLA_PUT_U32(skb, DP_GENL_A_NUMFLOWS, 0);
+	/* dummy flow so nl doesn't complain; cleared so that it does not
+	 * carry uninitialized memory either */
+	attr = nla_reserve(skb, DP_GENL_A_FLOW, sizeof(struct ofp_flow_mod));
+	if (!attr){
+		err = -ENOMEM;
+		goto error_free_skb;
+	}
+	memset(nla_data(attr), 0, nla_len(attr));
+	genlmsg_end(skb, data);
+	err = genlmsg_reply(skb, info); skb = 0;
+
+nla_put_failure:
+error_free_skb:
+	if(skb)
+		kfree_skb(skb);
+	return err;
+}
+
+/* Helper function to query_table which creates and sends a message packed with
+
* table stats. Message form is:
+ *
+ * u32 DP_IDX
+ * u32 NUM_TABLES
+ * OFP_TABLE (list of OFP_TABLES)
+ *
+ * Returns the result of genlmsg_reply(), or -ENOMEM if the message could not
+ * be built.
+ */
+
+static
+int
+dp_dump_table_stats(struct datapath *dp, int dp_idx, struct genl_info *info)
+{
+	struct sk_buff *skb = 0;
+	struct ofp_table *ot = 0;
+	struct nlattr *attr;
+	struct sw_table_stats stats;
+	void *data;
+	int err = -ENOMEM;
+	int i = 0;
+	int nt = dp->chain->n_tables;
+
+	/* u32 IDX, u32 NUMTABLES, list-of-tables.  nla_total_size() accounts
+	 * for each attribute's header and padding and genlmsg_new() for the
+	 * Generic Netlink header, none of which the raw payload sizes
+	 * include. */
+	skb = genlmsg_new(2 * nla_total_size(sizeof(uint32_t))
+			  + nla_total_size(sizeof(struct ofp_table) * nt),
+			  GFP_ATOMIC);
+	if (skb == NULL) {
+		return -ENOMEM;
+	}
+
+	data = genlmsg_put_reply(skb, info, &dp_genl_family, 0,
+				DP_GENL_C_QUERY_TABLE);
+	if (data == NULL){
+		/* Release the skb allocated above instead of leaking it. */
+		err = -ENOMEM;
+		goto error_free_skb;
+	}
+
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp_idx);
+	NLA_PUT_U32(skb, DP_GENL_A_NUMTABLES, nt);
+
+	/* ... we assume that all tables can fit in a single message.
+	 * Probably a reasonable assumption seeing that we only have
+	 * 3 atm */
+	attr = nla_reserve(skb, DP_GENL_A_TABLE, (sizeof(struct ofp_table) * nt));
+	if (!attr){
+		err = -ENOMEM;
+		goto error_free_skb;
+	}
+
+	ot = nla_data(attr);
+
+	for (i = 0; i < nt; ++i) {
+		dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
+		ot->header.version = OFP_VERSION;
+		ot->header.type = OFPT_TABLE;
+		ot->header.length = htons(sizeof(struct ofp_table));
+		ot->header.xid = htonl(0);
+
+		strncpy(ot->name, stats.name, OFP_MAX_TABLE_NAME_LEN);
+		ot->table_id = htons(i);
+		ot->n_flows = htonl(stats.n_flows);
+		ot->max_flows = htonl(stats.max_flows);
+		ot++;
+	}
+
+
+	genlmsg_end(skb, data);
+	/* The skb is handed over here; forget our pointer so the cleanup
+	 * path does not free it again. */
+	err = genlmsg_reply(skb, info); skb = 0;
+
+nla_put_failure:
+error_free_skb:
+	if(skb)
+		kfree_skb(skb);
+	return err;
+}
+
+/*
+ * Queries a datapath for flow-table statistics
+ */
+
+
+static int dp_genl_table_query(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath* dp;
+	int err = 0;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX]) {
+		dprintk("dp::dp_genl_table_query received message with missing attributes\n");
+		return -EINVAL;
+	}
+
+
rcu_read_lock();
+	dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
+	if (!dp) {
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	err = dp_dump_table_stats(dp, nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]), info);
+
+err_out:
+	rcu_read_unlock();
+	return err;
+}
+
+/*
+ * Queries a datapath for flow-table entries.
+ *
+ * Expects DP_GENL_A_DP_IDX, DP_GENL_A_TABLEIDX and a DP_GENL_A_FLOW attribute
+ * holding a struct ofp_flow_mod whose match selects the flows to report.
+ * Returns -EINVAL for missing or malformed attributes or a bad table index,
+ * -ENOENT for an unknown datapath, otherwise the result of dp_dump_table().
+ */
+
+static int dp_genl_flow_query(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath* dp;
+	struct ofp_flow_mod* ofm;
+	u16 table_idx;
+	int err = 0;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX]
+				|| !info->attrs[DP_GENL_A_TABLEIDX]
+				|| !info->attrs[DP_GENL_A_FLOW]) {
+		dprintk("dp::dp_genl_flow_query received message with missing attributes\n");
+		return -EINVAL;
+	}
+
+	/* dp_genl_flow_policy has no entry for DP_GENL_A_FLOW, so nothing has
+	 * checked its size yet.  Make sure it really holds a whole
+	 * struct ofp_flow_mod before it is read as one. */
+	if (nla_len(info->attrs[DP_GENL_A_FLOW]) < sizeof(*ofm)) {
+		dprintk("dp::dp_genl_flow_query received truncated flow attribute\n");
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
+	if (!dp) {
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	table_idx = nla_get_u16(info->attrs[DP_GENL_A_TABLEIDX]);
+
+	if (dp->chain->n_tables <= table_idx){
+		printk("table index %d invalid (dp has %d tables)\n",
+				table_idx, dp->chain->n_tables);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	ofm = nla_data(info->attrs[DP_GENL_A_FLOW]);
+	err = dp_dump_table(dp, table_idx, info, ofm);
+
+err_out:
+	rcu_read_unlock();
+	return err;
+}
+
+static struct nla_policy dp_genl_flow_policy[DP_GENL_A_MAX + 1] = {
+	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
+	[DP_GENL_A_TABLEIDX] = { .type = NLA_U16 },
+	[DP_GENL_A_NUMFLOWS] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_genl_ops_query_flow = {
+	.cmd = DP_GENL_C_QUERY_FLOW,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_flow_policy,
+	.doit = dp_genl_flow_query,
+	.dumpit = NULL,
+};
+
+static struct nla_policy dp_genl_table_policy[DP_GENL_A_MAX + 1] = {
+	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_genl_ops_query_table = {
+	.cmd = DP_GENL_C_QUERY_TABLE,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege.
*/
+	.policy = dp_genl_table_policy,
+	.doit = dp_genl_table_query,
+	.dumpit = NULL,
+};
+
+
+static struct genl_ops dp_genl_ops_query_dp = {
+	.cmd = DP_GENL_C_QUERY_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_policy,
+	.doit = dp_genl_query,
+	.dumpit = NULL,
+};
+/* Shared handler for DP_GENL_C_ADD_PORT and DP_GENL_C_DEL_PORT.  Under
+ * dp_mutex it looks up the datapath (DP_GENL_A_DP_IDX) and the network device
+ * named by DP_GENL_A_PORTNAME, then either adds the device as a switch port
+ * or removes it.  Removal fails with -ENOENT unless the device is currently a
+ * port of that same datapath.  The device reference taken by
+ * dev_get_by_name() is dropped again before returning. */
+static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	struct net_device *port;
+	int err;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_PORTNAME])
+		return -EINVAL;
+
+	/* Get datapath. */
+	mutex_lock(&dp_mutex);
+	dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
+	if (!dp) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Get interface to add/remove. */
+	port = dev_get_by_name(&init_net,
+			nla_data(info->attrs[DP_GENL_A_PORTNAME]));
+	if (!port) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Execute operation. */
+	if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
+		err = add_switch_port(dp, port);
+	else {
+		if (port->br_port == NULL || port->br_port->dp != dp) {
+			err = -ENOENT;
+			goto out_put;
+		}
+		err = del_switch_port(port->br_port);
+	}
+
+out_put:
+	dev_put(port);
+out:
+	mutex_unlock(&dp_mutex);
+	return err;
+}
+
+static struct genl_ops dp_genl_ops_add_port = {
+	.cmd = DP_GENL_C_ADD_PORT,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_policy,
+	.doit = dp_genl_add_del_port,
+	.dumpit = NULL,
+};
+
+static struct genl_ops dp_genl_ops_del_port = {
+	.cmd = DP_GENL_C_DEL_PORT,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege.
*/
+	.policy = dp_genl_policy,
+	.doit = dp_genl_add_del_port,
+	.dumpit = NULL,
+};
+/* DP_GENL_C_OPENFLOW handler: passes the payload of the DP_GENL_A_OPENFLOW
+ * attribute to fwd_control_input() for the datapath named by
+ * DP_GENL_A_DP_IDX.  Returns -EINVAL if either attribute is missing, -ENOENT
+ * if there is no such datapath, otherwise the result of
+ * fwd_control_input(). */
+static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
+	struct datapath *dp;
+	int err;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
+		return -EINVAL;
+
+	rcu_read_lock();
+	dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
+	if (!dp) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	va = info->attrs[DP_GENL_A_OPENFLOW]; /* same value 'va' was initialized with */
+
+	err = fwd_control_input(dp->chain, nla_data(va), nla_len(va));
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
+	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_genl_ops_openflow = {
+	.cmd = DP_GENL_C_OPENFLOW,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_openflow_policy,
+	.doit = dp_genl_openflow,
+	.dumpit = NULL,
+};
+
+static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
+	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
+	[DP_GENL_A_NPACKETS] = { .type = NLA_U32 },
+	[DP_GENL_A_PSIZE] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_genl_ops_benchmark_nl = {
+	.cmd = DP_GENL_C_BENCHMARK_NL,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_benchmark_policy,
+	.doit = dp_genl_benchmark_nl,
+	.dumpit = NULL,
+};
+
+static struct genl_ops *dp_genl_all_ops[] = {
+	/* Keep this operation first.  Generic Netlink dispatching
+	 * looks up operations with linear search, so we want it at the
+	 * front.
*/
+	&dp_genl_ops_openflow,
+
+	&dp_genl_ops_query_flow,
+	&dp_genl_ops_query_table,
+	&dp_genl_ops_show_dp,
+	&dp_genl_ops_add_dp,
+	&dp_genl_ops_del_dp,
+	&dp_genl_ops_query_dp,
+	&dp_genl_ops_add_port,
+	&dp_genl_ops_del_port,
+	&dp_genl_ops_benchmark_nl,
+};
+/* Registers the Generic Netlink family, every operation listed in
+ * dp_genl_all_ops and the "openflow" multicast group.  If any step after the
+ * family registration fails, the family is unregistered again and the error
+ * is returned.  Returns 0 on success. */
+static int dp_init_netlink(void)
+{
+	int err;
+	int i;
+
+	err = genl_register_family(&dp_genl_family);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
+		err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
+		if (err)
+			goto err_unregister;
+	}
+
+	strcpy(mc_group.name, "openflow");
+	err = genl_register_mc_group(&dp_genl_family, &mc_group);
+	if (err < 0)
+		goto err_unregister;
+
+	return 0;
+
+err_unregister:
+	genl_unregister_family(&dp_genl_family);
+	return err;
+}
+/* Undoes dp_init_netlink() by unregistering the family. */
+static void dp_uninit_netlink(void)
+{
+	genl_unregister_family(&dp_genl_family);
+}
+
+#define DRV_NAME "openflow"
+#define DRV_VERSION VERSION
+#define DRV_DESCRIPTION "OpenFlow switching datapath implementation"
+#define DRV_COPYRIGHT "Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University"
+
+/* Module entry point: logs a banner, initializes the flow code and the
+ * netlink interface, then installs dp_frame_hook as the bridge frame hook. */
+static int __init dp_init(void)
+{
+	int err;
+
+	printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
+	printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n");
+	printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n");
+
+	err = flow_init();
+	if (err)
+		goto error;
+
+	err = dp_init_netlink();
+	if (err)
+		goto error_flow_exit;
+
+	/* Hook into callback used by the bridge to intercept packets.
+	 * Parasites we are.
*/
+	if (br_handle_frame_hook)
+		printk("openflow: hijacking bridge hook\n");
+	br_handle_frame_hook = dp_frame_hook;
+
+	return 0;
+
+error_flow_exit:
+	flow_exit();
+error:
+	printk(KERN_EMERG "openflow: failed to install!\n");
+	return err;
+}
+
+/* Module exit point.  The bridge hook is removed first so that no new frames
+ * are handed to the datapath while the forwarding, netlink and flow state is
+ * being torn down. */
+static void dp_cleanup(void)
+{
+	br_handle_frame_hook = NULL;
+	fwd_exit();
+	dp_uninit_netlink();
+	flow_exit();
+}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR(DRV_COPYRIGHT);
+MODULE_LICENSE("GPL");
diff --git a/datapath/datapath.h b/datapath/datapath.h
new file mode 100644
index 00000000..cba2b793
--- /dev/null
+++ b/datapath/datapath.h
@@ -0,0 +1,72 @@
+/* Interface exported by OpenFlow module. */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include
+#include
+#include
+#include "openflow.h"
+#include "flow.h"
+
+
+#define NL_FLOWS_PER_MESSAGE 100
+
+#ifdef NDEBUG
+#define dprintk(x...)
+#else
+#define dprintk(x...) printk(x)
+#endif
+
+/* Capabilities supported by this implementation. */
+#define OFP_SUPPORTED_CAPABILITIES (OFPC_MULTI_PHY_TX)
+
+/* Actions supported by this implementation. */
+#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \
+		| (1 << OFPAT_SET_DL_VLAN) \
+		| (1 << OFPAT_SET_DL_SRC) \
+		| (1 << OFPAT_SET_DL_DST) \
+		| (1 << OFPAT_SET_NW_SRC) \
+		| (1 << OFPAT_SET_NW_DST) \
+		| (1 << OFPAT_SET_TP_SRC) \
+		| (1 << OFPAT_SET_TP_DST) )
+
+struct sk_buff;
+
+struct datapath {
+	int dp_idx;
+
+	/* Unique identifier for this datapath, incorporates the dp_idx and
+	 * a hardware address */
+	uint64_t id;
+
+	struct timer_list timer;	/* Expiration timer. */
+	struct sw_chain *chain;		/* Forwarding rules. */
+
+	/* Data related to the "of" device of this datapath */
+	struct net_device dev;
+	struct net_device_stats stats;
+
+	/* Flags from the control hello message */
+	uint16_t hello_flags;
+
+	/* Maximum number of bytes that should be sent for flow misses */
+	uint16_t miss_send_len;
+
+	/* Switch ports.
*/
+	struct net_bridge_port *ports[OFPP_MAX];
+	struct list_head port_list; /* List of ports, for flooding. */
+};
+
+int dp_output_port(struct datapath *, struct sk_buff *, int out_port);
+int dp_output_control(struct datapath *, struct sk_buff *,
+			   uint32_t buffer_id, size_t max_len, int reason);
+int dp_set_origin(struct datapath *, uint16_t, struct sk_buff *);
+int dp_send_hello(struct datapath *);
+int dp_send_flow_expired(struct datapath *, struct sw_flow *);
+int dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp);
+
+/* Should hold at least RCU read lock when calling */
+struct datapath *dp_get(int dp_idx);
+
+#endif /* datapath.h */
diff --git a/datapath/datapath_t.c b/datapath/datapath_t.c
new file mode 100644
index 00000000..33a64a60
--- /dev/null
+++ b/datapath/datapath_t.c
@@ -0,0 +1,118 @@
+#include "datapath_t.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "datapath.h"
+
+/* Builds a synthetic Ethernet/IP/UDP test packet of 'packet_size' bytes whose
+ * origin is set to the first port that exists on 'dp'.  Returns the new skb,
+ * or NULL if 'dp' has no ports, 'packet_size' is too small to hold the three
+ * headers, allocation fails, or the origin cannot be set. */
+static struct sk_buff *
+gen_sk_buff(struct datapath *dp, uint32_t packet_size)
+{
+	int in_port;
+	struct sk_buff *skb;
+	struct ethhdr *eh;
+	struct iphdr *ih;
+	struct udphdr *uh;
+
+	for (in_port = 0; in_port < OFPP_MAX; in_port++) {
+		if (dp->ports[in_port] != NULL)
+			break;
+	}
+
+	if (in_port == OFPP_MAX) {
+		printk("benchmark: no in_port to send packets as\n");
+		return NULL;
+	}
+
+	/* 'packet_size' is supplied by userspace.  The Ethernet, IP and UDP
+	 * headers written below must fit inside the buffer, otherwise the
+	 * header writes run past the end of the skb data and the length
+	 * fields underflow. */
+	if (packet_size < sizeof(*eh) + sizeof(*ih) + sizeof(*uh)) {
+		printk("benchmark: packet size %u too small for headers\n",
+		       packet_size);
+		return NULL;
+	}
+
+	skb = alloc_skb(packet_size, GFP_ATOMIC);
+	if (!skb) {
+		printk("benchmark: cannot allocate skb for benchmark\n");
+		return NULL;
+	}
+
+	skb_put(skb, packet_size);
+	skb_set_mac_header(skb, 0);
+	eh = eth_hdr(skb);
+	memcpy(eh->h_dest, "\x12\x34\x56\x78\x9a\xbc", ETH_ALEN);
+	memcpy(eh->h_source, "\xab\xcd\xef\x12\x34\x56", ETH_ALEN);
+	eh->h_proto = htons(ETH_P_IP);
+	skb_set_network_header(skb, sizeof(*eh));
+	ih = ip_hdr(skb);
+	ih->ihl = 5;
+	ih->version = IPVERSION;
+	ih->tos = 0;
+	ih->tot_len = htons(packet_size - sizeof(*eh));
+	ih->id = htons(12345);
+	ih->frag_off = 0;
+	ih->ttl = IPDEFTTL;
+	ih->protocol = IPPROTO_UDP;
+	ih->check =
0; /* want this to be right?! */
+	ih->saddr = 0x12345678;
+	ih->daddr = 0x1234abcd;
+	skb_set_transport_header(skb, sizeof(*eh) + sizeof(*ih));
+	uh = udp_hdr(skb);
+	uh->source = htons(1234);
+	uh->dest = htons(5678);
+	uh->len = htons(packet_size - sizeof(*eh) - sizeof(*ih));
+	uh->check = 0;
+	if (dp_set_origin(dp, in_port, skb)) {
+		printk("benchmark: could not set origin\n");
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+/* DP_GENL_C_BENCHMARK_NL handler: builds one test packet of DP_GENL_A_PSIZE
+ * bytes and passes it to dp_output_control() DP_GENL_A_NPACKETS times,
+ * stopping at the first error.  Returns -EINVAL if an attribute is missing,
+ * -ENOENT for an unknown datapath, -ENOMEM if the packet cannot be built,
+ * otherwise 0 or the first error from dp_output_control(). */
+int
+dp_genl_benchmark_nl(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	uint32_t num_packets = 0;
+	int i, err = 0;
+	struct sk_buff *skb2;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_NPACKETS]
+			|| !info->attrs[DP_GENL_A_PSIZE])
+		return -EINVAL;
+
+	num_packets = nla_get_u32((info->attrs[DP_GENL_A_NPACKETS]));
+
+	rcu_read_lock();
+	dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
+	if (!dp)
+		err = -ENOENT;
+	else {
+		if (num_packets == 0)
+			goto benchmark_unlock;
+
+		skb2 = gen_sk_buff(dp, nla_get_u32((info->attrs[DP_GENL_A_PSIZE])));
+		if (skb2 == NULL) {
+			err = -ENOMEM;
+			goto benchmark_unlock;
+		}
+		/* NOTE(review): skb_get() takes another reference instead of
+		 * copying, so every iteration hands the same buffer to
+		 * dp_output_control() -- confirm that is intended. */
+		for (i = 0; i < num_packets; i++) {
+			struct sk_buff *copy = skb_get(skb2);
+			if (copy == NULL) {
+				printk("benchmark: skb_get failed\n");
+				err = -ENOMEM;
+				break;
+			}
+			if ((err = dp_output_control(dp, copy, -1,
+						0, OFPR_ACTION)))
+			{
+				printk("benchmark: output control ret %d on iter %d\n", err, i);
+				break;
+			}
+		}
+		kfree_skb(skb2);
+	}
+
+benchmark_unlock:
+	rcu_read_unlock();
+	return err;
+}
diff --git a/datapath/datapath_t.h b/datapath/datapath_t.h
new file mode 100644
index 00000000..868e734d
--- /dev/null
+++ b/datapath/datapath_t.h
@@ -0,0 +1,12 @@
+#ifndef DATAPATH_T_H
+#define DATAPATH_T_H 1
+
+#include
+#include
+#include
+#include
+#include "openflow-netlink.h"
+
+int dp_genl_benchmark_nl(struct sk_buff *, struct genl_info *);
+
+#endif
diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c
new file mode 100644
index 00000000..e2ebf5a4
--- /dev/null
+++
b/datapath/dp_dev.c
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+#include
+
+#include "datapath.h"
+#include "forward.h"
+
+/* ioctl hook for the datapath device; no requests are handled yet. */
+static int dp_dev_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	printk("xxx_do_ioctl called\n");
+	return 0;
+}
+
+/* Returns the stats member of the struct datapath stored as the private data
+ * of 'dev'. */
+static struct net_device_stats *dp_dev_get_stats(struct net_device *dev)
+{
+	struct datapath *dp = netdev_priv(dev);
+	return &dp->stats;
+}
+
+/* Transmit hook for the datapath device.
+ *
+ * Transmission is not implemented yet, so the packet is dropped.  Returning 0
+ * reports the packet as consumed, so 'skb' has to be freed here or it leaks.
+ *
+ * TODO: hand the packet to the forwarding path instead, i.e. call
+ * fwd_port_input(dp->chain, skb, OFPP_LOCAL) under rcu_read_lock(), where
+ * 'dp' is netdev_priv(dev). */
+int dp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	printk("xxx dp_dev_xmit not implemented yet!\n");
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Device open hook: starts the transmit queue. */
+static int dp_dev_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	return 0;
+}
+
+/* Multicast list hook; nothing is done with the list yet. */
+static void dp_dev_set_multicast_list(struct net_device *dev)
+{
+	printk("xxx_set_multi called\n");
+}
+
+/* Device stop hook: stops the transmit queue. */
+static int dp_dev_stop(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+/* Names 'dev' from the "of%d" template, installs the hooks above and
+ * registers the device.  Returns the negative error from dev_alloc_name() if
+ * naming fails, otherwise the result of register_netdevice(). */
+int dp_dev_setup(struct net_device *dev)
+{
+	int err;
+
+	strncpy(dev->name, "of%d", IFNAMSIZ);
+	err = dev_alloc_name(dev, dev->name);
+	if (err < 0)
+		return err;
+
+	/* ether_setup() installs the generic Ethernet defaults, which include
+	 * dev->flags, dev->tx_queue_len and the set_mac_address hook.  It has
+	 * to run before the datapath-specific values below are assigned;
+	 * called afterwards it would overwrite them. */
+	ether_setup(dev);
+
+	dev->do_ioctl = dp_dev_do_ioctl;
+	dev->get_stats = dp_dev_get_stats;
+	dev->hard_start_xmit = dp_dev_xmit;
+	dev->open = dp_dev_open;
+	dev->set_multicast_list = dp_dev_set_multicast_list;
+	dev->stop = dp_dev_stop;
+	dev->tx_queue_len = 0;
+	dev->set_mac_address = NULL;
+
+	dev->flags = IFF_BROADCAST | IFF_NOARP | IFF_MULTICAST;
+
+	random_ether_addr(dev->dev_addr);
+
+	return register_netdevice(dev);
+}
diff --git a/datapath/flow.c b/datapath/flow.c
new file mode 100644
index 00000000..5aa726d8
--- /dev/null
+++ b/datapath/flow.c
@@ -0,0 +1,311 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior
+ * University
+ */
+
+#include "flow.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "openflow.h"
+#include "compat.h"
+#include "snap.h"
+
+struct kmem_cache *flow_cache;
+
+/* Field-by-field comparison of 'a' and 'b'.  A field whose OFPFW_* bit is set
+ * in 'w' is skipped; every other field has to be equal.  Returns 1 when all
+ * compared fields agree and 0 at the first one that differs. */
+static inline
+int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b,
+		uint16_t w)
+{
+	if (!(w & OFPFW_IN_PORT) && a->in_port != b->in_port)
+		return 0;
+	if (!(w & OFPFW_DL_VLAN) && a->dl_vlan != b->dl_vlan)
+		return 0;
+	if (!(w & OFPFW_DL_SRC) && memcmp(a->dl_src, b->dl_src, ETH_ALEN))
+		return 0;
+	if (!(w & OFPFW_DL_DST) && memcmp(a->dl_dst, b->dl_dst, ETH_ALEN))
+		return 0;
+	if (!(w & OFPFW_DL_TYPE) && a->dl_type != b->dl_type)
+		return 0;
+	if (!(w & OFPFW_NW_SRC) && a->nw_src != b->nw_src)
+		return 0;
+	if (!(w & OFPFW_NW_DST) && a->nw_dst != b->nw_dst)
+		return 0;
+	if (!(w & OFPFW_NW_PROTO) && a->nw_proto != b->nw_proto)
+		return 0;
+	if (!(w & OFPFW_TP_SRC) && a->tp_src != b->tp_src)
+		return 0;
+	if (!(w & OFPFW_TP_DST) && a->tp_dst != b->tp_dst)
+		return 0;
+	return 1;
+}
+
+/* Compares 'a' and 'b', ignoring every field that is wildcarded in either of
+ * them.  Returns nonzero if the remaining fields are equal, zero
+ * otherwise. */
+inline
+int flow_matches(const struct sw_flow_key *a, const struct sw_flow_key *b)
+{
+	uint16_t ignored = a->wildcards | b->wildcards;
+
+	return flow_fields_match(a, b, ignored);
+}
+
+/* Returns nonzero if 't' (the table entry's key) and 'd' (the key
+ * describing the deletion) match, that is, if their fields are
+ * equal modulo the wildcards of 'd', zero otherwise.  If 'strict' is
+ * nonzero, the wildcards of 't' and 'd' must be identical as well.  Note
+ * that the table's wildcards are ignored unless 'strict' is set.
*/ +inline +int flow_del_matches(const struct sw_flow_key *t, const struct sw_flow_key *d, int strict) +{ + if (strict && (t->wildcards != d->wildcards)) + return 0; + + return flow_fields_match(t, d, d->wildcards); +} + +void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) +{ + to->wildcards = ntohs(from->wildcards) & OFPFW_ALL; + to->in_port = from->in_port; + to->dl_vlan = from->dl_vlan; + memcpy(to->dl_src, from->dl_src, ETH_ALEN); + memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); + to->dl_type = from->dl_type; + to->nw_src = from->nw_src; + to->nw_dst = from->nw_dst; + to->nw_proto = from->nw_proto; + to->tp_src = from->tp_src; + to->tp_dst = from->tp_dst; + memset(to->pad, '\0', sizeof(to->pad)); +} + +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from) +{ + to->wildcards = htons(from->wildcards); + to->in_port = from->in_port; + to->dl_vlan = from->dl_vlan; + memcpy(to->dl_src, from->dl_src, ETH_ALEN); + memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); + to->dl_type = from->dl_type; + to->nw_src = from->nw_src; + to->nw_dst = from->nw_dst; + to->nw_proto = from->nw_proto; + to->tp_src = from->tp_src; + to->tp_dst = from->tp_dst; + memset(to->pad, '\0', sizeof(to->pad)); +} + +/* Returns true if 'flow' can be deleted and set up for a deferred free, false + * if deletion has already been scheduled (by another thread). + * + * Caller must hold rcu_read_lock. */ +int flow_del(struct sw_flow *flow) +{ + return !atomic_cmpxchg(&flow->deleted, 0, 1); +} + +/* Allocates and returns a new flow with 'n_actions' action, using allocation + * flags 'flags'. Returns the new flow or a null pointer on failure. 
*/ +struct sw_flow *flow_alloc(int n_actions, gfp_t flags) +{ + struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags); + if (unlikely(!flow)) + return NULL; + + flow->n_actions = n_actions; + flow->actions = kmalloc(n_actions * sizeof *flow->actions, + flags); + if (unlikely(!flow->actions) && n_actions > 0) { + kmem_cache_free(flow_cache, flow); + return NULL; + } + return flow; +} + +/* Frees 'flow' immediately. */ +void flow_free(struct sw_flow *flow) +{ + if (unlikely(!flow)) + return; + kfree(flow->actions); + kmem_cache_free(flow_cache, flow); +} + +/* RCU callback used by flow_deferred_free. */ +static void rcu_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + flow_free(flow); +} + +/* Schedules 'flow' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void flow_deferred_free(struct sw_flow *flow) +{ + call_rcu(&flow->rcu, rcu_callback); +} + +/* Prints a representation of 'key' to the kernel log. 
*/ +void print_flow(const struct sw_flow_key *key) +{ + printk("wild%04x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x" + "->%02x:%02x:%02x:%02x:%02x:%02x " + "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n", + key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan), + key->dl_src[0], key->dl_src[1], key->dl_src[2], + key->dl_src[3], key->dl_src[4], key->dl_src[5], + key->dl_dst[0], key->dl_dst[1], key->dl_dst[2], + key->dl_dst[3], key->dl_dst[4], key->dl_dst[5], + ntohs(key->dl_type), + ((unsigned char *)&key->nw_src)[0], + ((unsigned char *)&key->nw_src)[1], + ((unsigned char *)&key->nw_src)[2], + ((unsigned char *)&key->nw_src)[3], + ((unsigned char *)&key->nw_dst)[0], + ((unsigned char *)&key->nw_dst)[1], + ((unsigned char *)&key->nw_dst)[2], + ((unsigned char *)&key->nw_dst)[3], + ntohs(key->tp_src), ntohs(key->tp_dst)); +} + +uint32_t hash_in6(const struct in6_addr *in) +{ + return (in->s6_addr32[0] ^ in->s6_addr32[1] + ^ in->s6_addr32[2] ^ in->s6_addr32[3]); +} + +// with inspiration from linux/if_arp.h +struct arp_eth_hdr { + uint16_t ar_hrd; /* format of hardware address */ + uint16_t ar_pro; /* format of protocol address */ + uint8_t ar_hln; /* length of hardware address */ + uint8_t ar_pln; /* length of protocol address */ + uint16_t ar_op; /* ARP opcode (command) */ + + uint8_t ar_sha[ETH_ALEN]; /* source hardware addr */ + uint32_t ar_sip; /* source protocol addr */ + uint8_t ar_tha[ETH_ALEN]; /* dest hardware addr */ + uint32_t ar_tip; /* dest protocol addr */ +} __attribute__((packed)); + +/* Parses the Ethernet frame in 'skb', which was received on 'in_port', + * and initializes 'key' to match. 
*/ +void flow_extract(struct sk_buff *skb, uint16_t in_port, + struct sw_flow_key *key) +{ + struct ethhdr *mac; + struct udphdr *th; + int nh_ofs, th_ofs; + + key->in_port = htons(in_port); + key->wildcards = 0; + memset(key->pad, '\0', sizeof(key->pad)); + + /* This code doesn't check that skb->len is long enough to contain the + * MAC or network header. With a 46-byte minimum length frame this + * assumption is always correct. */ + + /* Doesn't verify checksums. Should it? */ + + /* Data link layer. We only support Ethernet. */ + mac = eth_hdr(skb); + nh_ofs = sizeof(struct ethhdr); + if (likely(ntohs(mac->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF)) { + /* This is an Ethernet II frame */ + key->dl_type = mac->h_proto; + } else { + /* This is an 802.2 frame */ + if (snap_get_ethertype(skb, &key->dl_type) != -EINVAL) { + nh_ofs += sizeof(struct snap_hdr); + } else { + key->dl_type = OFP_DL_TYPE_NOT_ETH_TYPE; + nh_ofs += sizeof(struct llc_pdu_un); + } + } + + /* Check for a VLAN tag */ + if (likely(key->dl_type != __constant_htons(ETH_P_8021Q))) { + key->dl_vlan = __constant_htons(OFP_VLAN_NONE); + } else { + struct vlan_hdr *vh = (struct vlan_hdr *)(skb_mac_header(skb) + nh_ofs); + key->dl_type = vh->h_vlan_encapsulated_proto; + key->dl_vlan = vh->h_vlan_TCI & __constant_htons(VLAN_VID_MASK); + nh_ofs += sizeof(*vh); + } + memcpy(key->dl_src, mac->h_source, ETH_ALEN); + memcpy(key->dl_dst, mac->h_dest, ETH_ALEN); + skb_set_network_header(skb, nh_ofs); + + /* Network layer. */ + if (likely(key->dl_type == htons(ETH_P_IP))) { + struct iphdr *nh = ip_hdr(skb); + key->nw_src = nh->saddr; + key->nw_dst = nh->daddr; + key->nw_proto = nh->protocol; + th_ofs = nh_ofs + nh->ihl * 4; + skb_set_transport_header(skb, th_ofs); + + /* Transport layer. 
*/ + if ((key->nw_proto != IPPROTO_TCP && key->nw_proto != IPPROTO_UDP) + || skb->len < th_ofs + sizeof(struct udphdr)) { + goto no_th; + } + th = udp_hdr(skb); + key->tp_src = th->source; + key->tp_dst = th->dest; + + return; + } else if (key->dl_type == htons(ETH_P_IPV6)) { + struct ipv6hdr *nh = ipv6_hdr(skb); + key->nw_src = hash_in6(&nh->saddr); + key->nw_dst = hash_in6(&nh->daddr); + /* FIXME: Need to traverse next-headers until we find the + * upper-layer header. */ + key->nw_proto = 0; + goto no_th; + } else if (key->dl_type == htons(ETH_P_ARP)) { + /* just barely within 46-byte minimum packet */ + struct arp_eth_hdr *ah = (struct arp_eth_hdr *)skb_network_header(skb); + if (ah->ar_hrd == htons(ARPHRD_ETHER) + && ah->ar_pro == htons(ETH_P_IP) + && ah->ar_hln == ETH_ALEN + && ah->ar_pln == sizeof(key->nw_src)) + { + /* check if sha/tha match dl_src/dl_dst? */ + key->nw_src = ah->ar_sip; + key->nw_dst = ah->ar_tip; + key->nw_proto = 0; + goto no_th; + } + } else { + /* Fall through. */ + } + + key->nw_src = 0; + key->nw_dst = 0; + key->nw_proto = 0; + +no_th: + key->tp_src = 0; + key->tp_dst = 0; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int flow_init(void) +{ + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} + diff --git a/datapath/flow.h b/datapath/flow.h new file mode 100644 index 00000000..5faeaf9d --- /dev/null +++ b/datapath/flow.h @@ -0,0 +1,131 @@ +#ifndef FLOW_H +#define FLOW_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow.h" + +struct sk_buff; +struct ofp_flow_mod; + +/* Identification data for a flow. + Network byte order except for the "wildcards" field. 
+ In decreasing order by size, so that sw_flow_key structures can + be hashed or compared bytewise. + It might be useful to reorder members from (expected) greatest to least + inter-flow variability, so that failing bytewise comparisons with memcmp + terminate as quickly as possible on average. */ +struct sw_flow_key { + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint16_t in_port; /* Input switch port */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ + uint16_t wildcards; /* Wildcard fields (host byte order). */ + uint8_t dl_src[6]; /* Ethernet source address. */ + uint8_t dl_dst[6]; /* Ethernet destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t pad[3]; /* NB: Pad to make 32-bit aligned */ +}; + +/* We need to manually make sure that the structure is 32-bit aligned, + * since we don't want garbage values in compiler-generated pads from + * messing up hash matches. + */ +static inline void check_key_align(void) +{ + BUILD_BUG_ON(sizeof(struct sw_flow_key) != 36); +} + +/* Maximum number of actions in a single flow entry. */ +#define MAX_ACTIONS 16 + +/* Locking: + * + * - Readers must take rcu_read_lock and hold it the entire time that the flow + * must continue to exist. Readers need not take delete_lock. They *may* + * examine 'deleted' *if* it is important not to read stale data. + * + * - Deleters must take rcu_read_lock and call flow_del to verify that another + * thread has not already deleted the flow. If not, do a deferred free of + * the flow with call_rcu, then rcu_assign_pointer or [h]list_del_rcu the + * flow. + * + * - In-place update not yet contemplated. + */ +struct sw_flow { + struct sw_flow_key key; + + uint32_t group_id; /* Flow group ID (for QoS). */ + uint16_t max_idle; /* Idle time before discarding (seconds). 
*/ + unsigned long timeout; /* Expiration time (in jiffies). */ + + /* FIXME? Probably most flows have only a single action. */ + unsigned int n_actions; + struct ofp_action *actions; + + /* For use by table implementation. */ + union { + struct list_head node; + struct hlist_node hnode; + } u; + + spinlock_t lock; /* Lock this entry...mostly for stat updates */ + unsigned long init_time; /* When the flow was created (in jiffies). */ + uint64_t packet_count; /* Number of packets associated with this entry */ + uint64_t byte_count; /* Number of bytes associated with this entry */ + + atomic_t deleted; /* 0 if not deleted, 1 if deleted. */ + struct rcu_head rcu; +}; + +int flow_matches(const struct sw_flow_key *, const struct sw_flow_key *); +int flow_del_matches(const struct sw_flow_key *, const struct sw_flow_key *, + int); +struct sw_flow *flow_alloc(int n_actions, gfp_t flags); +void flow_free(struct sw_flow *); +void flow_deferred_free(struct sw_flow *); +void flow_extract(struct sk_buff *, uint16_t in_port, struct sw_flow_key *); +int flow_del(struct sw_flow *); +void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from); +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from); + +void print_flow(const struct sw_flow_key *); + +#include +static inline int flow_timeout(struct sw_flow *flow) +{ + if (flow->max_idle == OFP_FLOW_PERMANENT) + return 0; + + return time_after(jiffies, flow->timeout); +} + +static inline void flow_used(struct sw_flow *flow, struct sk_buff *skb) +{ + unsigned long flags; + + if (flow->max_idle != OFP_FLOW_PERMANENT) + flow->timeout = jiffies + HZ * flow->max_idle; + + spin_lock_irqsave(&flow->lock, flags); + flow->packet_count++; + flow->byte_count += skb->len; + spin_unlock_irqrestore(&flow->lock, flags); +} + +extern struct kmem_cache *flow_cache; + +int flow_init(void); +void flow_exit(void); + +#endif /* flow.h */ diff --git a/datapath/forward.c b/datapath/forward.c new file mode 100644 index 
00000000..37dcd689 --- /dev/null +++ b/datapath/forward.c @@ -0,0 +1,585 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "forward.h" +#include "datapath.h" +#include "chain.h" +#include "flow.h" + +/* FIXME: do we need to use GFP_ATOMIC everywhere here? */ + +static void execute_actions(struct datapath *, struct sk_buff *, + const struct sw_flow_key *, + const struct ofp_action *, int n_actions); +static int make_writable(struct sk_buff **); + +static struct sk_buff *retrieve_skb(uint32_t id); +static void discard_skb(uint32_t id); + +/* 'skb' was received on 'in_port', a physical switch port between 0 and + * OFPP_MAX. Process it according to 'chain'. */ +void fwd_port_input(struct sw_chain *chain, struct sk_buff *skb, int in_port) +{ + struct sw_flow_key key; + struct sw_flow *flow; + + flow_extract(skb, in_port, &key); + flow = chain_lookup(chain, &key); + if (likely(flow != NULL)) { + flow_used(flow, skb); + execute_actions(chain->dp, skb, &key, + flow->actions, flow->n_actions); + } else { + dp_output_control(chain->dp, skb, fwd_save_skb(skb), + chain->dp->miss_send_len, OFPR_NO_MATCH); + } +} + +static int do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len, + int out_port) +{ + if (!skb) + return -ENOMEM; + return (likely(out_port != OFPP_CONTROLLER) + ? dp_output_port(dp, skb, out_port) + : dp_output_control(dp, skb, fwd_save_skb(skb), + max_len, OFPR_ACTION)); +} + +static void execute_actions(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, + const struct ofp_action *actions, int n_actions) +{ + /* Every output action needs a separate clone of 'skb', but the common + * case is just a single output action, so that doing a clone and + * then freeing the original skbuff is wasteful. 
So the following code + * is slightly obscure just to avoid that. */ + int prev_port; + size_t max_len=0; /* Initialze to make compiler happy */ + uint16_t eth_proto; + int i; + + prev_port = -1; + eth_proto = ntohs(key->dl_type); + + for (i = 0; i < n_actions; i++) { + const struct ofp_action *a = &actions[i]; + + if (prev_port != -1) { + do_output(dp, skb_clone(skb, GFP_ATOMIC), + max_len, prev_port); + prev_port = -1; + } + + if (likely(a->type == ntohs(OFPAT_OUTPUT))) { + prev_port = ntohs(a->arg.output.port); + max_len = ntohs(a->arg.output.max_len); + } else { + if (!make_writable(&skb)) { + printk("make_writable failed\n"); + break; + } + skb = execute_setter(skb, eth_proto, key, a); + } + } + if (prev_port != -1) + do_output(dp, skb, max_len, prev_port); + else + kfree_skb(skb); +} + +/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field + * covered by the sum has been changed from 'from' to 'to'. If set, + * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. + * Based on nf_proto_csum_replace4. 
*/ +static void update_csum(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) +{ + __be32 diff[] = { ~from, to }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + csum_unfold(*sum))); +} + +static void modify_nh(struct sk_buff *skb, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_P_IP) { + struct iphdr *nh = ip_hdr(skb); + uint32_t new, *field; + + new = a->arg.nw_addr; + + if (a->type == OFPAT_SET_NW_SRC) + field = &nh->saddr; + else + field = &nh->daddr; + + if (nw_proto == IPPROTO_TCP) { + struct tcphdr *th = tcp_hdr(skb); + update_csum(&th->check, skb, *field, new, 1); + } else if (nw_proto == IPPROTO_UDP) { + struct udphdr *th = udp_hdr(skb); + update_csum(&th->check, skb, *field, new, 1); + } + update_csum(&nh->check, skb, *field, new, 0); + *field = new; + } +} + +static void modify_th(struct sk_buff *skb, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_P_IP) { + uint16_t new, *field; + + new = a->arg.tp; + + if (nw_proto == IPPROTO_TCP) { + struct tcphdr *th = tcp_hdr(skb); + + if (a->type == OFPAT_SET_TP_SRC) + field = &th->source; + else + field = &th->dest; + + update_csum(&th->check, skb, *field, new, 1); + *field = new; + } else if (nw_proto == IPPROTO_UDP) { + struct udphdr *th = udp_hdr(skb); + + if (a->type == OFPAT_SET_TP_SRC) + field = &th->source; + else + field = &th->dest; + + update_csum(&th->check, skb, *field, new, 1); + *field = new; + } + } +} + +static struct sk_buff *vlan_pull_tag(struct sk_buff *skb) +{ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + struct ethhdr *eh; + + + /* Verify we were given a vlan packet */ + if 
(vh->h_vlan_proto != __constant_htons(ETH_P_8021Q)) + return skb; + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); + + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + + skb->protocol = eh->h_proto; + skb->mac_header += VLAN_HLEN; + + return skb; +} + +static struct sk_buff *modify_vlan(struct sk_buff *skb, + const struct sw_flow_key *key, const struct ofp_action *a) +{ + uint16_t new_id = a->arg.vlan_id; + + if (new_id != OFP_VLAN_NONE) { + if (key->dl_vlan != __constant_htons(OFP_VLAN_NONE)) { + /* Modify vlan id, but maintain other TCI values */ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + vh->h_vlan_TCI = (vh->h_vlan_TCI + & ~(__constant_htons(VLAN_VID_MASK))) | htons(new_id); + } else { + /* Add vlan header */ + skb = vlan_put_tag(skb, new_id); + } + } else { + /* Remove an existing vlan header if it exists */ + vlan_pull_tag(skb); + } + + return skb; +} + +struct sk_buff *execute_setter(struct sk_buff *skb, uint16_t eth_proto, + const struct sw_flow_key *key, const struct ofp_action *a) +{ + switch (a->type) { + case OFPAT_SET_DL_VLAN: + skb = modify_vlan(skb, key, a); + break; + + case OFPAT_SET_DL_SRC: { + struct ethhdr *eh = eth_hdr(skb); + memcpy(eh->h_source, a->arg.dl_addr, sizeof eh->h_source); + break; + } + case OFPAT_SET_DL_DST: { + struct ethhdr *eh = eth_hdr(skb); + memcpy(eh->h_dest, a->arg.dl_addr, sizeof eh->h_dest); + break; + } + + case OFPAT_SET_NW_SRC: + case OFPAT_SET_NW_DST: + modify_nh(skb, eth_proto, key->nw_proto, a); + break; + + case OFPAT_SET_TP_SRC: + case OFPAT_SET_TP_DST: + modify_th(skb, eth_proto, key->nw_proto, a); + break; + + default: + BUG(); + } + + return skb; +} + +static int +recv_control_hello(struct sw_chain *chain, const void *msg) +{ + const struct ofp_control_hello *och = msg; + + printk("control_hello(version=%d)\n", ntohl(och->version)); + + if (ntohs(och->miss_send_len) != OFP_MISS_SEND_LEN_UNCHANGED) { + chain->dp->miss_send_len = ntohs(och->miss_send_len); + } + + chain->dp->hello_flags = 
ntohs(och->flags); + + dp_send_hello(chain->dp); + + return 0; +} + +static int +recv_packet_out(struct sw_chain *chain, const void *msg) +{ + const struct ofp_packet_out *opo = msg; + struct sk_buff *skb; + struct vlan_ethhdr *mac; + int nh_ofs; + + if (ntohl(opo->buffer_id) == (uint32_t) -1) { + int data_len = ntohs(opo->header.length) - sizeof *opo; + + /* FIXME: there is likely a way to reuse the data in msg. */ + skb = alloc_skb(data_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* FIXME? We don't reserve NET_IP_ALIGN or NET_SKB_PAD since + * we're just transmitting this raw without examining anything + * at those layers. */ + memcpy(skb_put(skb, data_len), opo->u.data, data_len); + dp_set_origin(chain->dp, ntohs(opo->in_port), skb); + + skb_set_mac_header(skb, 0); + mac = vlan_eth_hdr(skb); + if (likely(mac->h_vlan_proto != htons(ETH_P_8021Q))) + nh_ofs = sizeof(struct ethhdr); + else + nh_ofs = sizeof(struct vlan_ethhdr); + skb_set_network_header(skb, nh_ofs); + + dp_output_port(chain->dp, skb, ntohs(opo->out_port)); + } else { + struct sw_flow_key key; + int n_acts; + + skb = retrieve_skb(ntohl(opo->buffer_id)); + if (!skb) + return -ESRCH; + dp_set_origin(chain->dp, ntohs(opo->in_port), skb); + + n_acts = (ntohs(opo->header.length) - sizeof *opo) + / sizeof *opo->u.actions; + flow_extract(skb, ntohs(opo->in_port), &key); + execute_actions(chain->dp, skb, &key, opo->u.actions, n_acts); + } + return 0; +} + +static int +recv_port_mod(struct sw_chain *chain, const void *msg) +{ + const struct ofp_port_mod *opm = msg; + + dp_update_port_flags(chain->dp, &opm->desc); + + return 0; +} + +static int +add_flow(struct sw_chain *chain, const struct ofp_flow_mod *ofm) +{ + int error = -ENOMEM; + int n_acts; + struct sw_flow *flow; + + + /* Check number of actions. */ + n_acts = (ntohs(ofm->header.length) - sizeof *ofm) / sizeof *ofm->actions; + if (n_acts > MAX_ACTIONS) { + error = -E2BIG; + goto error; + } + + /* Allocate memory. 
*/ + flow = flow_alloc(n_acts, GFP_ATOMIC); + if (flow == NULL) + goto error; + + /* Fill out flow. */ + flow_extract_match(&flow->key, &ofm->match); + flow->group_id = ntohl(ofm->group_id); + flow->max_idle = ntohs(ofm->max_idle); + flow->timeout = jiffies + flow->max_idle * HZ; + flow->n_actions = n_acts; + flow->init_time = jiffies; + flow->byte_count = 0; + flow->packet_count = 0; + atomic_set(&flow->deleted, 0); + spin_lock_init(&flow->lock); + memcpy(flow->actions, ofm->actions, n_acts * sizeof *flow->actions); + + /* Act. */ + error = chain_insert(chain, flow); + if (error) + goto error_free_flow; + error = 0; + if (ntohl(ofm->buffer_id) != (uint32_t) -1) { + struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id)); + if (skb) { + struct sw_flow_key key; + flow_used(flow, skb); + flow_extract(skb, ntohs(ofm->match.in_port), &key); + execute_actions(chain->dp, skb, &key, + ofm->actions, n_acts); + } + else + error = -ESRCH; + } + return error; + +error_free_flow: + flow_free(flow); +error: + if (ntohl(ofm->buffer_id) != (uint32_t) -1) + discard_skb(ntohl(ofm->buffer_id)); + return error; +} + +static int +recv_flow(struct sw_chain *chain, const void *msg) +{ + const struct ofp_flow_mod *ofm = msg; + uint16_t command = ntohs(ofm->command); + + if (command == OFPFC_ADD) { + return add_flow(chain, ofm); + } else if (command == OFPFC_DELETE) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(chain, &key, 0) ? 0 : -ESRCH; + } else if (command == OFPFC_DELETE_STRICT) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(chain, &key, 1) ? 0 : -ESRCH; + } else { + return -ENOTSUPP; + } +} + +/* 'msg', which is 'length' bytes long, was received from the control path. + * Apply it to 'chain'. 
*/ +int +fwd_control_input(struct sw_chain *chain, const void *msg, size_t length) +{ + + struct openflow_packet { + size_t min_size; + int (*handler)(struct sw_chain *, const void *); + }; + + static const struct openflow_packet packets[] = { + [OFPT_CONTROL_HELLO] = { + sizeof (struct ofp_control_hello), + recv_control_hello, + }, + [OFPT_PACKET_OUT] = { + sizeof (struct ofp_packet_out), + recv_packet_out, + }, + [OFPT_FLOW_MOD] = { + sizeof (struct ofp_flow_mod), + recv_flow, + }, + [OFPT_PORT_MOD] = { + sizeof (struct ofp_port_mod), + recv_port_mod, + }, + }; + + const struct openflow_packet *pkt; + struct ofp_header *oh; + + if (length < sizeof(struct ofp_header)) + return -EINVAL; + + oh = (struct ofp_header *) msg; + if (oh->version != 1 || oh->type >= ARRAY_SIZE(packets) + || ntohs(oh->length) > length) + return -EINVAL; + + pkt = &packets[oh->type]; + if (!pkt->handler) + return -ENOSYS; + if (length < pkt->min_size) + return -EFAULT; + + return pkt->handler(chain, msg); +} + +/* Packet buffering. */ + +#define OVERWRITE_SECS 1 +#define OVERWRITE_JIFFIES (OVERWRITE_SECS * HZ) + +struct packet_buffer { + struct sk_buff *skb; + uint32_t cookie; + unsigned long exp_jiffies; +}; + +static struct packet_buffer buffers[N_PKT_BUFFERS]; +static unsigned int buffer_idx; +static DEFINE_SPINLOCK(buffer_lock); + +uint32_t fwd_save_skb(struct sk_buff *skb) +{ + struct packet_buffer *p; + unsigned long int flags; + uint32_t id; + + spin_lock_irqsave(&buffer_lock, flags); + buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; + p = &buffers[buffer_idx]; + if (p->skb) { + /* Don't buffer packet if existing entry is less than + * OVERWRITE_SECS old. */ + if (time_before(jiffies, p->exp_jiffies)) { + spin_unlock_irqrestore(&buffer_lock, flags); + return -1; + } else + kfree_skb(p->skb); + } + /* Don't use maximum cookie value since the all-bits-1 id is + * special. 
*/ + if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) + p->cookie = 0; + skb_get(skb); + p->skb = skb; + p->exp_jiffies = jiffies + OVERWRITE_JIFFIES; + id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); + spin_unlock_irqrestore(&buffer_lock, flags); + + return id; +} + +static struct sk_buff *retrieve_skb(uint32_t id) +{ + unsigned long int flags; + struct sk_buff *skb = NULL; + struct packet_buffer *p; + + spin_lock_irqsave(&buffer_lock, flags); + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + skb = p->skb; + p->skb = NULL; + } else { + printk("cookie mismatch: %x != %x\n", + id >> PKT_BUFFER_BITS, p->cookie); + } + spin_unlock_irqrestore(&buffer_lock, flags); + + return skb; +} + +static void discard_skb(uint32_t id) +{ + unsigned long int flags; + struct packet_buffer *p; + + spin_lock_irqsave(&buffer_lock, flags); + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + kfree_skb(p->skb); + p->skb = NULL; + } + spin_unlock_irqrestore(&buffer_lock, flags); +} + +void fwd_exit(void) +{ + int i; + + for (i = 0; i < N_PKT_BUFFERS; i++) + kfree_skb(buffers[i].skb); +} + +/* Utility functions. */ + +/* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to + * the copy. + * Returns 1 if successful, 0 on failure. */ +static int +make_writable(struct sk_buff **pskb) +{ + /* Based on skb_make_writable() in net/netfilter/core.c. */ + struct sk_buff *nskb; + + /* Not exclusive use of packet? Must copy. */ + if (skb_shared(*pskb) || skb_cloned(*pskb)) + goto copy_skb; + + return pskb_may_pull(*pskb, 64); /* FIXME? 
*/ + +copy_skb: + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return 0; + BUG_ON(skb_is_nonlinear(nskb)); + + /* Rest of kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; +} diff --git a/datapath/forward.h b/datapath/forward.h new file mode 100644 index 00000000..8e92330f --- /dev/null +++ b/datapath/forward.h @@ -0,0 +1,33 @@ +#ifndef FORWARD_H +#define FORWARD_H 1 + +#include +#include "flow.h" + +struct sk_buff; +struct sw_chain; +struct ofp_action; + +/* Buffers are identified to userspace by a 31-bit opaque ID. We divide the ID + * into a buffer number (low bits) and a cookie (high bits). The buffer number + * is an index into an array of buffers. The cookie distinguishes between + * different packets that have occupied a single buffer. Thus, the more + * buffers we have, the lower-quality the cookie... */ +#define PKT_BUFFER_BITS 8 +#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) +#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) + +#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) + + +void fwd_port_input(struct sw_chain *, struct sk_buff *, int in_port); +int fwd_control_input(struct sw_chain *, const void *, size_t); + +uint32_t fwd_save_skb(struct sk_buff *skb); + +void fwd_exit(void); + +struct sk_buff *execute_setter(struct sk_buff *, uint16_t, + const struct sw_flow_key *, const struct ofp_action *); + +#endif /* forward.h */ diff --git a/datapath/forward_t.c b/datapath/forward_t.c new file mode 100644 index 00000000..ef284621 --- /dev/null +++ b/datapath/forward_t.c @@ -0,0 +1,581 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior
+ * University
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "forward.h"
+#include "tests/forward_t.h"
+#include "openflow.h"
+#include "unit.h"
+#include "flow.h"
+
+/*
+ * Tests execute_setter() in forward.c to check that actions are
+ * appropriately taken on packets, meaning:
+ *
+ * 1. Checksums are correct.
+ * 2. Actions are only taken on compatible packets (an IP action is not taken
+ *    on a non-IP packet).
+ * 3. Other packet data remains untouched.
+ *
+ * forward_t.h contains static packet definitions.  forward_t.h should be
+ * generated using gen_forward_t.c.  This test is run on whatever packets are
+ * defined in forward_t.h.
+ *
+ * NOTE: Tests assume packets in forward_t.h are present in full and that IP
+ * and transport checksums are correct.  (Checksum offloading can be disabled
+ * with ethtool.)
+ */
+
+/*
+ * Sets the argument ('a->arg') of action 'a'.  If 'key' != NULL, sets it to
+ * 'key''s value for the field selected by 'a->type'.  If 'key' == NULL, sets
+ * it to a random value.
+ */ + +static void +set_action_data(struct sk_buff *skb, struct sw_flow_key *key, struct ofp_action *a) +{ + if (key != NULL) { + switch(a->type) { + case(OFPAT_SET_DL_SRC): + memcpy(a->arg.dl_addr, key->dl_src, sizeof key->dl_src); + break; + case(OFPAT_SET_DL_DST): + memcpy(a->arg.dl_addr, key->dl_dst, sizeof key->dl_dst); + break; + case(OFPAT_SET_NW_SRC): + if (key->dl_type == htons(ETH_P_IP)) + a->arg.nw_addr = key->nw_src; + else + a->arg.nw_addr = random32(); + break; + case(OFPAT_SET_NW_DST): + if (key->dl_type == htons(ETH_P_IP)) + a->arg.nw_addr = key->nw_dst; + else + a->arg.nw_addr = random32(); + break; + case(OFPAT_SET_TP_SRC): + if (key->nw_proto == IPPROTO_TCP || key->nw_proto == IPPROTO_UDP) + a->arg.tp = key->tp_src; + else + a->arg.tp = (uint16_t) random32(); + break; + case(OFPAT_SET_TP_DST): + if (key->nw_proto == IPPROTO_TCP || key->nw_proto == IPPROTO_UDP) + a->arg.tp = key->tp_dst; + else + a->arg.tp = (uint16_t) random32(); + break; + default: + BUG(); + } + } else { + ((uint32_t*)a->arg.dl_addr)[0] = random32(); + ((uint16_t*)a->arg.dl_addr)[2] = random32(); + } +} + + +/* + * Checks the IP sum of an IP packet. Returns 0 if correct, else -1. + */ + +static void +check_IP_csum(struct iphdr *ih) +{ + uint16_t check, *data; + uint32_t n_bytes, sum; + + check = ih->check; + ih->check = 0; + data = (uint16_t*) ih; + sum = 0; + n_bytes = ih->ihl * 4; + + while (n_bytes > 1) { + sum += ntohs(*data); + sum = (sum >> 16) + (uint16_t)sum; + data++; + n_bytes -= 2; + } + + if (n_bytes == 1) { + sum += *(uint8_t*)data; + sum = (sum >> 16) + (uint16_t)sum; + } + + ih->check = htons((uint16_t)(~sum)); + if (ih->check != check) { + unit_fail("IP checksum %hu does not match %hu", + ntohs(ih->check), ntohs(check)); + } +} + +/* + * Partially computes TCP checksum over 'n_bytes' pointed to by 'data'. Can be + * called multiple times if data csum is to be computed on is fragmented. 
If + * 'is_last' == 0, assumes will be called again on more data and returns the + * value that should be passed in as 'incr_sum' on the next call. Else if + * 'is_last' == 1, returns the final checksum. On the first call, 'incr_sum' + * should equal 0. If 'is_last' == 0, 'n_bytes' must be even. i.e. Should + * first be called on pseudo header fields that are multiples of two, and then + * on the TCP packet. + */ +static uint32_t +compute_transport_checksum(uint16_t *data, uint32_t n_bytes, + uint32_t incr_sum, uint8_t is_last) +{ + uint8_t arr[2]; + + if (n_bytes % 2 != 0 && is_last == 0) + BUG(); + + while (n_bytes > 1) { + incr_sum += ntohs(*data); + incr_sum = (incr_sum >> 16) + (uint16_t)incr_sum; + data++; + n_bytes -= 2; + } + + if (is_last == 0) + return incr_sum; + + if(n_bytes == 1) { + arr[0] = *(uint8_t*)data; + arr[1] = 0; + incr_sum += ntohs(*((uint16_t*)arr)); + incr_sum = (incr_sum >> 16) + (uint16_t)incr_sum; + } + + return ~incr_sum; +} + +/* + * Checks the transport layer's checksum of a packet. Returns '0' if correct, + * else '1'. 'ih' should point to the IP header of the packet, if TCP, 'th' + * should point the TCP header, and if UDP, 'uh' should point to the UDP + * header. 
+ */ +static int +check_transport_csum(struct iphdr *ih, struct tcphdr *th, + struct udphdr *uh) +{ + uint32_t tmp; + uint16_t len, check; + uint8_t arr[2]; + + tmp = compute_transport_checksum((uint16_t*)(&ih->saddr), + 2 * sizeof ih->saddr, 0, 0); + arr[0] = 0; + arr[1] = ih->protocol; + tmp = compute_transport_checksum((uint16_t*)arr, 2, tmp, 0); + len = ntohs(ih->tot_len) - (ih->ihl * 4); + *((uint16_t*)arr) = htons(len); + tmp = compute_transport_checksum((uint16_t*)arr, 2, tmp, 0); + + if (th != NULL) { + check = th->check; + th->check = 0; + th->check = htons((uint16_t)compute_transport_checksum((uint16_t*)th, + len, tmp, 1)); + if (th->check != check) { + unit_fail("TCP checksum %hu does not match %hu", + ntohs(th->check), ntohs(check)); + return -1; + } + } else if (uh != NULL) { + check = uh->check; + uh->check = 0; + uh->check = htons((uint16_t)compute_transport_checksum((uint16_t*)uh, + len, tmp, 1)); + if (uh->check != check) { + unit_fail("UDP checksum %hu does not match %hu", + ntohs(uh->check), ntohs(check)); + return -1; + } + } + + return 0; +} + + +/* + * Compares 'pkt_len' bytes of 'data' to 'pkt'. excl_start and excl_end point + * together delineate areas of 'data' that are not supposed to match 'pkt'. + * 'num_excl' specify how many such areas exist. An 'excl_start' entry is + * ignored if it equals NULL. See 'check_packet()' for usage. 
+ */ + +static void +compare(uint8_t *data, uint8_t *pkt, uint32_t pkt_len, + uint8_t **excl_start, uint8_t **excl_end, uint32_t num_excl) +{ + uint32_t i; + uint8_t *d, *p, *end; + int ret; + + end = data + pkt_len; + d = data; + p = pkt; + ret = 0; + + for (i = 0; i < num_excl; i++) { + if(*excl_start != NULL) { + if ((ret = memcmp(d, p, *excl_start - d)) != 0) + break; + p += (*excl_end - d); + d = *excl_end; + } + excl_start++; + excl_end++; + } + + if (ret == 0) + ret = memcmp(d, p, end - d); + + if (ret != 0) { + unit_fail("skb and packet comparison failed:"); + for (i = 0; i < pkt_len; i++) { + if (data[i] != pkt[i]) { + unit_fail("skb[%u] = 0x%x != 0x%x", + i, data[i], pkt[i]); + } + } + } +} + + +/* + * Checks that a packet's data has remained consistent after an action has been + * applied. 'skb' is the modified packet, 'a' is the action that was taken on + * the packet, 'p' is a copy of the packet's data before action 'a' was taken. + * Checks that the action was in fact taken, that the checksums of the packet + * are correct, and that no other data in the packet was altered. 
+ */ + +static void +check_packet(struct sk_buff *skb, struct ofp_action *a, struct pkt *p) +{ + struct ethhdr *eh; + struct iphdr *ih; + struct tcphdr *th; + struct udphdr *uh; + uint8_t *excl_start[5], *excl_end[5]; + + eh = eth_hdr(skb); + ih = NULL; + th = NULL; + uh = NULL; + + memset(excl_start, 0, sizeof excl_start); + memset(excl_end, 0, sizeof excl_end); + + if (eh->h_proto == htons(ETH_P_IP)) { + ih = ip_hdr(skb); + excl_start[1] = (uint8_t*)&ih->check; + excl_end[1] = (uint8_t*)(&ih->check + 1); + if (ih->protocol == IPPROTO_TCP) { + th = tcp_hdr(skb); + excl_start[4] = (uint8_t*)&th->check; + excl_end[4] = (uint8_t*)(&th->check + 1); + } else if (ih->protocol == IPPROTO_UDP) { + uh = udp_hdr(skb); + excl_start[4] = (uint8_t*)&uh->check; + excl_end[4] = (uint8_t*)(&uh->check + 1); + } + } + + if (a != NULL) { + switch(a->type) { + case(OFPAT_SET_DL_SRC): + if (memcmp(a->arg.dl_addr, eh->h_source, sizeof eh->h_source) != 0) { + unit_fail("Source eth addr has not been set"); + return; + } + excl_start[0] = (uint8_t*)(&eh->h_source); + excl_end[0] = (uint8_t*)(&eh->h_proto); + break; + case(OFPAT_SET_DL_DST): + if (memcmp(a->arg.dl_addr, eh->h_dest, sizeof eh->h_dest) != 0) { + unit_fail("Dest eth addr has not been set"); + return; + } + excl_start[0] = (uint8_t*)(&eh->h_dest); + excl_end[0] = (uint8_t*)(&eh->h_source); + break; + case(OFPAT_SET_NW_SRC): + if (ih != NULL) { + if (a->arg.nw_addr != ih->saddr) { + unit_fail("Source IP addr has not been set"); + return; + } + excl_start[2] = (uint8_t*)(&ih->saddr); + excl_end[2] = (uint8_t*)(&ih->saddr + 1); + } + break; + case(OFPAT_SET_NW_DST): + if (ih != NULL) { + if (a->arg.nw_addr != ih->daddr) { + unit_fail("Dest IP addr has not been set"); + return; + } + excl_start[2] = (uint8_t*)(&ih->daddr); + excl_end[2] = (uint8_t*)(&ih->daddr + 1); + } + break; + case(OFPAT_SET_TP_SRC): + if (th != NULL) { + if (a->arg.tp != th->source) { + unit_fail("Source port has not been set"); + return; + } + excl_start[3] 
= (uint8_t*)(&th->source); + excl_end[3] = (uint8_t*)(&th->source + 1); + } else if (uh != NULL) { + if (a->arg.tp != uh->source) { + unit_fail("Source port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&uh->source); + excl_end[3] = (uint8_t*)(&uh->source + 1); + } + break; + case(OFPAT_SET_TP_DST): + if (th != NULL) { + if (a->arg.tp != th->dest) { + unit_fail("Dest port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&th->dest); + excl_end[3] = (uint8_t*)(&th->dest + 1); + } else if (uh != NULL) { + if (a->arg.tp != uh->dest) { + unit_fail("Dest port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&uh->dest); + excl_end[3] = (uint8_t*)(&uh->dest + 1); + } + break; + default: + BUG(); + } + } + + compare(skb->data, p->data, p->len, excl_start, excl_end, 5); + if (unit_failed()) + return; + + if (ih == NULL) + return; + + check_IP_csum(ih); + if (unit_failed()) + return; + + if (th == NULL && uh == NULL) + return; + + check_transport_csum(ih, th, uh); +} + +/* + * Layers 3 & 4 Tests: Given packets in forward_t.h, executes all actions + * with random data, checking for consistency described in check_packet(). 
+ */ + +void +test_l3_l4(void) +{ + struct ofp_action action; + struct sk_buff *skb; + struct sw_flow_key key; + unsigned int i, j; + uint16_t eth_proto; + int ret = 0; + + for (i = 0; i < num_packets; i++) { + skb = alloc_skb(packets[i].len, GFP_KERNEL); + if (!skb) { + unit_fail("Couldn't allocate %uth skb", i); + return; + } + + memcpy(skb_put(skb, packets[i].len), packets[i].data, + packets[i].len); + + skb_set_mac_header(skb, 0); + flow_extract(skb, 0, &key); + eth_proto = ntohs(key.dl_type); + + check_packet(skb, NULL, packets+i); + if (unit_failed()) + return; + + for (action.type = OFPAT_SET_DL_SRC; + action.type <= OFPAT_SET_TP_DST; + action.type++) + { + set_action_data(skb, NULL, &action); + for(j = 0; j < 2; j++) { + skb = execute_setter(skb, eth_proto, &key, &action); + check_packet(skb, &action, packets+i); + if (unit_failed()) { + unit_fail("Packet %u inconsistent " + "after setter on action " + "type %d, iteration %u", + i, action.type, j); + return; + } + set_action_data(skb, &key, &action); + } + } + + kfree_skb(skb); + + if (ret != 0) + break; + } +} + +int +test_vlan(void) +{ + struct ofp_action action; + struct sk_buff *skb; + struct sw_flow_key key; + unsigned int i; + uint16_t eth_proto; + int ret = 0; + struct vlan_ethhdr *vh; + struct ethhdr *eh; + struct net_device dev; + uint16_t new_id, orig_id; + + + memset((char *)&dev, '\0', sizeof(dev)); + + printk("Testing vlan\n"); + for (i = 0; i < num_packets; i++) { + skb = alloc_skb(packets[i].len, GFP_KERNEL); + if (!skb) { + unit_fail("Couldn't allocate %uth skb", i); + return -ENOMEM; + } + + memcpy(skb_put(skb, packets[i].len), packets[i].data, + packets[i].len); + skb->dev = &dev; + + skb_set_mac_header(skb, 0); + flow_extract(skb, 0, &key); + eth_proto = ntohs(key.dl_type); + +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum unmodified", + i); + goto free_skb; + } +#endif + + eh = eth_hdr(skb); + orig_id = eh->h_proto; + + 
action.type = OFPAT_SET_DL_VLAN; + + // Add a random vlan tag + new_id = (uint16_t) random32() & VLAN_VID_MASK; + action.arg.vlan_id = new_id; + skb = execute_setter(skb, eth_proto, &key, &action); + vh = vlan_eth_hdr(skb); + if (ntohs(vh->h_vlan_TCI) != new_id) { + unit_fail("add: vlan id doesn't match: %#x != %#x", + ntohs(vh->h_vlan_TCI), new_id); + return -1; + } + flow_extract(skb, 0, &key); +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after adding vlan", + i); + goto free_skb; + } +#endif + + // Modify the tag + new_id = (uint16_t) random32() & VLAN_VID_MASK; + action.arg.vlan_id = new_id; + skb = execute_setter(skb, eth_proto, &key, &action); + vh = vlan_eth_hdr(skb); + if (ntohs(vh->h_vlan_TCI) != new_id) { + unit_fail("mod: vlan id doesn't match: %#x != %#x", + ntohs(vh->h_vlan_TCI), new_id); + return -1; + } + flow_extract(skb, 0, &key); +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after modifying vlan", + i); + goto free_skb; + } +#endif + + // Remove the tag + action.arg.vlan_id = OFP_VLAN_NONE; + skb = execute_setter(skb, eth_proto, &key, &action); + + eh = eth_hdr(skb); + + if (eh->h_proto != orig_id) { + unit_fail("del: vlan id doesn't match: %#x != %#x", + ntohs(eh->h_proto), ntohs(orig_id)); + return -1; + } +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after removing vlan", + i); + goto free_skb; + } + + free_skb: +#endif + + kfree_skb(skb); + + if (ret != 0) + break; + } + + if (ret == 0) + printk("\nVLAN actions test passed.\n"); + + return ret; + + +} + +/* + * Actual test: Given packets in forward_t.h, executes all actions with random + * data, checking for consistency described in check_packet(). 
+ */ + +void +run_forward_t(void) +{ + test_vlan(); + test_l3_l4(); +} diff --git a/datapath/linux-2.4/.gitignore b/datapath/linux-2.4/.gitignore new file mode 100644 index 00000000..c7afe5b2 --- /dev/null +++ b/datapath/linux-2.4/.gitignore @@ -0,0 +1,19 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/dp_dev.c +/flow.c +/forward.c +/forward_t.c +/datapath_t.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/kernel-src.inc +/tmp diff --git a/datapath/linux-2.4/Makefile.in b/datapath/linux-2.4/Makefile.in new file mode 100644 index 00000000..8e4ba5ba --- /dev/null +++ b/datapath/linux-2.4/Makefile.in @@ -0,0 +1,100 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRC24@ +export KVERSION = 2.4 +export VMDIR = @VMDIR@ + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../dp_dev.c ../datapath_t.c + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = datapath.c $(SIMLINKFILES) + +# create local symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +COMPAT24_CFILES = \ + compat-2.4/random32.c \ + compat-2.4/genetlink.c \ + compat-2.4/netlink.c \ + compat-2.4/attr.c \ + compat-2.4/rcupdate.c \ + compat-2.4/string.c \ + compat-2.4/kernel.c \ + compat-2.4/compat24.c + +UNIT_CFILES = \ + ../table_t.c \ + ../unit.c + +SHARED_T_FILES = \ + ../forward_t.c \ + ../table_t.c \ + ../crc_t.c \ + ../unit.c +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +kFILES = ../datapath.h \ + ../chain.h \ + ../crc32.h \ + ../flow.h \ + ../forward.h \ + ../table.h \ + ../datapath_t.h \ + compat-2.4/include/linux/rcupdate.h \ + compat-2.4/include/linux/jiffies.h \ + compat-2.4/include/linux/ipv6.h \ + compat-2.4/include/linux/timer.h \ + compat-2.4/include/linux/if_vlan.h \ + compat-2.4/include/linux/types.h \ + compat-2.4/include/linux/skbuff.h \ + 
compat-2.4/include/linux/etherdevice.h \ + compat-2.4/include/linux/tcp.h \ + compat-2.4/include/linux/genetlink.h \ + compat-2.4/include/linux/sockios.h \ + compat-2.4/include/linux/list.h \ + compat-2.4/include/linux/udp.h \ + compat-2.4/include/linux/slab.h \ + compat-2.4/include/linux/random.h \ + compat-2.4/include/linux/mutex.h \ + compat-2.4/include/linux/ip.h \ + compat-2.4/include/linux/string.h \ + compat-2.4/include/linux/netlink.h \ + compat-2.4/include/linux/compiler.h \ + compat-2.4/include/linux/kernel.h \ + compat-2.4/include/linux/if_ether.h \ + compat-2.4/include/net/checksum.h \ + compat-2.4/include/net/genetlink.h \ + compat-2.4/include/net/netlink.h \ + compat-2.4/include/asm/system.h \ + compat-2.4/compat24.h + +VERSION = @VERSION@ + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -O2 -g +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(top_srcdir)/include + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + + +PWD := $(shell pwd) + +all: default + + +check: all diff --git a/datapath/linux-2.4/README b/datapath/linux-2.4/README new file mode 100644 index 00000000..87f4cdef --- /dev/null +++ b/datapath/linux-2.4/README @@ -0,0 +1,6 @@ +Linux kernel 2.4 specific build: + + - Backports: compat-2.4/genetlink.c is a back-port of the + Linux 2.6 Generic Netlink functionality. It must be loaded + as module genetlink_mod.o before openflow_mod.o may be + loaded. diff --git a/datapath/linux-2.4/compat-2.4/TODO b/datapath/linux-2.4/compat-2.4/TODO new file mode 100644 index 00000000..c3e45611 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/TODO @@ -0,0 +1,2 @@ +* Properly port RCU to Linux 2.4. In particular we will need support + for call_rcu to properly do flow-table updating. 
diff --git a/datapath/linux-2.4/compat-2.4/attr.c b/datapath/linux-2.4/compat-2.4/attr.c new file mode 100644 index 00000000..a00841cf --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/attr.c @@ -0,0 +1,436 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + +static int validate_nla(struct nlattr *nla, int maxtype, + const struct nla_policy *policy) +{ + const struct nla_policy *pt; + int minlen = 0, attrlen = nla_len(nla); + + if (nla->nla_type <= 0 || nla->nla_type > maxtype) + return 0; + + pt = &policy[nla->nla_type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); + + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; + + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; + + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + case NLA_BINARY: + if (pt->len && attrlen > pt->len) + return -ERANGE; + break; + + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } + + return 0; +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the specified attribute stream against the + * specified policy. 
Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + nla_for_each_attr(nla, head, len, rem) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + err = 0; +errout: + return err; +} + +/** + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessable via the attribute type. Attributes with a type + * exceeding maxtype will be silently ignored for backwards compatibility + * reasons. policy may be set to NULL if no validation is required. + * + * Returns 0 on success or a negative error code. + */ +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla->nla_type; + + if (type > 0 && type <= maxtype) { + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + tb[type] = nla; + } + } + + if (unlikely(rem > 0)) + printk(KERN_WARNING "netlink: %d bytes leftover after parsing " + "attributes.\n", rem); + + err = 0; +errout: + return err; +} + +/** + * nla_find - Find a specific attribute in a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @attrtype: type of attribute to look for + * + * Returns the first attribute in the stream matching the specified type. 
+ */ +struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +{ + struct nlattr *nla; + int rem; + + nla_for_each_attr(nla, head, len, rem) + if (nla->nla_type == attrtype) + return nla; + + return NULL; +} + +/** + * nla_strlcpy - Copy string attribute payload into a sized buffer + * @dst: where to copy the string to + * @src: attribute to copy the string from + * @dstsize: size of destination buffer + * + * Copies at most dstsize - 1 bytes into the destination buffer. + * The result is always a valid NUL-terminated string. Unlike + * strlcpy the destination buffer is always padded out. + * + * Returns the length of the source buffer. + */ +size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla); + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + if (dstsize > 0) { + size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; + + memset(dst, 0, dstsize); + memcpy(dst, src, len); + } + + return srclen; +} + +/** + * nla_memcpy - Copy a netlink attribute into another memory area + * @dest: where to copy to memcpy + * @src: netlink attribute to copy from + * @count: size of the destination area + * + * Note: The number of bytes copied is limited by the length of + * attribute's payload. memcpy + * + * Returns the number of bytes copied. 
+ */ +int nla_memcpy(void *dest, struct nlattr *src, int count) +{ + int minlen = min_t(int, count, nla_len(src)); + + memcpy(dest, nla_data(src), minlen); + + return minlen; +} + +/** + * nla_memcmp - Compare an attribute with sized memory area + * @nla: netlink attribute + * @data: memory area + * @size: size of memory area + */ +int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size) +{ + int d = nla_len(nla) - size; + + if (d == 0) + d = memcmp(nla_data(nla), data, size); + + return d; +} + +/** + * nla_strcmp - Compare a string attribute against a string + * @nla: netlink string attribute + * @str: another string + */ +int nla_strcmp(const struct nlattr *nla, const char *str) +{ + int len = strlen(str) + 1; + int d = nla_len(nla) - len; + + if (d == 0) + d = memcmp(nla_data(nla), str, len); + + return d; +} + +/** + * __nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + struct nlattr *nla; + + nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla->nla_type = attrtype; + nla->nla_len = nla_attr_size(attrlen); + + memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); + + return nla; +} + +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. 
+ */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + +/** + * nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return NULL; + + return __nla_reserve(skb, attrtype, attrlen); +} + +/** + * nla_reserve - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @len: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + +/** + * __nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. 
+ */ +void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nlattr *nla; + + nla = __nla_reserve(skb, attrtype, attrlen); + memcpy(nla_data(nla), data, attrlen); +} + +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} + +/** + * nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return -1; + + __nla_put(skb, attrtype, attrlen, data); + return 0; +} + +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute payload. 
+ */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -1; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} + +EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_parse); +EXPORT_SYMBOL(nla_find); +EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); +EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); +EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); +EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); +EXPORT_SYMBOL(nla_memcpy); +EXPORT_SYMBOL(nla_memcmp); +EXPORT_SYMBOL(nla_strcmp); diff --git a/datapath/linux-2.4/compat-2.4/compat24.c b/datapath/linux-2.4/compat-2.4/compat24.c new file mode 100644 index 00000000..13641ff1 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/compat24.c @@ -0,0 +1,27 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + */ + +#include +#include "compat24.h" + +int __init compat24_init(void) +{ + int err; + + rcu_init(); + + err = random32_init(); + if (err) + return err; + + return genl_init(); + +} +module_init(compat24_init); + +void __exit compat24_exit(void) +{ + genl_exit(); +} +module_exit(compat24_exit); diff --git a/datapath/linux-2.4/compat-2.4/compat24.h b/datapath/linux-2.4/compat-2.4/compat24.h new file mode 100644 index 00000000..4e7038d8 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/compat24.h @@ -0,0 +1,11 @@ +#ifndef __COMPAT24_H +#define __COMPAT24_H 1 + +int genl_init(void); +void genl_exit(void); + +int random32_init(void); + +void rcu_init(void); + +#endif /* compat24.h */ diff --git a/datapath/linux-2.4/compat-2.4/genetlink.c b/datapath/linux-2.4/compat-2.4/genetlink.c new file mode 100644 index 00000000..c9fc55a6 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/genetlink.c @@ -0,0 +1,810 @@ +/* + * NETLINK Generic Netlink Family + * + * Authors: Jamal Hadi Salim + * Thomas Graf + * Johannes Berg + */ + +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +#include "compat24.h" + +struct sock *genl_sock = NULL; + +static DECLARE_MUTEX(genl_mutex); /* serialization of message processing */ + +static void genl_lock(void) +{ + down(&genl_mutex); +} + +static int genl_trylock(void) +{ + return down_trylock(&genl_mutex); +} + +static void genl_unlock(void) +{ + up(&genl_mutex); + + if (genl_sock && genl_sock->receive_queue.qlen) + genl_sock->data_ready(genl_sock, 0); +} + +#define GENL_FAM_TAB_SIZE 16 +#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) + +static struct list_head family_ht[GENL_FAM_TAB_SIZE]; +/* + * Bitmap of multicast groups that are currently in use. + * + * To avoid an allocation at boot of just one unsigned long, + * declare it global instead. + * Bit 0 is marked as already used since group 0 is invalid. + */ +static unsigned long mc_group_start = 0x1; +static unsigned long *mc_groups = &mc_group_start; +static unsigned long mc_groups_longs = 1; + +static int genl_ctrl_event(int event, void *data); + +static inline unsigned int genl_family_hash(unsigned int id) +{ + return id & GENL_FAM_TAB_MASK; +} + +static inline struct list_head *genl_family_chain(unsigned int id) +{ + return &family_ht[genl_family_hash(id)]; +} + +static struct genl_family *genl_family_find_byid(unsigned int id) +{ + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(id), family_list) + if (f->id == id) + return f; + + return NULL; +} + +static struct genl_family *genl_family_find_byname(char *name) +{ + struct genl_family *f; + int i; + + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) + list_for_each_entry(f, genl_family_chain(i), family_list) + if (strcmp(f->name, name) == 0) + return f; + + return NULL; +} + +static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +{ + struct genl_ops *ops; + + list_for_each_entry(ops, &family->ops_list, ops_list) + if (ops->cmd == cmd) + return ops; + + return NULL; +} + +/* Of course we are 
going to have problems once we hit + * 2^16 alive types, but that can only happen by year 2K +*/ +static inline u16 genl_generate_id(void) +{ + static u16 id_gen_idx; + int overflowed = 0; + + do { + if (id_gen_idx == 0) + id_gen_idx = GENL_MIN_ID; + + if (++id_gen_idx > GENL_MAX_ID) { + if (!overflowed) { + overflowed = 1; + id_gen_idx = 0; + continue; + } else + return 0; + } + + } while (genl_family_find_byid(id_gen_idx)); + + return id_gen_idx; +} + +static struct genl_multicast_group notify_grp; + +/** + * genl_register_mc_group - register a multicast group + * + * Registers the specified multicast group and notifies userspace + * about the new group. + * + * Returns 0 on success or a negative error code. + * + * @family: The generic netlink family the group shall be registered for. + * @grp: The group to register, must have a name. + */ +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + int id; + + BUG_ON(grp->name[0] == '\0'); + + genl_lock(); + + /* special-case our own group */ + if (grp == ¬ify_grp) + id = GENL_ID_CTRL; + else + id = find_first_zero_bit(mc_groups, + mc_groups_longs * BITS_PER_LONG); + + + if (id >= mc_groups_longs * BITS_PER_LONG) { + genl_unlock(); + return -ENOMEM; + } + + grp->id = id; + set_bit(id, mc_groups); + list_add_tail(&grp->list, &family->mcast_groups); + grp->family = family; + + genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); + genl_unlock(); + return 0; +} +EXPORT_SYMBOL(genl_register_mc_group); + +static void __genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + BUG_ON(grp->family != family); + + /* We should clear this multicast group from any subscribers, but 2.4 + * doesn't have the proper interface to do it, and we'd need a patch to + * implement it. 
*/ + /*netlink_clear_multicast_users(genl_sock, grp->id);*/ + clear_bit(grp->id, mc_groups); + list_del(&grp->list); + genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, grp); + grp->id = 0; + grp->family = NULL; +} + +/** + * genl_unregister_mc_group - unregister a multicast group + * + * Unregisters the specified multicast group and notifies userspace + * about it. All current listeners on the group are removed. + * + * Note: It is not necessary to unregister all multicast groups before + * unregistering the family, unregistering the family will cause + * all assigned multicast groups to be unregistered automatically. + * + * @family: Generic netlink family the group belongs to. + * @grp: The group to unregister, must have been registered successfully + * previously. + */ +void genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + genl_lock(); + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} +EXPORT_SYMBOL(genl_unregister_mc_group); + +static void genl_unregister_mc_groups(struct genl_family *family) +{ + struct genl_multicast_group *grp, *tmp; + + genl_lock(); + list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list) + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} + +/** + * genl_register_ops - register generic netlink operations + * @family: generic netlink family + * @ops: operations to be registered + * + * Registers the specified operations and assigns them to the specified + * family. Either a doit or dumpit callback must be specified or the + * operation will fail. Only one operation structure per command + * identifier may be registered. + * + * See include/net/genetlink.h for more documenation on the operations + * structure. + * + * Returns 0 on success or a negative error code. 
+ */ +int genl_register_ops(struct genl_family *family, struct genl_ops *ops) +{ + int err = -EINVAL; + + if (ops->dumpit == NULL && ops->doit == NULL) + goto errout; + + if (genl_get_cmd(ops->cmd, family)) { + err = -EEXIST; + goto errout; + } + + if (ops->dumpit) + ops->flags |= GENL_CMD_CAP_DUMP; + if (ops->doit) + ops->flags |= GENL_CMD_CAP_DO; + if (ops->policy) + ops->flags |= GENL_CMD_CAP_HASPOL; + + genl_lock(); + list_add_tail(&ops->ops_list, &family->ops_list); + genl_unlock(); + + genl_ctrl_event(CTRL_CMD_NEWOPS, ops); + err = 0; +errout: + return err; +} + +/** + * genl_unregister_ops - unregister generic netlink operations + * @family: generic netlink family + * @ops: operations to be unregistered + * + * Unregisters the specified operations and unassigns them from the + * specified family. The operation blocks until the current message + * processing has finished and doesn't start again until the + * unregister process has finished. + * + * Note: It is not necessary to unregister all operations before + * unregistering the family, unregistering the family will cause + * all assigned operations to be unregistered automatically. + * + * Returns 0 on success or a negative error code. + */ +int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) +{ + struct genl_ops *rc; + + genl_lock(); + list_for_each_entry(rc, &family->ops_list, ops_list) { + if (rc == ops) { + list_del(&ops->ops_list); + genl_unlock(); + genl_ctrl_event(CTRL_CMD_DELOPS, ops); + return 0; + } + } + genl_unlock(); + + return -ENOENT; +} + +/** + * genl_register_family - register a generic netlink family + * @family: generic netlink family + * + * Registers the specified family after validating it first. Only one + * family may be registered with the same family name or identifier. + * The family id may equal GENL_ID_GENERATE causing an unique id to + * be automatically generated and assigned. + * + * Return 0 on success or a negative error code. 
/*
 * genl_register_family - validate a family and add it to the family table
 * @family: generic netlink family to register
 *
 * Returns 0 on success, -EINVAL for an id outside the permitted range,
 * -EEXIST when the name or id is already registered, and -ENOMEM when no
 * id could be generated or the attribute buffer could not be allocated.
 */
int genl_register_family(struct genl_family *family)
{
	int err = -EINVAL;

	/* A non-zero id must lie in [GENL_MIN_ID, GENL_MAX_ID].  Zero is
	 * GENL_ID_GENERATE and is replaced by a generated id below. */
	if (family->id && family->id < GENL_MIN_ID)
		goto errout;

	if (family->id > GENL_MAX_ID)
		goto errout;

	INIT_LIST_HEAD(&family->ops_list);
	INIT_LIST_HEAD(&family->mcast_groups);

	genl_lock();

	/* Both the name and the id have to be unused. */
	if (genl_family_find_byname(family->name)) {
		err = -EEXIST;
		goto errout_locked;
	}

	if (genl_family_find_byid(family->id)) {
		err = -EEXIST;
		goto errout_locked;
	}

	if (family->id == GENL_ID_GENERATE) {
		u16 newid = genl_generate_id();

		/* A generated id of 0 is treated as "none available". */
		if (!newid) {
			err = -ENOMEM;
			goto errout_locked;
		}

		family->id = newid;
	}

	/* attrbuf holds one pointer per attribute type 0..maxattr; it is the
	 * table genl_rcv_msg() hands to nlmsg_parse() for this family. */
	if (family->maxattr) {
		family->attrbuf = kmalloc((family->maxattr+1) *
					sizeof(struct nlattr *), GFP_KERNEL);
		if (family->attrbuf == NULL) {
			err = -ENOMEM;
			goto errout_locked;
		}
	} else
		family->attrbuf = NULL;

	list_add_tail(&family->family_list, genl_family_chain(family->id));
	/* Module reference taken once per successfully registered family. */
	MOD_INC_USE_COUNT;
	genl_unlock();

	/* The notification is sent only after the lock has been dropped. */
	genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);

	return 0;

errout_locked:
	genl_unlock();
errout:
	return err;
}
+ */ +int genl_unregister_family(struct genl_family *family) +{ + struct genl_family *rc; + + genl_unregister_mc_groups(family); + + genl_lock(); + + list_for_each_entry(rc, genl_family_chain(family->id), family_list) { + if (family->id != rc->id || strcmp(rc->name, family->name)) + continue; + + list_del(&rc->family_list); + INIT_LIST_HEAD(&family->ops_list); + genl_unlock(); + + kfree(family->attrbuf); + genl_ctrl_event(CTRL_CMD_DELFAMILY, family); + return 0; + } + + MOD_DEC_USE_COUNT; + genl_unlock(); + + return -ENOENT; +} + +static int null_done_func(struct netlink_callback *cb) +{ + return 0; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_ops *ops; + struct genl_family *family; + struct genl_info info; + struct genlmsghdr *hdr = nlmsg_data(nlh); + int hdrlen, err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + hdrlen = GENL_HDRLEN + family->hdrsize; + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + ops = genl_get_cmd(hdr->cmd, family); + if (ops == NULL) + return -EOPNOTSUPP; + + if ((ops->flags & GENL_ADMIN_PERM) && !capable(CAP_NET_ADMIN)) + return -EPERM; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (ops->dumpit == NULL) + return -EOPNOTSUPP; + + return netlink_dump_start(genl_sock, skb, nlh, + ops->dumpit, + ops->done ?: null_done_func); + } + + if (ops->doit == NULL) + return -EOPNOTSUPP; + + if (family->attrbuf) { + err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + ops->policy); + if (err < 0) + return err; + } + + info.snd_seq = nlh->nlmsg_seq; + info.snd_pid = NETLINK_CB(skb).pid; + info.nlhdr = nlh; + info.genlhdr = nlmsg_data(nlh); + info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; + info.attrs = family->attrbuf; + + return ops->doit(skb, &info); +} + +static void genl_rcv(struct sock *sk, int len) +{ + unsigned int qlen = 0; + + do { + if (genl_trylock()) + return; + netlink_run_queue(sk, &qlen, genl_rcv_msg); + 
genl_unlock(); + } while (qlen && genl_sock && genl_sock->receive_queue.qlen); +} + +/************************************************************************** + * Controller + **************************************************************************/ + +static struct genl_family genl_ctrl = { + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, +}; + +static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, + u32 flags, struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + if (hdr == NULL) + return -1; + + NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); + NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + if (!list_empty(&family->ops_list)) { + struct nlattr *nla_ops; + struct genl_ops *ops; + int idx = 1; + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); + } + + if (!list_empty(&family->mcast_groups)) { + struct genl_multicast_group *grp; + struct nlattr *nla_grps; + int idx = 1; + + nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + if (nla_grps == NULL) + goto nla_put_failure; + + list_for_each_entry(grp, &family->mcast_groups, list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id); + NLA_PUT_STRING(skb, CTRL_ATTR_MCAST_GRP_NAME, + grp->name); + + nla_nest_end(skb, nest); + } + 
/*
 * ctrl_dumpfamily - dump callback: emit one CTRL_CMD_NEWFAMILY message per
 * registered family into @skb.
 *
 * cb->args[0] and cb->args[1] form the resume cursor: the hash chain index
 * and the 1-based position inside that chain reached by the previous call.
 * Returns skb->len.
 */
static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
{

	int i, n = 0;
	struct genl_family *rt;
	int chains_to_skip = cb->args[0];
	int fams_to_skip = cb->args[1];

	/* The lock is taken only when resuming past chain 0.
	 * NOTE(review): presumably the first invocation runs with the genl
	 * lock already held by genl_rcv() -- confirm against
	 * netlink_dump_start().  A resume that is still in chain 0 also
	 * runs without taking the lock here. */
	if (chains_to_skip != 0)
		genl_lock();

	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
		if (i < chains_to_skip)
			continue;
		n = 0;
		list_for_each_entry(rt, genl_family_chain(i), family_list) {
			/* Skips entries 1..fams_to_skip-1, so the entry that
			 * did not fit last time is attempted again. */
			if (++n < fams_to_skip)
				continue;
			if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   skb, CTRL_CMD_NEWFAMILY) < 0)
				goto errout;
		}

		/* The in-chain offset applies to the first resumed chain only. */
		fams_to_skip = 0;
	}

errout:
	if (chains_to_skip != 0)
		genl_unlock();

	/* Record where to pick up on the next call. */
	cb->args[0] = i;
	cb->args[1] = n;

	return skb->len;
}
skb, cmd); + if (err < 0) { + nlmsg_free(skb); + return ERR_PTR(err); + } + + return skb; +} + +static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, + u32 pid, int seq, u8 cmd) +{ + struct sk_buff *skb; + int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) + return ERR_PTR(-ENOBUFS); + + err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + if (err < 0) { + nlmsg_free(skb); + return ERR_PTR(err); + } + + return skb; +} + +static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { + [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, + [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, + .len = GENL_NAMSIZ - 1 }, +}; + +static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct genl_family *res = NULL; + int err = -EINVAL; + + if (info->attrs[CTRL_ATTR_FAMILY_ID]) { + u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); + res = genl_family_find_byid(id); + } + + if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { + char *name; + + name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); + res = genl_family_find_byname(name); + } + + if (res == NULL) { + err = -ENOENT; + goto errout; + } + + msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + CTRL_CMD_NEWFAMILY); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto errout; + } + + err = genlmsg_reply(msg, info); +errout: + return err; +} + +static int genl_ctrl_event(int event, void *data) +{ + struct sk_buff *msg; + + if (genl_sock == NULL) + return 0; + + switch (event) { + case CTRL_CMD_NEWFAMILY: + case CTRL_CMD_DELFAMILY: + msg = ctrl_build_family_msg(data, 0, 0, event); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + break; + case CTRL_CMD_NEWMCAST_GRP: + case CTRL_CMD_DELMCAST_GRP: + msg = ctrl_build_mcgrp_msg(data, 0, 0, event); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + break; + } + + return 0; +} 
+ +static struct genl_ops genl_ctrl_ops = { + .cmd = CTRL_CMD_GETFAMILY, + .doit = ctrl_getfamily, + .dumpit = ctrl_dumpfamily, + .policy = ctrl_policy, +}; + +static struct genl_multicast_group notify_grp = { + .name = "notify", +}; + +int __init genl_init(void) +{ + int i, err; + + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) + INIT_LIST_HEAD(&family_ht[i]); + + err = genl_register_family(&genl_ctrl); + if (err < 0) + goto errout; + + err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); + if (err < 0) + goto errout_register; + + netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); + genl_sock = netlink_kernel_create(NETLINK_GENERIC, genl_rcv); + if (genl_sock == NULL) + panic("GENL: Cannot initialize generic netlink\n"); + + err = genl_register_mc_group(&genl_ctrl, ¬ify_grp); + if (err < 0) + goto errout_register; + + return 0; + +errout_register: + genl_unregister_family(&genl_ctrl); +errout: + panic("GENL: Cannot register controller: %d\n", err); +} + +void __exit genl_exit(void) +{ + int err; + + err = genl_unregister_ops(&genl_ctrl, &genl_ctrl_ops); + if (err) { + printk("GENL: cannot unregister ops (%d)\n", err); + return; + } + + err = genl_unregister_family(&genl_ctrl); + if (err) { + printk("GENL: cannot unregister family (%d)\n", err); + return; + } + +} + +EXPORT_SYMBOL(genl_sock); +EXPORT_SYMBOL(genl_register_ops); +EXPORT_SYMBOL(genl_unregister_ops); +EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(genl_unregister_family); + +MODULE_LICENSE("GPL"); diff --git a/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h new file mode 100644 index 00000000..1a1bb450 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h @@ -0,0 +1,56 @@ +#ifndef __ASM_ARM_ATOMIC_H_WRAPPER +#define __ASM_ARM_ATOMIC_H_WRAPPER 1 + +#include_next + +#error "Cribbed from linux-2.6/include/asm-arm/atomic.h but untested" + +#ifdef __KERNEL__ + +#if __LINUX_ARM_ARCH__ >= 6 + +static inline int 
atomic_cmpxchg(atomic_t *ptr, int old, int new) +{ + unsigned long oldval, res; + + do { + __asm__ __volatile__("@ atomic_cmpxchg\n" + "ldrex %1, [%2]\n" + "mov %0, #0\n" + "teq %1, %3\n" + "strexeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : "cc"); + } while (res); + + return oldval; +} + +#else /* ARM_ARCH_6 */ + +#include + +#ifdef CONFIG_SMP +#error SMP not supported on pre-ARMv6 CPUs +#endif + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + unsigned long flags; + + raw_local_irq_save(flags); + ret = v->counter; + if (likely(ret == old)) + v->counter = new; + raw_local_irq_restore(flags); + + return ret; +} + +#endif /* __LINUX_ARM_ARCH__ */ + +#endif /* __KERNEL__ */ + +#endif /* asm/atomic.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h new file mode 100644 index 00000000..7badb562 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h @@ -0,0 +1,10 @@ +#ifndef __ASM_I386_ATOMIC_WRAPPER_H +#define __ASM_I386_ATOMIC_WRAPPER_H 1 + +#include_next + +#include + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#endif /* atomic.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h new file mode 100644 index 00000000..735c6168 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h @@ -0,0 +1,11 @@ +#ifndef __ASM_MIPS_ATOMIC_H_WRAPPER +#define __ASM_MIPS_ATOMIC_H_WRAPPER 1 + +#include_next +#include + +#error "Cribbed from linux-2.6/include/asm-mips but not tested." 
/* Not sure whether these really need to be defined, but the conservative
 * choice seems to be to define them. */
#define CONFIG_WEAK_ORDERING 1
#define CONFIG_WEAK_REORDERING_BEYOND_LLSC 1

/* Barrier instruction text: a real "sync" only on SMP builds, otherwise an
 * empty line so the asm statement acts purely as a compiler barrier. */
#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
#define __WEAK_ORDERING_MB " sync \n"
#else
#define __WEAK_ORDERING_MB " \n"
#endif
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB " sync \n"
#else
#define __WEAK_LLSC_MB " \n"
#endif

#define smp_mb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
#define smp_rmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
#define smp_wmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")

/* The cmpxchg helpers in the MIPS asm/system.h wrapper end with
 * smp_llsc_mb(); provide it here from __WEAK_LLSC_MB, which was otherwise
 * defined but never used.  Guarded in case a kernel header already
 * supplies one. */
#ifndef smp_llsc_mb
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#endif
+ +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, + unsigned long new) +{ + __u32 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqz $1, 3f \n" + "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + raw_local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + raw_local_irq_restore(flags); /* implies memory barrier */ + } + + smp_llsc_mb(); + + return retval; +} + +static inline unsigned long __cmpxchg_u32_local(volatile int * m, + unsigned long old, unsigned long new) +{ + __u32 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqz $1, 1b \n" + "2: \n" + " .set pop 
\n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + } + + return retval; +} + +#ifdef CONFIG_64BIT +static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, + unsigned long new) +{ + __u64 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqz $1, 3f \n" + "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + raw_local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + raw_local_irq_restore(flags); /* implies memory barrier */ + } + + smp_llsc_mb(); + + return retval; +} + +static inline unsigned long __cmpxchg_u64_local(volatile int * m, + unsigned long old, unsigned long new) +{ + __u64 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # 
__cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqz $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + } + + return retval; +} + +#else +extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels +extern unsigned long __cmpxchg_u64_local_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64_local __cmpxchg_u64_local_unsupported_on_32bit_kernels +#endif + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +static inline unsigned long __cmpxchg_local(volatile void * ptr, + unsigned long old, unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); + case 8: + return __cmpxchg_u64_local(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,old,new) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr)))) + +#define cmpxchg_local(ptr,old,new) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr)))) + +#endif /* asm/system.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/asm/system.h 
/* Data-dependency read barrier.  Only Alpha needs a real instruction; all
 * other architectures get an empty statement.  Both variants are
 * function-like so that read_barrier_depends() expands to a single
 * statement (the Alpha one used to be object-like, leaving a stray "()"
 * behind the asm). */
#ifdef CONFIG_ALPHA
#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
#else
#define read_barrier_depends() do { } while(0)
#endif

/* SMP flavour: a real barrier only when CONFIG_SMP is set. */
#ifdef CONFIG_SMP
#define smp_read_barrier_depends() read_barrier_depends()
#else
#define smp_read_barrier_depends() do { } while(0)
#endif
/*
 * jiffies_to_msecs - convert a jiffies count to milliseconds
 * @j: number of jiffies
 *
 * Three compile-time cases, selected by HZ:
 *  - HZ divides 1000: exact, a single multiplication;
 *  - HZ is a multiple of 1000: division by HZ/1000, rounded up;
 *  - anything else: j * 1000 / HZ, truncated.
 * The result is narrowed to unsigned int.
 */
static inline unsigned int jiffies_to_msecs(const unsigned long j)
{
#if HZ <= 1000 && !(1000 % HZ)
	return (1000 / HZ) * j;
#elif HZ > 1000 && !(HZ % 1000)
	return (j + (HZ / 1000) - 1)/(HZ / 1000);
#else
	return (j * 1000) / HZ;
#endif
}

/*
 * msecs_to_jiffies - convert milliseconds to a jiffies count
 * @m: number of milliseconds
 *
 * Values above MAX_MSEC_OFFSET saturate to MAX_JIFFY_OFFSET; the check is
 * only active when MAX_MSEC_OFFSET is smaller than UINT_MAX.  Otherwise:
 *  - HZ divides 1000: division by 1000/HZ, rounded up;
 *  - HZ is a multiple of 1000: exact, a single multiplication;
 *  - anything else: m * HZ / 1000, rounded up.
 */
static inline unsigned long msecs_to_jiffies(const unsigned int m)
{
	if (MAX_MSEC_OFFSET < UINT_MAX && m > (unsigned int)MAX_MSEC_OFFSET)
		return MAX_JIFFY_OFFSET;
#if HZ <= 1000 && !(1000 % HZ)
	return ((unsigned long)m + (1000 / HZ) - 1) / (1000 / HZ);
#elif HZ > 1000 && !(HZ % 1000)
	return (unsigned long)m * (HZ / 1000);
#else
	return ((unsigned long)m * HZ + 999) / 1000;
#endif
}
+ */ +static inline int is_local_ether_addr(const u8 *addr) +{ + return (0x02 & addr[0]); +} + +/** + * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is the broadcast address. + */ +static inline int is_broadcast_ether_addr(const u8 *addr) +{ + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; +} + +/** + * random_ether_addr - Generate software assigned random Ethernet address + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Generate a random Ethernet address (MAC) that is not multicast + * and has the local assigned bit set. + */ +static inline void random_ether_addr(u8 *addr) +{ + get_random_bytes (addr, ETH_ALEN); + addr [0] &= 0xfe; /* clear multicast bit */ + addr [0] |= 0x02; /* set local assignment bit (IEEE802) */ +} + +/** + * compare_ether_addr - Compare two Ethernet addresses + * @addr1: Pointer to a six-byte array containing the Ethernet address + * @addr2: Pointer other six-byte array containing the Ethernet address + * + * Compare two ethernet addresses, returns 0 if equal + */ +static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h b/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h new file mode 100644 index 00000000..7da02c93 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h @@ -0,0 +1,82 @@ +#ifndef __LINUX_GENERIC_NETLINK_H +#define __LINUX_GENERIC_NETLINK_H + +#include + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN 
NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_GENERATE 0 +#define GENL_ID_CTRL NLMSG_MIN_TYPE + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/gfp.h b/datapath/linux-2.4/compat-2.4/include/linux/gfp.h new file mode 100644 index 00000000..27881d3b --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/gfp.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_GFP_WRAPPER_H +#define __LINUX_GFP_WRAPPER_H 1 + +#include + +#endif /* linux/gfp.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h b/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h new file mode 100644 index 00000000..2b2d3db3 --- /dev/null +++ 
b/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_IF_ETHER_WRAPPER_H +#define __LINUX_IF_ETHER_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +#include + +static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb_mac_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h b/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h new file mode 100644 index 00000000..21629460 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_IF_VLAN_WRAPPER_H +#define __LINUX_IF_VLAN_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +#include +#include +#include + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb_mac_header(skb); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,26) +static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + struct vlan_ethhdr *veth; + + if (skb_headroom(skb) < VLAN_HLEN) { + struct sk_buff *sk_tmp = skb; + skb = skb_realloc_headroom(sk_tmp, VLAN_HLEN); + kfree_skb(sk_tmp); + if (!skb) { + printk(KERN_ERR "vlan: failed to realloc headroom\n"); + return NULL; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "vlan: failed to unshare skbuff\n"); + return NULL; + } + } + + veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); + + /* Move the mac addresses to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, 2 * VLAN_ETH_ALEN); + + /* first, the ethernet type */ + veth->h_vlan_proto = __constant_htons(ETH_P_8021Q); + + /* now, the tag */ + veth->h_vlan_TCI = htons(tag); + + skb_reset_mac_header(skb); + + return skb; +} + +#else + +#define vlan_put_tag(x,y) fix_vlan_put_tag((x),(y)); + +/* For some reason, older versions of vlan_put_tag do not adjust the + * pointer to the beginning of the MAC header. 
We get around that by + * this hack. Ugh. */ +static inline struct sk_buff *fix_vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + skb = (vlan_put_tag)(skb, tag); + skb_reset_mac_header(skb); + + return skb; +} +#endif + +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/ip.h b/datapath/linux-2.4/compat-2.4/include/linux/ip.h new file mode 100644 index 00000000..b2fbdb93 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/ip.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_IP_WRAPPER_H +#define __LINUX_IP_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +#include + +static inline struct iphdr *ip_hdr(const struct sk_buff *skb) +{ + return (struct iphdr *)skb_network_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h b/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h new file mode 100644 index 00000000..42b5ac0a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_IPV6_WRAPPER_H +#define __LINUX_IPV6_WRAPPER_H 1 + +#include_next + +static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) +{ + return (struct ipv6hdr *)skb_network_header(skb); +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h b/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h new file mode 100644 index 00000000..718fe91d --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h @@ -0,0 +1,10 @@ +#ifndef __LINUX_JIFFIES_WRAPPER_H +#define __LINUX_JIFFIES_WRAPPER_H 1 + +#include +#include +#include + +extern unsigned long volatile jiffies; + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/kernel.h b/datapath/linux-2.4/compat-2.4/include/linux/kernel.h new file mode 100644 index 00000000..329dab53 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/kernel.h @@ -0,0 +1,43 @@ +#ifndef __LINUX_KERNEL_WRAPPER_H +#define __LINUX_KERNEL_WRAPPER_H 1 + +#include_next + +/** + * container_of - 
cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ * The typed temporary lets the compiler check @ptr against @member's type.
+ */
+#define container_of(ptr, type, member) ({ \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+	typeof(x) __dummy2; \
+	(void)(&__dummy == &__dummy2); \
+	1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({ typeof(type) __tmp = function; \
+	(void)__tmp; \
+})
+
+int vprintk(const char *msg, ...)
+	__attribute__((format(printf, 1, 0)));	/* NOTE(review): 0 disables argument checking although this is variadic -- confirm */
+
+/* Force a compilation error if condition is true (the array size goes negative) */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/list.h b/datapath/linux-2.4/compat-2.4/include/linux/list.h
new file mode 100644
index 00000000..af98d8c6
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/list.h
@@ -0,0 +1,510 @@
+#ifndef __LINUX_LIST_WRAPPER_H
+#define __LINUX_LIST_WRAPPER_H
+
+#ifdef __KERNEL__
+
+#include_next
+#include
+
+#define LIST_POISON1 ((void *) 0x00100100)	/* stored in ->next of a deleted node */
+#define LIST_POISON2 ((void *) 0x00200200)	/* stored in ->prev / ->pprev of a deleted node */
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add_rcu(struct list_head * new,
+		struct list_head * prev, struct list_head * next)
+{
+	new->next = next;
+	new->prev = prev;
+	smp_wmb();	/* new's own links are written before new is linked in */
+	next->prev = new;
+	prev->next = new;
+}
+
+/**
+ * list_add_rcu - add a new entry to the front of an rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_add_rcu()
+ * or list_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ */
+static inline void list_add_rcu(struct list_head *new, struct list_head *head)
+{
+	__list_add_rcu(new, head, head->next);
+}
+
+/**
+ * list_add_tail_rcu - add a new entry to the tail of an rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_add_tail_rcu()
+ * or list_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ */
+static inline void list_add_tail_rcu(struct list_head *new,
+					struct list_head *head)
+{
+	__list_add_rcu(new, head->prev, head);
+}
+
+/**
+ * list_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * Note: list_empty() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_del_rcu()
+ * or list_add_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ *
+ * Note that the caller is not permitted to immediately free
+ * the newly deleted entry. Instead, either synchronize_rcu()
+ * or call_rcu() must be used to defer freeing until an RCU
+ * grace period has elapsed.
+ */
+static inline void list_del_rcu(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->prev = LIST_POISON2;	/* only ->prev is poisoned; ->next is left for readers */
+}
+
+/**
+ * list_replace_rcu - replace old entry by new one
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * The @old entry will be replaced with the @new entry atomically.
+ * Note: @old should not be empty.
+ */
+static inline void list_replace_rcu(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->prev = old->prev;
+	smp_wmb();	/* new's own links are written before the neighbours point at it */
+	new->next->prev = new;
+	new->prev->next = new;
+	old->prev = LIST_POISON2;	/* old->next is left intact */
+}
+/**
+ * list_for_each_rcu - iterate over an rcu-protected list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_rcu(pos, head) \
+	for (pos = (head)->next; \
+		prefetch(rcu_dereference(pos)->next), pos != (head); \
+		pos = pos->next)
+
+#define __list_for_each_rcu(pos, head) \
+	for (pos = (head)->next; \
+		rcu_dereference(pos) != (head); \
+		pos = pos->next)
+
+/**
+ * list_for_each_safe_rcu - iterate over an rcu-protected list, removal-safe
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * Iterate over an rcu-protected list, safe against removal of list entry.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_safe_rcu(pos, n, head) \
+	for (pos = (head)->next; \
+		n = rcu_dereference(pos)->next, pos != (head); \
+		pos = n)
+
+/**
+ * list_for_each_entry_rcu - iterate over rcu list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_entry_rcu(pos, head, member) \
+	for (pos = list_entry((head)->next, typeof(*pos), member); \
+		prefetch(rcu_dereference(pos)->member.next), \
+			&pos->member != (head); \
+		pos = list_entry(pos->member.next, typeof(*pos), member))
+
+
+/**
+ * list_for_each_continue_rcu - resume iterating over an rcu-protected list
+ * @pos: the &struct list_head to continue after; also the loop cursor.
+ * @head: the head for your list.
+ *
+ * Iterate over an rcu-protected list, continuing after current point.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_continue_rcu(pos, head) \
+	for ((pos) = (pos)->next; \
+		prefetch(rcu_dereference((pos))->next), (pos) != (head); \
+		(pos) = (pos)->next)
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;	/* pprev: address of the pointer that points at this node */
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;	/* a NULL pprev is the state INIT_HLIST_NODE() leaves */
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;	/* whoever pointed at n now points at n's successor */
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu().
+ */
+static inline void hlist_del_rcu(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->pprev = LIST_POISON2;	/* only ->pprev is poisoned; ->next is left for readers */
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+/**
+ * hlist_replace_rcu - replace old entry by new one
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * The @old entry will be replaced with the @new entry atomically.
+ */
+static inline void hlist_replace_rcu(struct hlist_node *old,
+					struct hlist_node *new)
+{
+	struct hlist_node *next = old->next;
+
+	new->next = next;
+	new->pprev = old->pprev;
+	smp_wmb();	/* new's own links are written before it is linked in */
+	if (next)
+		new->next->pprev = &new->next;
+	*new->pprev = new;
+	old->pprev = LIST_POISON2;	/* old->next is left intact */
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+
+/**
+ * hlist_add_head_rcu - add an element at the head of an hlist
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs. Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_add_head_rcu(struct hlist_node *n,
+					struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	n->pprev = &h->first;
+	smp_wmb();	/* n's own links are written before n is published via h->first */
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+}
+
+/* Insert n in front of next; next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;	/* note the roles: next is the node being inserted, after n */
+	n->next = next;
+	next->pprev = &n->next;
+	/* re-point the back-link of whatever now follows the inserted node */
+	if(next->next)
+		next->next->pprev = &next->next;
+}
+
+/**
+ * hlist_add_before_rcu
+ * @n: the new element to add to the hash list.
+ * @next: the existing element to add the new element before.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist
+ * before the specified node while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_before_rcu(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	smp_wmb();	/* n's own links are written before n is linked in */
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+/**
+ * hlist_add_after_rcu
+ * @prev: the existing element to add the new element after.
+ * @n: the new element to add to the hash list.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist
+ * after the specified node while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_after_rcu(struct hlist_node *prev,
+				       struct hlist_node *n)
+{
+	n->next = prev->next;
+	n->pprev = &prev->next;
+	smp_wmb();	/* n's own links are written before prev->next exposes it */
+	prev->next = n;
+	if (n->next)
+		n->next->pprev = &n->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)	/* node -> containing struct */
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+	     pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry - iterate over hlist of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member) \
+	for (pos = (head)->first; \
+	     pos && ({ prefetch(pos->next); 1;}) && \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member) \
+	for (pos = (pos)->next; \
+	     pos && ({ prefetch(pos->next); 1;}) && \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to start from; it is also the loop cursor.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member) \
+	for (; pos && ({ prefetch(pos->next); 1;}) && \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over hlist of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+	for (pos = (head)->first; \
+	     pos && ({ n = pos->next; 1; }) && \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) +/** + * list_for_each_entry_safe - iterate over list of given type safe against remov +al of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* linux kernel < 2.4.23 */ + + +#else +#warning "don't include kernel headers in userspace" +#endif /* __KERNEL__ */ +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/module.h b/datapath/linux-2.4/compat-2.4/include/linux/module.h new file mode 100644 index 00000000..797a330a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/module.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_MODULE_WRAPPER_H +#define __LINUX_MODULE_WRAPPER_H 1 + +#include_next + +static inline int try_module_get(struct module *module) +{ + BUG_ON(module != THIS_MODULE); + MOD_INC_USE_COUNT; + return 1; +} + +static inline void module_put(struct module *module) +{ + BUG_ON(module != THIS_MODULE); + MOD_DEC_USE_COUNT; +} + +#endif /* module.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/mutex.h b/datapath/linux-2.4/compat-2.4/include/linux/mutex.h new file mode 100644 index 00000000..98cf07a5 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/mutex.h @@ -0,0 +1,58 @@ +#ifndef __LINUX_MUTEX_H +#define __LINUX_MUTEX_H + +#include + +struct mutex { + struct semaphore sema; +}; + +#define 
mutex_init(mutex) init_MUTEX(&mutex->sema) +#define mutex_destroy(mutex) do { } while (0) + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) } + +/** + * mutex_is_locked - is the mutex locked + * @lock: the mutex to be queried + * + * Returns 1 if the mutex is locked, 0 if unlocked. + */ +static inline int mutex_is_locked(struct mutex *lock) +{ + return sem_getcount(&lock->sema) == 0; +} + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt. + */ +static inline void mutex_lock(struct mutex *lock) +{ + down(&lock->sema); +} + +static inline int mutex_lock_interruptible(struct mutex *lock) +{ + return down_interruptible(&lock->sema); +} + +#define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) + +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! + */ +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sema); +} + +static inline void mutex_unlock(struct mutex *lock) +{ + up(&lock->sema); +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h new file mode 100644 index 00000000..6eba6513 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_NETDEVICE_WRAPPER_H +#define __LINUX_NETDEVICE_WRAPPER_H 1 + +#include_next + +/*---------------------------------------------------------------------------- + * In 2.6.24, a namespace argument became required for dev_get_by_name. 
+ */
+#define net_init NULL	/* NOTE(review): 2.6.24 names its namespace init_net -- confirm callers use this spelling */
+
+#ifdef dev_get_by_name
+#undef dev_get_by_name
+#define dev_get_by_name(net, name) \
+	compat_dev_get_by_name((name))
+static inline struct net_device *compat_dev_get_by_name(const char *name)
+{
+	return (_set_ver(dev_get_by_name))(name);	/* presumably the versioned symbol the old macro hid -- confirm */
+}
+#else
+#define dev_get_by_name(net, name) \
+	dev_get_by_name((name))
+#endif /* dev_get_by_name */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27)
+static inline void *netdev_priv(struct net_device *dev)
+{
+	return dev->priv;
+}
+#endif
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/netlink.h b/datapath/linux-2.4/compat-2.4/include/linux/netlink.h
new file mode 100644
index 00000000..69089e44
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/netlink.h
@@ -0,0 +1,78 @@
+#ifndef __LINUX_NETLINK_WRAPPER_H
+#define __LINUX_NETLINK_WRAPPER_H 1
+
+#include_next
+
+#define NETLINK_GENERIC 16
+
+#undef NLMSG_LENGTH
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+enum {
+	NETLINK_UNCONNECTED = 0,
+	NETLINK_CONNECTED,
+};
+
+/*
+ *  <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * |        Header       | Pad |     Payload       | Pad |
+ * |   (struct nlattr)   | ing |                   | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ *  <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr
+{
+	__u16 nla_len;
+	__u16 nla_type;
+};
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+#ifdef __KERNEL__
+
+#include
+#include
+
+static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
+{
+	return (struct nlmsghdr *)skb->data;
+}
+
+#define __nlmsg_put __rpl_nlmsg_put	/* rename so the definition below cannot clash with another __nlmsg_put */
+static __inline__ struct nlmsghdr *
+__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
+{
+	struct nlmsghdr *nlh;
+	int size = NLMSG_LENGTH(len);
+
+	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
+	nlh->nlmsg_type = type;
+	nlh->nlmsg_len = size;
+	nlh->nlmsg_flags = flags;
+	nlh->nlmsg_pid = pid;
+	nlh->nlmsg_seq = seq;
+	memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);	/* zero the alignment padding after the payload */
+	return nlh;
+}
+
+#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
+
+#undef NLMSG_NEW	/* the replacement jumps to a caller-provided nlmsg_failure label */
+#define NLMSG_NEW(skb, pid, seq, type, len, flags) \
+({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \
+		goto nlmsg_failure; \
+	__nlmsg_put(skb, pid, seq, type, len, flags); })
+#endif
+
+#undef NLMSG_PUT
+#define NLMSG_PUT(skb, pid, seq, type, len) \
+	NLMSG_NEW(skb, pid, seq, type, len, 0)
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/random.h b/datapath/linux-2.4/compat-2.4/include/linux/random.h
new file mode 100644
index 00000000..381f955c
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/random.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_RANDOM_WRAPPER_H
+#define __LINUX_RANDOM_WRAPPER_H 1
+
+#include_next
+
+#ifdef __KERNEL__
+u32 random32(void);
+void srandom32(u32 seed);
+#endif
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h b/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h
new file mode 100644
index 00000000..ae197ece
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h
@@ -0,0 +1,205 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma
+ *
+ * Based on the original work by Paul McKenney
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPDATE_H
+#define __LINUX_RCUPDATE_H
+
+#ifdef __KERNEL__
+
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_SMP
+#error "SMP configurations not supported for RCU backport."
+#endif
+
+/**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update request in a list
+ * @func: actual update function to call after the grace period.
+ */
+struct rcu_head {
+	struct rcu_head *next;
+	void (*func)(struct rcu_head *head);
+};
+
+#define RCU_HEAD_INIT { }
+#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
+#define INIT_RCU_HEAD(ptr) do { } while (0)	/* expands to an empty statement */
+
+
+
+/**
+ * rcu_read_lock - mark the beginning of an RCU read-side critical section.
+ *
+ * When synchronize_rcu() is invoked on one CPU while other CPUs
+ * are within RCU read-side critical sections, then the
+ * synchronize_rcu() is guaranteed to block until after all the other
+ * CPUs exit their critical sections. Similarly, if call_rcu() is invoked
+ * on one CPU while other CPUs are within RCU read-side critical
+ * sections, invocation of the corresponding RCU callback is deferred
+ * until after all the other CPUs exit their critical sections.
+ *
+ * Note, however, that RCU callbacks are permitted to run concurrently
+ * with RCU read-side critical sections. One way that this can happen
+ * is via the following sequence of events: (1) CPU 0 enters an RCU
+ * read-side critical section, (2) CPU 1 invokes call_rcu() to register
+ * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
+ * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
+ * callback is invoked. This is legal, because the RCU read-side critical
+ * section that was running concurrently with the call_rcu() (and which
+ * therefore might be referencing something that the corresponding RCU
+ * callback would free up) has completed before the corresponding
+ * RCU callback is invoked.
+ *
+ * RCU read-side critical sections may be nested. Any deferred actions
+ * will be deferred until the outermost RCU read-side critical section
+ * completes.
+ *
+ * It is illegal to block while in an RCU read-side critical section.
+ */
+#define rcu_read_lock() \
+	do { } while(0)	/* expands to an empty statement in this backport */
+
+/**
+ * rcu_read_unlock - marks the end of an RCU read-side critical section.
+ *
+ * See rcu_read_lock() for more information.
+ */
+#define rcu_read_unlock() \
+	do { } while(0)	/* expands to an empty statement in this backport */
+
+/*
+ * So where is rcu_write_lock()? It does not exist, as there is no
+ * way for writers to lock out RCU readers. This is a feature, not
+ * a bug -- this property is what provides RCU's performance benefits.
+ * Of course, writers must coordinate with each other. The normal
+ * spinlock primitives work well for this, but any other technique may be
+ * used as well. RCU does not care how the writers keep out of each
+ * others' way, as long as they do so.
+ */
+
+/**
+ * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section
+ *
+ * This is the equivalent of rcu_read_lock(), but to be used when updates
+ * are being done using call_rcu_bh().
Since call_rcu_bh() callbacks
+ * consider completion of a softirq handler to be a quiescent state,
+ * a process in an RCU read-side critical section must be protected by
+ * disabling softirqs. Read-side critical sections in interrupt context
+ * can use just rcu_read_lock().
+ *
+ */
+#define rcu_read_lock_bh() \
+	do { \
+		local_bh_disable(); \
+	} while(0)
+
+/**
+ * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
+ *
+ * See rcu_read_lock_bh() for more information.
+ */
+#define rcu_read_unlock_bh() \
+	do { \
+		local_bh_enable(); \
+	} while(0)
+
+/**
+ * rcu_dereference - fetch an RCU-protected pointer in an
+ * RCU read-side critical section. This pointer may later
+ * be safely dereferenced.
+ *
+ * Inserts memory barriers on architectures that require them
+ * (currently only the Alpha), and, more importantly, documents
+ * exactly which pointers are protected by RCU.
+ */
+
+#define rcu_dereference(p) ({ \
+				typeof(p) _________p1 = p; \
+				smp_read_barrier_depends(); \
+				(_________p1); \
+				})
+
+/**
+ * rcu_assign_pointer - assign (publicize) a pointer to a newly
+ * initialized structure that will be dereferenced by RCU read-side
+ * critical sections. Returns the value assigned.
+ *
+ * Inserts memory barriers on architectures that require them
+ * (pretty much all of them other than x86), and also prevents
+ * the compiler from reordering the code that initializes the
+ * structure after the pointer assignment. More importantly, this
+ * call documents which pointers will be dereferenced by RCU read-side
+ * code.
+ */
+
+#define rcu_assign_pointer(p, v) ({ \
+				smp_wmb(); \
+				(p) = (v); \
+				})
+
+/**
+ * synchronize_sched - block until all CPUs have exited any non-preemptive
+ * kernel code sequences.
+ *
+ * This means that all preempt_disable code sequences, including NMI and
+ * hardware-interrupt handlers, in progress on entry will have completed
+ * before this primitive returns.
However, this does not guarantee that
+ * softirq handlers will have completed, since in some kernels, these
+ * handlers can run in process context, and can block.
+ *
+ * This primitive provides the guarantees made by the (now removed)
+ * synchronize_kernel() API. In contrast, synchronize_rcu() only
+ * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
+ */
+#define synchronize_sched() synchronize_rcu()
+
+/* Exported interfaces */
+void synchronize_rcu(void);
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head));
+
+static inline void call_rcu_bh(struct rcu_head *head,
+			       void (*func)(struct rcu_head *head))
+{
+	synchronize_rcu();	/* NOTE(review): waits here instead of deferring -- confirm every caller may block */
+	func(head);	/* the callback then runs synchronously, in the caller's context */
+}
+void synchronize_idle(void);
+extern void rcu_barrier(void);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_RCUPDATE_H */
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h b/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h
new file mode 100644
index 00000000..2758520a
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h
@@ -0,0 +1,130 @@
+#ifndef __LINUX_SKBUFF_WRAPPER_H
+#define __LINUX_SKBUFF_WRAPPER_H 1
+
+#include_next
+
+
+#define mac_header mac.raw	/* lets skb->mac_header resolve to skb->mac.raw */
+#define network_header nh.raw	/* lets skb->network_header resolve to skb->nh.raw */
+
+/* Emulate Linux 2.6 behavior, in which kfree_skb silently ignores null pointer
+ * arguments. */
+#define kfree_skb(skb) kfree_skb_maybe_null(skb)
+static inline void kfree_skb_maybe_null(struct sk_buff *skb)
+{
+	if (likely(skb != NULL))
+		(kfree_skb)(skb);	/* parentheses suppress the macro, so this is the real kfree_skb() */
+}
+
+/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at
+ * least test against it: see update_csum() in forward.c.
*/
+#define CHECKSUM_PARTIAL 3
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+
+static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
+{
+	return skb->h.raw;	/* the transport header pointer is kept in h.raw */
+}
+
+static inline void skb_reset_transport_header(struct sk_buff *skb)
+{
+	skb->h.raw = skb->data;	/* header starts at the current data pointer */
+}
+
+static inline void skb_set_transport_header(struct sk_buff *skb,
+					    const int offset)
+{
+	skb->h.raw = skb->data + offset;	/* offset is relative to skb->data */
+}
+
+static inline unsigned char *skb_network_header(const struct sk_buff *skb)
+{
+	return skb->nh.raw;	/* the network header pointer is kept in nh.raw */
+}
+
+static inline void skb_reset_network_header(struct sk_buff *skb)
+{
+	skb->nh.raw = skb->data;
+}
+
+static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
+{
+	skb->nh.raw = skb->data + offset;	/* offset is relative to skb->data */
+}
+
+static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
+{
+	return skb->mac.raw;	/* the MAC header pointer is kept in mac.raw */
+}
+
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+	return skb->mac.raw != NULL;
+}
+
+static inline void skb_reset_mac_header(struct sk_buff *skb)
+{
+	skb->mac.raw = skb->data;
+}
+
+static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
+{
+	skb->mac.raw = skb->data + offset;	/* offset is relative to skb->data */
+}
+static inline int skb_transport_offset(const struct sk_buff *skb)
+{
+	return skb_transport_header(skb) - skb->data;	/* bytes from skb->data to the transport header */
+}
+
+static inline u32 skb_network_header_len(const struct sk_buff *skb)
+{
+	return skb->h.raw - skb->nh.raw;	/* bytes from the network header to the transport header */
+}
+
+static inline int skb_network_offset(const struct sk_buff *skb)
+{
+	return skb_network_header(skb) - skb->data;	/* bytes from skb->data to the network header */
+}
+
+static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
+{
+	return skb->tail;
+}
+
+static inline void skb_reset_tail_pointer(struct sk_buff *skb)
+{
+	skb->tail = skb->data;
+}
+
+static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
+{
+	skb->tail = skb->data + offset;	/* offset is relative to skb->data */
+}
+
+/*
+ * CPUs often take a performance hit when accessing unaligned memory
+ * locations.
The actual performance hit varies, it can be small if the
+ * hardware handles it or large if we have to take an exception and fix it
+ * in software.
+ *
+ * Since an ethernet header is 14 bytes network drivers often end up with
+ * the IP header at an unaligned offset. The IP header can be aligned by
+ * shifting the start of the packet by 2 bytes. Drivers should do this
+ * with:
+ *
+ * skb_reserve(NET_IP_ALIGN);
+ *
+ * The downside to this alignment of the IP header is that the DMA is now
+ * unaligned. On some architectures the cost of an unaligned DMA is high
+ * and this cost outweighs the gains made by aligning the IP header.
+ *
+ * Since this trade off varies between architectures, we allow NET_IP_ALIGN
+ * to be overridden.
+ */
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/slab.h b/datapath/linux-2.4/compat-2.4/include/linux/slab.h
new file mode 100644
index 00000000..e9342596
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/slab.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_SLAB_WRAPPER_H
+#define __LINUX_SLAB_WRAPPER_H 1
+
+/* Kluge to let "struct kmem_cache" work in both 2.4 and 2.6. */
+#define kmem_cache_s kmem_cache
+
+#include_next
+/*
+ * kzalloc - kmalloc() followed by zeroing the whole allocation.
+ * Returns the zero-filled buffer, or NULL when kmalloc() returned NULL
+ * (in which case nothing is written).
+ */
+static inline void *kzalloc(size_t size, gfp_t flags)
+{
+	void *p = kmalloc(size, flags);
+	if (p)
+		memset(p, 0, size);
+	return p;
+}
+
+/* Mega-kluge to wrap 2.4 kmem_cache_create for compatibility with 2.6.
*/
+#ifdef kmem_cache_create /* the name is already a macro in this build */
+#undef kmem_cache_create
+#define kmem_cache_create(name, size, align, flags, ctor) \
+	compat_kmem_cache_create(name, size, align, flags, ctor)
+static inline struct kmem_cache *
+compat_kmem_cache_create(const char *name, size_t size,
+			 size_t align, unsigned long flags,
+			 void (*ctor)(void *, struct kmem_cache *,
+				      unsigned long))
+{
+	/* Forward the five caller arguments and pass NULL as the sixth. */
+	return (_set_ver(kmem_cache_create))(name, size, align, flags, ctor,
+					     NULL);
+}
+#else /* plain function: the macro only appends the extra NULL argument */
+#define kmem_cache_create(name, size, align, flags, ctor) \
+	kmem_cache_create(name, size, align, flags, ctor, NULL)
+#endif /* kmem_cache_create */
+/*
+ * kmem_cache_zalloc - kmem_cache_alloc() followed by zeroing
+ * kmem_cache_size(k) bytes.  Returns NULL, without writing anything,
+ * when the allocation fails.
+ */
+static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
+{
+	void *p = kmem_cache_alloc(k, flags);
+	if (p)
+		memset(p, 0, kmem_cache_size(k));
+	return p;
+}
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/sockios.h b/datapath/linux-2.4/compat-2.4/include/linux/sockios.h
new file mode 100644
index 00000000..262fb389
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/sockios.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_SOCKIOS_WRAPPER_H
+#define __LINUX_SOCKIOS_WRAPPER_H 1
+
+#include_next
+
+/* bridge calls */
+#define SIOCBRADDBR 0x89a0 /* create new bridge device */
+#define SIOCBRDELBR 0x89a1 /* remove bridge device */
+#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
+#define SIOCBRDELIF 0x89a3 /* remove interface from bridge */
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h b/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h
new file mode 100644
index 00000000..c18eb637
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h
@@ -0,0 +1,8 @@
+#ifndef __LINUX_SPINLOCK_WRAPPER_H
+#define __LINUX_SPINLOCK_WRAPPER_H 1
+
+#include_next
+/* Define a spinlock_t named x, initialized with SPIN_LOCK_UNLOCKED. */
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+#endif /* linux/spinlock.h */
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/string.h b/datapath/linux-2.4/compat-2.4/include/linux/string.h
new file mode
100644
index 00000000..d491226a
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/string.h
@@ -0,0 +1,10 @@
+#ifndef __LINUX_STRING_WRAPPER_H
+#define __LINUX_STRING_WRAPPER_H 1
+
+#include_next
+/* Declare strcspn() unless the architecture announces its own version. */
+#ifndef __HAVE_ARCH_STRCSPN
+size_t strcspn(const char *s, const char *reject);
+#endif
+
+#endif /* linux/string.h */
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/tcp.h b/datapath/linux-2.4/compat-2.4/include/linux/tcp.h
new file mode 100644
index 00000000..7178e6b4
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/tcp.h
@@ -0,0 +1,25 @@
+#ifndef __LINUX_TCP_WRAPPER_H
+#define __LINUX_TCP_WRAPPER_H 1
+
+#include_next
+
+#ifdef __KERNEL__
+#include
+/* The TCP header is taken to start at skb_transport_header(skb). */
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+	return (struct tcphdr *)skb_transport_header(skb);
+}
+/* TCP header length in bytes, computed as doff * 4. */
+static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+{
+	return tcp_hdr(skb)->doff * 4;
+}
+/* Length of the TCP options in bytes, computed as (doff - 5) * 4. */
+static inline unsigned int tcp_optlen(const struct sk_buff *skb)
+{
+	return (tcp_hdr(skb)->doff - 5) * 4;
+}
+#endif
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/timer.h b/datapath/linux-2.4/compat-2.4/include/linux/timer.h
new file mode 100644
index 00000000..5a03721f
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/timer.h
@@ -0,0 +1,96 @@
+#ifndef __LINUX_TIMER_WRAPPER_H
+#define __LINUX_TIMER_WRAPPER_H 1
+
+#include_next
+#include
+#include
+
+extern unsigned long volatile jiffies;
+/*
+ * setup_timer - store the callback and its argument in @timer, then call
+ * init_timer() on it.
+ */
+static inline void setup_timer(struct timer_list * timer,
+				void (*function)(unsigned long),
+				unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer(timer);
+}
+
+/**
+ * __round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds.
This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+static inline unsigned long __round_jiffies(unsigned long j, int cpu)
+{
+	int rem;
+	unsigned long original = j;
+
+	/*
+	 * We don't want all cpus firing their timers at once hitting the
+	 * same lock or cachelines, so we skew each extra cpu with an extra
+	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
+	 * already did this.
+	 * The skew is done by adding 3*cpunr, then round, then subtract this
+	 * extra offset again.
+	 */
+	j += cpu * 3;
+
+	rem = j % HZ;
+
+	/*
+	 * If the target jiffie is just after a whole second (which can happen
+	 * due to delays of the timer irq, long irq off times etc etc) then
+	 * we should round down to the whole second, not up. Use 1/4th second
+	 * as cutoff for this rounding as an extreme upper bound for this.
+	 */
+	if (rem < HZ/4) /* round down */
+		j = j - rem;
+	else /* round up */
+		j = j - rem + HZ;
+
+	/* now that we have rounded, subtract the extra skew again */
+	j -= cpu * 3;
+
+	if (j <= jiffies) /* rounding ate our timeout entirely;
+			   * fall back to the caller's unrounded value.
+			   * NOTE(review): this is a plain comparison, not a
+			   * wraparound-safe one; when the two values sit on
+			   * opposite sides of a jiffies wrap the rounding is
+			   * simply skipped. */
+		return original;
+	return j;
+}
+
+
+/**
+ * round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds.
This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. + */ +static inline unsigned long round_jiffies(unsigned long j) +{ + return __round_jiffies(j, 0); // FIXME: cpu is hard-coded to 0, so no per-CPU skew is applied +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/types.h b/datapath/linux-2.4/compat-2.4/include/linux/types.h new file mode 100644 index 00000000..7c048f44 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/types.h @@ -0,0 +1,49 @@ +#ifndef __LINUX_TYPES_WRAPPER_H +#define __LINUX_TYPES_WRAPPER_H 1 + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#error These replacement header files are for use with Linux 2.4.x only. +#endif + +#include_next + +/* + * Below are truly Linux-specific types that should never collide with + * any application/library that wants linux/types.h.
+ */
+/*
+ * __bitwise__ expands to __attribute__((bitwise)) only when __CHECKER__ is
+ * defined, and __bitwise expands to __bitwise__ only when __CHECK_ENDIAN__
+ * is defined; otherwise each expands to nothing.
+ */
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+/* Fixed-endian and checksum integer types, each tagged with __bitwise. */
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+#endif
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#ifdef __KERNEL__
+typedef unsigned __bitwise__ gfp_t;
+/* resource_size_t is u64 or u32 depending on CONFIG_RESOURCES_64BIT. */
+#ifdef CONFIG_RESOURCES_64BIT
+typedef u64 resource_size_t;
+#else
+typedef u32 resource_size_t;
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/linux/udp.h b/datapath/linux-2.4/compat-2.4/include/linux/udp.h
new file mode 100644
index 00000000..7fdf5b9d
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/linux/udp.h
@@ -0,0 +1,15 @@
+#ifndef __LINUX_UDP_WRAPPER_H
+#define __LINUX_UDP_WRAPPER_H 1
+
+#include_next
+
+#ifdef __KERNEL__
+#include
+/* The UDP header is taken to start at skb_transport_header(skb). */
+static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_transport_header(skb);
+}
+#endif
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/net/checksum.h b/datapath/linux-2.4/compat-2.4/include/net/checksum.h
new file mode 100644
index 00000000..9868c32c
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/net/checksum.h
@@ -0,0 +1,11 @@
+#ifndef __NET_CHECKSUM_WRAPPER_H
+#define __NET_CHECKSUM_WRAPPER_H 1
+
+#include_next
+/* Convert a __sum16 to a __wsum with a __force cast; nothing else is done. */
+static inline __wsum csum_unfold(__sum16 n)
+{
+	return (__force __wsum)n;
+}
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/net/genetlink.h b/datapath/linux-2.4/compat-2.4/include/net/genetlink.h
new file mode 100644
index 00000000..decdda54
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/net/genetlink.h
@@ -0,0 +1,252 @@
+#ifndef __NET_GENERIC_NETLINK_H
+#define __NET_GENERIC_NETLINK_H
+
+#include
+#include
+
+/**
+ * struct genl_multicast_group - generic netlink multicast group
+ * @name: name of the multicast group, names are per-family
+ * @id: multicast group ID, assigned by the core, to use with
+ *      genlmsg_multicast().
+ * @list: list entry for linking
+ * @family: pointer to family, need not be set before registering
+ */
+struct genl_multicast_group
+{
+	struct genl_family	*family;	/* private */
+	struct list_head	list;		/* private */
+	char			name[GENL_NAMSIZ];
+	u32			id;
+};
+
+/**
+ * struct genl_family - generic netlink family
+ * @id: protocol family identifier
+ * @hdrsize: length of user specific header in bytes
+ * @name: name of family
+ * @version: protocol version
+ * @maxattr: maximum number of attributes supported
+ * @attrbuf: buffer to store parsed attributes
+ * @ops_list: list of all assigned operations
+ * @family_list: family list
+ * @mcast_groups: multicast groups list
+ */
+struct genl_family
+{
+	unsigned int		id;
+	unsigned int		hdrsize;
+	char			name[GENL_NAMSIZ];
+	unsigned int		version;
+	unsigned int		maxattr;
+	struct nlattr **	attrbuf;	/* private */
+	struct list_head	ops_list;	/* private */
+	struct list_head	family_list;	/* private */
+	struct list_head	mcast_groups;	/* private */
+};
+
+/**
+ * struct genl_info - receiving information
+ * @snd_seq: sending sequence number
+ * @snd_pid: netlink pid of sender
+ * @nlhdr: netlink message header
+ * @genlhdr: generic netlink message header
+ * @userhdr: user specific header
+ * @attrs: netlink attributes
+ */
+struct genl_info
+{
+	u32			snd_seq;
+	u32			snd_pid;
+	struct nlmsghdr *	nlhdr;
+	struct genlmsghdr *	genlhdr;
+	void *			userhdr;
+	struct nlattr **	attrs;
+};
+
+/**
+ * struct genl_ops - generic netlink operations
+ * @cmd: command identifier
+ * @flags: flags
+ * @policy: attribute validation policy
+ * @doit: standard command callback
+ * @dumpit: callback for dumpers
+ * @done: completion callback for dumps
+ * @ops_list: operations list
+ */
+struct genl_ops
+{
+	u8			cmd;
+	unsigned int		flags;
+	const struct nla_policy	*policy;
+	int		       (*doit)(struct sk_buff *skb,
+				       struct genl_info *info);
+	int		       (*dumpit)(struct sk_buff *skb,
+					 struct netlink_callback *cb);
+	int		       (*done)(struct netlink_callback *cb);
+	struct list_head	ops_list;
+};
+
+extern int genl_register_family(struct genl_family *family);
+extern int genl_unregister_family(struct genl_family *family);
+extern int genl_register_ops(struct genl_family *, struct genl_ops *ops);
+extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops);
+extern int genl_register_mc_group(struct genl_family *family,
+				  struct genl_multicast_group *grp);
+extern void genl_unregister_mc_group(struct genl_family *family,
+				     struct genl_multicast_group *grp);
+
+extern struct sock *genl_sock;
+
+/**
+ * genlmsg_put - Add generic netlink header to netlink message
+ * @skb: socket buffer holding the message
+ * @pid: netlink pid the message is addressed to
+ * @seq: sequence number (usually the one of the sender)
+ * @family: generic netlink family
+ * @flags: netlink message flags
+ * @cmd: generic netlink command
+ *
+ * Asks nlmsg_put() for a message of type @family->id whose payload is
+ * GENL_HDRLEN + @family->hdrsize bytes, then fills in the genlmsghdr
+ * (cmd, version, reserved = 0).
+ *
+ * Returns pointer to user specific header, i.e. the address GENL_HDRLEN
+ * bytes past the genlmsghdr, or NULL if nlmsg_put() returned NULL.
+ */
+static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+				struct genl_family *family, int flags, u8 cmd)
+{
+	struct nlmsghdr *nlh;
+	struct genlmsghdr *hdr;
+
+	nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN +
+			family->hdrsize, flags);
+	if (nlh == NULL)
+		return NULL;
+
+	hdr = nlmsg_data(nlh);
+	hdr->cmd = cmd;
+	hdr->version = family->version;
+	hdr->reserved = 0;
+
+	return (char *) hdr + GENL_HDRLEN;
+}
+
+/**
+ * genlmsg_put_reply - Add generic netlink header to a reply message
+ * @skb: socket buffer holding the message
+ * @info: receiver info
+ * @family: generic netlink family
+ * @flags: netlink message flags
+ * @cmd: generic netlink command
+ *
+ * Calls genlmsg_put() with @info->snd_pid and @info->snd_seq.
+ *
+ * Returns pointer to user specific header
+ */
+static inline void *genlmsg_put_reply(struct sk_buff *skb,
+				      struct genl_info *info,
+				      struct genl_family *family,
+				      int flags, u8 cmd)
+{
+	return genlmsg_put(skb, info->snd_pid, info->snd_seq, family,
+			   flags, cmd);
+}
+
+/**
+ * genlmsg_end - Finalize a generic netlink message
+ * @skb: socket buffer the message is stored in
+ * @hdr: user specific header
+ *
+ * The netlink header handed to nlmsg_end() is located by stepping back
+ * GENL_HDRLEN + NLMSG_HDRLEN bytes from @hdr.  The subtraction is done
+ * directly on a void pointer, which relies on the GNU C extension that
+ * treats sizeof(void) as 1.
+ */
+static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
+{
+	return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+}
+
+/**
+ * genlmsg_cancel - Cancel construction of a generic netlink message
+ * @skb: socket buffer the message is stored in
+ * @hdr: generic netlink message header
+ *
+ * NOTE(review): the code steps back GENL_HDRLEN + NLMSG_HDRLEN bytes from
+ * @hdr exactly as genlmsg_end() does, so @hdr appears to be the user
+ * specific header returned by genlmsg_put() rather than the genlmsghdr;
+ * confirm against callers.
+ */
+static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
+{
+	return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+}
+
+/**
+ * genlmsg_multicast - multicast a netlink message
+ * @skb: netlink message as socket buffer
+ * @pid: own netlink pid to avoid sending to yourself
+ * @group: multicast group id
+ * @flags: allocation flags
+ *
+ * Sends through the global genl_sock socket.
+ */
+static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
+				    unsigned int group, gfp_t flags)
+{
+	return nlmsg_multicast(genl_sock, skb, pid, group, flags);
+}
+
+/**
+ * genlmsg_unicast - unicast a netlink message
+ * @skb: netlink message as socket buffer
+ * @pid: netlink pid of the destination socket
+ *
+ * Sends through the global genl_sock socket.
+ */
+static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid)
+{
+	return nlmsg_unicast(genl_sock, skb, pid);
+}
+
+/**
+ * genlmsg_reply - reply to a request
+ * @skb: netlink message to be sent back
+ * @info: receiver information
+ *
+ * Unicasts @skb to @info->snd_pid.
+ */
+static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info)
+{
+	return genlmsg_unicast(skb, info->snd_pid);
+}
+
+/**
+ * genlmsg_data - head of message payload
+ * @gnlh: genetlink message header
+ *
+ * Returns the address GENL_HDRLEN bytes past @gnlh.
+ */
+static inline void *genlmsg_data(const struct genlmsghdr *gnlh)
+{
+	return ((unsigned char *) gnlh + GENL_HDRLEN);
+}
+
+/**
+ * genlmsg_len - length of message payload
+ * @gnlh: genetlink message header
+ *
+ * The enclosing struct nlmsghdr is taken to start NLMSG_HDRLEN bytes
+ * before @gnlh; the result is its nlmsg_len minus both header lengths.
+ */
+static inline int genlmsg_len(const struct genlmsghdr *gnlh)
+{
+	struct nlmsghdr *nlh = (struct nlmsghdr *)((unsigned char *)gnlh -
+							NLMSG_HDRLEN);
+	return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN);
+}
+
+/**
+ * genlmsg_msg_size - length of genetlink message not including padding
+ * @payload: length of message payload
+ */
+static inline int genlmsg_msg_size(int payload)
+{
+	return GENL_HDRLEN + payload;
+}
+
+/**
+ * genlmsg_total_size - length of genetlink message including padding
+ * @payload: length of message payload
+ */
+static inline int genlmsg_total_size(int payload)
+{
+	return NLMSG_ALIGN(genlmsg_msg_size(payload));
+}
+
+/**
+ * genlmsg_new - Allocate a new generic netlink message
+ * @payload: size of the message payload
+ * @flags: the type of memory to allocate.
+ *
+ * @payload is a size_t here but is passed to genlmsg_total_size(), which
+ * takes an int.
+ */
+static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
+{
+	return nlmsg_new(genlmsg_total_size(payload), flags);
+}
+
+
+#endif /* __NET_GENERIC_NETLINK_H */
diff --git a/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h b/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h
new file mode 100644
index 00000000..e254dd71
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h
@@ -0,0 +1,11 @@
+#ifndef __NET_LLC_PDU_H
+#define __NET_LLC_PDU_H 1
+
+/* Un-numbered PDU format (3 bytes in length) */
+struct llc_pdu_un {
+	u8 dsap;
+	u8 ssap;
+	u8 ctrl_1;
+};
+
+#endif
diff --git a/datapath/linux-2.4/compat-2.4/include/net/netlink.h b/datapath/linux-2.4/compat-2.4/include/net/netlink.h
new file mode 100644
index 00000000..46cdafd9
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/include/net/netlink.h
@@ -0,0 +1,1014 @@
+#ifndef __NET_NETLINK_H
+#define __NET_NETLINK_H
+
+#include
+#include
+#include
+
+/* ========================================================================
+ * Netlink Messages and Attributes Interface (As Seen On TV)
+ * ------------------------------------------------------------------------
+ * Messages Interface
+ * ------------------------------------------------------------------------
+ *
+ * Message Format:
+ *    <--- nlmsg_total_size(payload)  --->
+ *    <-- nlmsg_msg_size(payload) ->
+ *   +----------+- - -+-------------+- - -+-------- - -
+ *   | nlmsghdr | Pad |   Payload   | Pad | nlmsghdr
+ *   +----------+- - -+-------------+- - -+-------- - -
+ *   nlmsg_data(nlh)---^                   ^
+ *   nlmsg_next(nlh)-----------------------+
+ *
+ * Payload Format:
+ *    <---------------------- nlmsg_len(nlh) --------------------->
+ *    <------ hdrlen ------>       <- nlmsg_attrlen(nlh, hdrlen) ->
+ *   +----------------------+- - -+--------------------------------+
+ *   |     Family Header    | Pad |           Attributes           |
+ *   +----------------------+- - -+--------------------------------+
+ *   nlmsg_attrdata(nlh, hdrlen)---^
+ *
+ * Data Structures:
+ *   struct nlmsghdr                    netlink message header
+ *
+ * Message Construction:
+ *   nlmsg_new()                        create a new netlink message
+ *   nlmsg_put()                        add a netlink message to an skb
+ *   nlmsg_put_answer()                 callback based nlmsg_put()
+ *   nlmsg_end()                        finalize netlink message
+ *   nlmsg_get_pos()                    return current position in message
+ *   nlmsg_trim()                       trim part of message
+ *   nlmsg_cancel()                     cancel message construction
+ *   nlmsg_free()                       free a netlink message
+ *
+ * Message Sending:
+ *   nlmsg_multicast()                  multicast message to several groups
+ *   nlmsg_unicast()                    unicast a message to a single socket
+ *   nlmsg_notify()                     send notification message
+ *
+ * Message Length Calculations:
+ *   nlmsg_msg_size(payload)            length of message w/o padding
+ *   nlmsg_total_size(payload)          length of message w/ padding
+ *   nlmsg_padlen(payload)              length of padding at tail
+ *
+ * Message Payload Access:
+ *   nlmsg_data(nlh)                    head of message payload
+ *   nlmsg_len(nlh)                     length of message payload
+ *   nlmsg_attrdata(nlh, hdrlen)        head of attributes data
+ *   nlmsg_attrlen(nlh, hdrlen)         length of attributes data
+ *
+ * Message Parsing:
+ *   nlmsg_ok(nlh, remaining)           does nlh fit into remaining bytes?
+ *   nlmsg_next(nlh, remaining)         get next netlink message
+ *   nlmsg_parse()                      parse attributes of a message
+ *   nlmsg_find_attr()                  find an attribute in a message
+ *   nlmsg_for_each_msg()               loop over all messages
+ *   nlmsg_validate()                   validate netlink message incl. attrs
+ *   nlmsg_for_each_attr()              loop over all attributes
+ *
+ * Misc:
+ *   nlmsg_report()                     report back to application?
+ *
+ * ------------------------------------------------------------------------
+ * Attributes Interface
+ * ------------------------------------------------------------------------
+ *
+ * Attribute Format:
+ *    <------- nla_total_size(payload) ------->
+ *    <---- nla_attr_size(payload) ----->
+ *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
+ *   |  Header  | Pad |     Payload      | Pad |  Header
+ *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
+ *                     <- nla_len(nla) ->      ^
+ *   nla_data(nla)----^                        |
+ *   nla_next(nla)-----------------------------'
+ *
+ * Data Structures:
+ *   struct nlattr                      netlink attribute header
+ *
+ * Attribute Construction:
+ *   nla_reserve(skb, type, len)        reserve room for an attribute
+ *   nla_reserve_nohdr(skb, len)        reserve room for an attribute w/o hdr
+ *   nla_put(skb, type, len, data)      add attribute to skb
+ *   nla_put_nohdr(skb, len, data)      add attribute w/o hdr
+ *
+ * Attribute Construction for Basic Types:
+ *   nla_put_u8(skb, type, value)       add u8 attribute to skb
+ *   nla_put_u16(skb, type, value)      add u16 attribute to skb
+ *   nla_put_u32(skb, type, value)      add u32 attribute to skb
+ *   nla_put_u64(skb, type, value)      add u64 attribute to skb
+ *   nla_put_string(skb, type, str)     add string attribute to skb
+ *   nla_put_flag(skb, type)            add flag attribute to skb
+ *   nla_put_msecs(skb, type, jiffies)  add msecs attribute to skb
+ *
+ * Exceptions Based Attribute Construction:
+ *   NLA_PUT(skb, type, len, data)      add attribute to skb
+ *   NLA_PUT_U8(skb, type, value)       add u8 attribute to skb
+ *   NLA_PUT_U16(skb, type, value)      add u16 attribute to skb
+ *   NLA_PUT_U32(skb, type, value)      add
u32 attribute to skb + * NLA_PUT_U64(skb, type, value) add u64 attribute to skb + * NLA_PUT_STRING(skb, type, str) add string attribute to skb + * NLA_PUT_FLAG(skb, type) add flag attribute to skb + * NLA_PUT_MSECS(skb, type, jiffies) add msecs attribute to skb + * + * The meaning of these functions is equal to their lower case + * variants but they jump to the label nla_put_failure in case + * of a failure. + * + * Nested Attributes Construction: + * nla_nest_start(skb, type) start a nested attribute + * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_cancel(skb, nla) cancel nested attribute construction + * + * Attribute Length Calculations: + * nla_attr_size(payload) length of attribute w/o padding + * nla_total_size(payload) length of attribute w/ padding + * nla_padlen(payload) length of padding + * + * Attribute Payload Access: + * nla_data(nla) head of attribute payload + * nla_len(nla) length of attribute payload + * + * Attribute Payload Access for Basic Types: + * nla_get_u8(nla) get payload for a u8 attribute + * nla_get_u16(nla) get payload for a u16 attribute + * nla_get_u32(nla) get payload for a u32 attribute + * nla_get_u64(nla) get payload for a u64 attribute + * nla_get_flag(nla) return 1 if flag is true + * nla_get_msecs(nla) get payload for a msecs attribute + * + * Attribute Misc: + * nla_memcpy(dest, nla, count) copy attribute into memory + * nla_memcmp(nla, data, size) compare attribute with memory area + * nla_strlcpy(dst, nla, size) copy attribute to a sized string + * nla_strcmp(nla, str) compare attribute with string + * + * Attribute Parsing: + * nla_ok(nla, remaining) does nla fit into remaining bytes? 
+ * nla_next(nla, remaining) get next netlink attribute
+ * nla_validate() validate a stream of attributes
+ * nla_validate_nested() validate a stream of nested attributes
+ * nla_find() find attribute in stream of attributes
+ * nla_find_nested() find attribute in nested attributes
+ * nla_parse() parse and validate stream of attrs
+ * nla_parse_nested() parse nested attribuets
+ * nla_for_each_attr() loop over all attributes
+ * nla_for_each_nested() loop over the nested attributes
+ *=========================================================================
+ */
+
+ /**
+ * Standard attribute types to specify validation policy
+ *
+ * The enumerators take consecutive values starting at NLA_UNSPEC (0).
+ * __NLA_TYPE_MAX is a sentinel placed after the last real type and is not
+ * a type itself.
+ */
+enum {
+	NLA_UNSPEC,
+	NLA_U8,
+	NLA_U16,
+	NLA_U32,
+	NLA_U64,
+	NLA_STRING,
+	NLA_FLAG,
+	NLA_MSECS,
+	NLA_NESTED,
+	NLA_NESTED_COMPAT,
+	NLA_NUL_STRING,
+	NLA_BINARY,
+	__NLA_TYPE_MAX,
+};
+/* Highest valid attribute type value: one less than the sentinel. */
+#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+
+/**
+ * struct nla_policy - attribute validation policy
+ * @type: Type of attribute or NLA_UNSPEC
+ * @len: Type specific length of payload
+ *
+ * Policies are defined as arrays of this struct, the array must be
+ * accessible by attribute type up to the highest identifier to be expected.
+ * + * Meaning of `len' field: + * NLA_STRING Maximum length of string + * NLA_NUL_STRING Maximum length of string (excluding NUL) + * NLA_FLAG Unused + * NLA_BINARY Maximum length of attribute payload + * All other Exact length of attribute payload + * + * Example: + * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { + * [ATTR_FOO] = { .type = NLA_U16 }, + * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, + * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * }; + */ +struct nla_policy { + u16 type; + u16 len; +}; + +/** + * struct nl_info - netlink source information + * @nlh: Netlink message header of original request + * @pid: Netlink PID of requesting application + */ +struct nl_info { + struct nlmsghdr *nlh; + u32 pid; +}; + +extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, + struct nlmsghdr *)); +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); + +extern int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy); +extern int nla_parse(struct nlattr *tb[], int maxtype, + struct nlattr *head, int len, + const struct nla_policy *policy); +extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); +extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, + size_t dstsize); +extern int nla_memcpy(void *dest, struct nlattr *src, int count); +extern int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size); +extern int nla_strcmp(const struct nlattr *nla, const char *str); +extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern void * __nla_reserve_nohdr(struct sk_buff *skb, int attrlen); +extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern void * nla_reserve_nohdr(struct sk_buff *skb, int attrlen); +extern void __nla_put(struct sk_buff *skb, int attrtype, + int 
attrlen, const void *data); +extern void __nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); +extern int nla_put(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +extern int nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); + +/************************************************************************** + * Netlink Messages + **************************************************************************/ + +/** + * nlmsg_msg_size - length of netlink message not including padding + * @payload: length of message payload + */ +static inline int nlmsg_msg_size(int payload) +{ + return NLMSG_HDRLEN + payload; +} + +/** + * nlmsg_total_size - length of netlink message including padding + * @payload: length of message payload + */ +static inline int nlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(nlmsg_msg_size(payload)); +} + +/** + * nlmsg_padlen - length of padding at the message's tail + * @payload: length of message payload + */ +static inline int nlmsg_padlen(int payload) +{ + return nlmsg_total_size(payload) - nlmsg_msg_size(payload); +} + +/** + * nlmsg_data - head of message payload + * @nlh: netlink messsage header + */ +static inline void *nlmsg_data(const struct nlmsghdr *nlh) +{ + return (unsigned char *) nlh + NLMSG_HDRLEN; +} + +/** + * nlmsg_len - length of message payload + * @nlh: netlink message header + */ +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * nlmsg_attrdata - head of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, + int hdrlen) +{ + unsigned char *data = nlmsg_data(nlh); + return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); +} + +/** + * nlmsg_attrlen - length of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline int 
nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) +{ + return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen); +} + +/** + * nlmsg_ok - check if the netlink message fits into the remaining bytes + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + */ +static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) +{ + return (remaining >= sizeof(struct nlmsghdr) && + nlh->nlmsg_len >= sizeof(struct nlmsghdr) && + nlh->nlmsg_len <= remaining); +} + +/** + * nlmsg_next - next netlink message in message stream + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + * + * Returns the next netlink message in the message stream and + * decrements remaining by the size of the current message. + */ +static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) +{ + int totlen = NLMSG_ALIGN(nlh->nlmsg_len); + + *remaining -= totlen; + + return (struct nlmsghdr *) ((unsigned char *) nlh + totlen); +} + +/** + * nlmsg_parse - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * See nla_parse() + */ +static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy); +} + +/** + * nlmsg_find_attr - find a specific attribute in a netlink message + * @nlh: netlink message header + * @hdrlen: length of familiy specific header + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. 
+ */
+static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
+					     int hdrlen, int attrtype)
+{
+	return nla_find(nlmsg_attrdata(nlh, hdrlen),
+			nlmsg_attrlen(nlh, hdrlen), attrtype);
+}
+
+/**
+ * nlmsg_validate - validate a netlink message including attributes
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * Returns -EINVAL if nlmsg_len is smaller than nlmsg_msg_size(@hdrlen),
+ * otherwise the result of nla_validate() on the attribute area.
+ */
+static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype,
+				 const struct nla_policy *policy)
+{
+	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+		return -EINVAL;
+
+	return nla_validate(nlmsg_attrdata(nlh, hdrlen),
+			    nlmsg_attrlen(nlh, hdrlen), maxtype, policy);
+}
+
+/**
+ * nlmsg_report - need to report back to application?
+ * @nlh: netlink message header
+ *
+ * Returns 1 if a report back to the application is requested, i.e. if
+ * NLM_F_ECHO is set in nlmsg_flags, and 0 otherwise.
+ */
+static inline int nlmsg_report(struct nlmsghdr *nlh)
+{
+	return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+}
+
+/**
+ * nlmsg_for_each_attr - iterate over a stream of attributes
+ * @pos: loop counter, set to current attribute
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ *
+ * Expands to nla_for_each_attr() over the attribute area of @nlh.
+ */
+#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \
+	nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \
+			  nlmsg_attrlen(nlh, hdrlen), rem)
+
+#if 0
+/* FIXME: Enable once all users have been converted */
+
+/**
+ * __nlmsg_put - Add a new netlink message to an skb
+ * @skb: socket buffer to store message in
+ * @pid: netlink process id
+ * @seq: sequence number of message
+ * @type: message type
+ * @payload: length of message payload
+ * @flags: message flags
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for both the netlink header and payload.
+ */ +static inline struct nlmsghdr *__nlmsg_put(struct sk_buff *skb, u32 pid, + u32 seq, int type, int payload, + int flags) +{ + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *) skb_put(skb, nlmsg_total_size(payload)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = nlmsg_msg_size(payload); + nlh->nlmsg_flags = flags; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + + memset((unsigned char *) nlmsg_data(nlh) + payload, 0, + nlmsg_padlen(payload)); + + return nlh; +} +#endif + +/** + * nlmsg_put - Add a new netlink message to an skb + * @skb: socket buffer to store message in + * @pid: netlink process id + * @seq: sequence number of message + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + int type, int payload, int flags) +{ + if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) + return NULL; + + return __nlmsg_put(skb, pid, seq, type, payload, flags); +} + +/** + * nlmsg_put_answer - Add a new callback based netlink message to an skb + * @skb: socket buffer to store message in + * @cb: netlink callback + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, + struct netlink_callback *cb, + int type, int payload, + int flags) +{ + return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + type, payload, flags); +} + +/** + * nlmsg_new - Allocate a new netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. + * + * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known + * and a good default is needed. 
+ */ +static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) +{ + return alloc_skb(nlmsg_total_size(payload), flags); +} + +/** + * nlmsg_end - Finalize a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Corrects the netlink message header to include the appeneded + * attributes. Only necessary if attributes have been added to + * the message. + * + * Returns the total data length of the skb. + */ +static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + + return skb->len; +} + +/** + * nlmsg_get_pos - return current position in netlink message + * @skb: socket buffer the message is stored in + * + * Returns a pointer to the current tail of the message. + */ +static inline void *nlmsg_get_pos(struct sk_buff *skb) +{ + return skb_tail_pointer(skb); +} + +/** + * nlmsg_trim - Trim message to a mark + * @skb: socket buffer the message is stored in + * @mark: mark to trim to + * + * Trims the message to the provided mark. Returns -1. + */ +static inline int nlmsg_trim(struct sk_buff *skb, const void *mark) +{ + if (mark) + skb_trim(skb, (unsigned char *) mark - skb->data); + + return -1; +} + +/** + * nlmsg_cancel - Cancel construction of a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Removes the complete netlink message including all + * attributes from the socket buffer again. Returns -1. 
+ */ +static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + return nlmsg_trim(skb, nlh); +} + +/** + * nlmsg_free - free a netlink message + * @skb: socket buffer of netlink message + */ +static inline void nlmsg_free(struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/** + * nlmsg_multicast - multicast a netlink message + * @sk: netlink socket to spread messages to + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id (*not* bit-mask) + * @flags: allocation flags + */ +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, gfp_t flags) +{ + NETLINK_CB(skb).dst_groups = 1UL << (group - 1); + netlink_broadcast(sk, skb, pid, 1UL << (group - 1), flags); + return 0; +} + +/** + * nlmsg_unicast - unicast a netlink message + * @sk: netlink socket to spread message to + * @skb: netlink message as socket buffer + * @pid: netlink pid of the destination socket + */ +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +{ + int err; + + err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + if (err > 0) + err = 0; + + return err; +} + +/** + * nlmsg_for_each_msg - iterate over a stream of messages + * @pos: loop counter, set to current message + * @head: head of message stream + * @len: length of message stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_msg(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nlmsg_ok(pos, rem); \ + pos = nlmsg_next(pos, &(rem))) + +/************************************************************************** + * Netlink Attributes + **************************************************************************/ + +/** + * nla_attr_size - length of attribute not including padding + * @payload: length of payload + */ +static inline int nla_attr_size(int payload) +{ + return NLA_HDRLEN + payload; +} + +/** + * 
nla_total_size - total length of attribute including padding
+ * @payload: length of payload
+ */
+static inline int nla_total_size(int payload)
+{
+	return NLA_ALIGN(nla_attr_size(payload));
+}
+
+/**
+ * nla_padlen - length of padding at the tail of attribute
+ * @payload: length of payload
+ */
+static inline int nla_padlen(int payload)
+{
+	return nla_total_size(payload) - nla_attr_size(payload);
+}
+
+/**
+ * nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *nla_data(const struct nlattr *nla)
+{
+	return (char *) nla + NLA_HDRLEN;
+}
+
+/**
+ * nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int nla_len(const struct nlattr *nla)
+{
+	return nla->nla_len - NLA_HDRLEN;
+}
+
+/**
+ * nla_ok - check if the netlink attribute fits into the remaining bytes
+ * @nla: netlink attribute
+ * @remaining: bytes remaining in attribute stream (negative means none left)
+ */
+static inline int nla_ok(const struct nlattr *nla, int remaining)
+{
+	return remaining >= (int) sizeof(*nla) && /* signed: no read past end */
+	       nla->nla_len >= sizeof(*nla) &&
+	       nla->nla_len <= remaining;
+}
+
+/**
+ * nla_next - next netlink attribute in attribute stream
+ * @nla: netlink attribute
+ * @remaining: number of bytes remaining in attribute stream
+ *
+ * Returns the next netlink attribute in the attribute stream and
+ * decrements remaining by the size of the current attribute.
+ */
+static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+	int totlen = NLA_ALIGN(nla->nla_len);
+
+	*remaining -= totlen;
+	return (struct nlattr *) ((char *) nla + totlen);
+}
+
+/**
+ * nla_find_nested - find attribute in a set of nested attributes
+ * @nla: attribute containing the nested attributes
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute which matches the specified type.
+ */
+static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
+{
+	return nla_find(nla_data(nla), nla_len(nla), attrtype);
+}
+
+/**
+ * nla_parse_nested - parse nested attributes
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @nla: attribute containing the nested attributes
+ * @policy: validation policy
+ *
+ * See nla_parse()
+ */
+static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
+				   struct nlattr *nla,
+				   const struct nla_policy *policy)
+{
+	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
+}
+/**
+ * nla_put_u8 - Add a u8 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
+{
+	return nla_put(skb, attrtype, sizeof(u8), &value);
+}
+
+/**
+ * nla_put_u16 - Add a u16 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
+{
+	return nla_put(skb, attrtype, sizeof(u16), &value);
+}
+
+/**
+ * nla_put_u32 - Add a u32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
+{
+	return nla_put(skb, attrtype, sizeof(u32), &value);
+}
+
+/**
+ * nla_put_u64 - Add a u64 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value)
+{
+	return nla_put(skb, attrtype, sizeof(u64), &value);
+}
+
+/**
+ * nla_put_string - Add a string netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * 
@attrtype: attribute type + * @str: NUL terminated string + */ +static inline int nla_put_string(struct sk_buff *skb, int attrtype, + const char *str) +{ + return nla_put(skb, attrtype, strlen(str) + 1, str); +} + +/** + * nla_put_flag - Add a flag netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + */ +static inline int nla_put_flag(struct sk_buff *skb, int attrtype) +{ + return nla_put(skb, attrtype, 0, NULL); +} + +/** + * nla_put_msecs - Add a msecs netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @jiffies: number of msecs in jiffies + */ +static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, + unsigned long jiffies) +{ + u64 tmp = jiffies_to_msecs(jiffies); + return nla_put(skb, attrtype, sizeof(u64), &tmp); +} + +#define NLA_PUT(skb, attrtype, attrlen, data) \ + do { \ + if (nla_put(skb, attrtype, attrlen, data) < 0) \ + goto nla_put_failure; \ + } while(0) + +#define NLA_PUT_TYPE(skb, type, attrtype, value) \ + do { \ + type __tmp = value; \ + NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \ + } while(0) + +#define NLA_PUT_U8(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u8, attrtype, value) + +#define NLA_PUT_U16(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u16, attrtype, value) + +#define NLA_PUT_LE16(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __le16, attrtype, value) + +#define NLA_PUT_U32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u32, attrtype, value) + +#define NLA_PUT_BE32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __be32, attrtype, value) + +#define NLA_PUT_U64(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u64, attrtype, value) + +#define NLA_PUT_STRING(skb, attrtype, value) \ + NLA_PUT(skb, attrtype, strlen(value) + 1, value) + +#define NLA_PUT_FLAG(skb, attrtype) \ + NLA_PUT(skb, attrtype, 0, NULL) + +#define NLA_PUT_MSECS(skb, attrtype, jiffies) \ + NLA_PUT_U64(skb, attrtype, jiffies_to_msecs(jiffies)) + +/** + * 
nla_get_u32 - return payload of u32 attribute + * @nla: u32 netlink attribute + */ +static inline u32 nla_get_u32(struct nlattr *nla) +{ + return *(u32 *) nla_data(nla); +} + +/** + * nla_get_be32 - return payload of __be32 attribute + * @nla: __be32 netlink attribute + */ +static inline __be32 nla_get_be32(struct nlattr *nla) +{ + return *(__be32 *) nla_data(nla); +} + +/** + * nla_get_u16 - return payload of u16 attribute + * @nla: u16 netlink attribute + */ +static inline u16 nla_get_u16(struct nlattr *nla) +{ + return *(u16 *) nla_data(nla); +} + +/** + * nla_get_le16 - return payload of __le16 attribute + * @nla: __le16 netlink attribute + */ +static inline __le16 nla_get_le16(struct nlattr *nla) +{ + return *(__le16 *) nla_data(nla); +} + +/** + * nla_get_u8 - return payload of u8 attribute + * @nla: u8 netlink attribute + */ +static inline u8 nla_get_u8(struct nlattr *nla) +{ + return *(u8 *) nla_data(nla); +} + +/** + * nla_get_u64 - return payload of u64 attribute + * @nla: u64 netlink attribute + */ +static inline u64 nla_get_u64(struct nlattr *nla) +{ + u64 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + + return tmp; +} + +/** + * nla_get_flag - return payload of flag attribute + * @nla: flag netlink attribute + */ +static inline int nla_get_flag(struct nlattr *nla) +{ + return !!nla; +} + +/** + * nla_get_msecs - return payload of msecs attribute + * @nla: msecs netlink attribute + * + * Returns the number of milliseconds in jiffies. 
+ */
+static inline unsigned long nla_get_msecs(struct nlattr *nla)
+{
+	u64 msecs = nla_get_u64(nla);
+
+	return msecs_to_jiffies((unsigned long) msecs);
+}
+
+/**
+ * nla_nest_start - Start a new level of nested attributes
+ * @skb: socket buffer to add attributes to
+ * @attrtype: attribute type of container
+ *
+ * Returns the container attribute
+ */
+static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
+{
+	struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
+
+	if (nla_put(skb, attrtype, 0, NULL) < 0)
+		return NULL;
+
+	return start;
+}
+
+/**
+ * nla_nest_end - Finalize nesting of attributes
+ * @skb: socket buffer the attributes are stored in
+ * @start: container attribute
+ *
+ * Corrects the container attribute header to include all
+ * appended attributes.
+ *
+ * Returns the total data length of the skb.
+ */
+static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
+{
+	start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
+	return skb->len;
+}
+
+/**
+ * nla_nest_cancel - Cancel nesting of attributes
+ * @skb: socket buffer the message is stored in
+ * @start: container attribute
+ *
+ * Removes the container attribute, including all nested
+ * attributes. Returns -1.
+ */
+static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
+{
+	return nlmsg_trim(skb, start);
+}
+
+/**
+ * nla_validate_nested - Validate a stream of nested attributes
+ * @start: container attribute
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * Validates all attributes in the nested attribute stream against the
+ * specified policy. Attributes with a type exceeding maxtype will be
+ * ignored. See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */ +static inline int nla_validate_nested(struct nlattr *start, int maxtype, + const struct nla_policy *policy) +{ + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); +} + +/** + * nla_for_each_attr - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +/** + * nla_for_each_nested - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested(pos, nla, rem) \ + nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) + +#endif diff --git a/datapath/linux-2.4/compat-2.4/kernel.c b/datapath/linux-2.4/compat-2.4/kernel.c new file mode 100644 index 00000000..a08bb2d8 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/kernel.c @@ -0,0 +1,27 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + */ + +#include +#include +#include + +int vprintk(const char *msg, ...) 
+{ +#define BUFFER_SIZE 1024 + char *buffer = kmalloc(BUFFER_SIZE, GFP_ATOMIC); + int retval; + if (buffer) { + va_list args; + va_start(args, msg); + vsnprintf(buffer, BUFFER_SIZE, msg, args); + va_end(args); + retval = printk("%s", buffer); + kfree(buffer); + } else { + retval = printk("<> %s", msg); + } + return retval; +} + +EXPORT_SYMBOL(vprintk); diff --git a/datapath/linux-2.4/compat-2.4/netlink.c b/datapath/linux-2.4/compat-2.4/netlink.c new file mode 100644 index 00000000..79aedee2 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/netlink.c @@ -0,0 +1,116 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * netlink_queue_skip - Skip netlink message while processing queue. + * @nlh: Netlink message to be skipped + * @skb: Socket buffer containing the netlink messages. + * + * Pulls the given netlink message off the socket buffer so the next + * call to netlink_queue_run() will not reconsider the message. 
+ */ +static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) +{ + int msglen = NLMSG_ALIGN(nlh->nlmsg_len); + + if (msglen > skb->len) + msglen = skb->len; + + skb_pull(skb, msglen); +} + +static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, + struct nlmsghdr *)) +{ + struct nlmsghdr *nlh; + int err; + + while (skb->len >= nlmsg_total_size(0)) { + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) + return 0; + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) + goto skip; + + /* Skip control messages */ + if (nlh->nlmsg_type < NLMSG_MIN_TYPE) + goto skip; + + err = cb(skb, nlh); + if (err == -EINTR) { + /* Not an error, but we interrupt processing */ + netlink_queue_skip(nlh, skb); + return err; + } +skip: + if (nlh->nlmsg_flags & NLM_F_ACK || err) + netlink_ack(skb, nlh, err); + + netlink_queue_skip(nlh, skb); + } + + return 0; +} + +/** + * netlink_run_queue - Process netlink receive queue. + * @sk: Netlink socket containing the queue + * @qlen: Place to store queue length upon entry + * @cb: Callback function invoked for each netlink message found + * + * Processes as much as there was in the queue upon entry and invokes + * a callback function for each netlink message found. The callback + * function may refuse a message by returning a negative error code + * but setting the error pointer to 0 in which case this function + * returns with a qlen != 0. + * + * qlen must be initialized to 0 before the initial entry, afterwards + * the function may be called repeatedly until qlen reaches 0. + * + * The callback function may return -EINTR to signal that processing + * of netlink messages shall be interrupted. In this case the message + * currently being processed will NOT be requeued onto the receive + * queue. 
+ */ +void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, struct nlmsghdr *)) +{ + struct sk_buff *skb; + + if (!*qlen || *qlen > skb_queue_len(&sk->receive_queue)) + *qlen = skb_queue_len(&sk->receive_queue); + + for (; *qlen; (*qlen)--) { + skb = skb_dequeue(&sk->receive_queue); + if (netlink_rcv_skb(skb, cb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else { + kfree_skb(skb); + (*qlen)--; + } + break; + } + + kfree_skb(skb); + } +} diff --git a/datapath/linux-2.4/compat-2.4/random32.c b/datapath/linux-2.4/compat-2.4/random32.c new file mode 100644 index 00000000..3a19e73a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/random32.c @@ -0,0 +1,142 @@ +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) 
+
+   This affects the seeding procedure by imposing the requirement
+   s1 > 1, s2 > 7, s3 > 15.
+
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "compat24.h"
+
+struct rnd_state {
+	u32 s1, s2, s3;
+};
+
+static struct rnd_state net_rand_state[NR_CPUS];
+
+static u32 __random32(struct rnd_state *state)
+{
+#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
+
+	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+	return (state->s1 ^ state->s2 ^ state->s3);
+}
+
+static void __set_random32(struct rnd_state *state, unsigned long s)
+{
+	if (s == 0)
+		s = 1;      /* default seed is 1 */
+
+#define LCG(n) (69069 * n)
+	state->s1 = LCG(s);
+	state->s2 = LCG(state->s1);
+	state->s3 = LCG(state->s2);
+
+	/* "warm it up" */
+	__random32(state);
+	__random32(state);
+	__random32(state);
+	__random32(state);
+	__random32(state);
+	__random32(state);
+}
+
+/**
+ *	random32 - pseudo random number generator
+ *
+ *	A 32 bit pseudo-random number is generated using a fast
+ *	algorithm suitable for simulation. This algorithm is NOT
+ *	considered safe for cryptographic use.
+ */
+u32 random32(void)
+{
+	return __random32(&net_rand_state[smp_processor_id()]);
+}
+EXPORT_SYMBOL(random32);
+
+/**
+ *	srandom32 - add entropy to pseudo random number generator
+ *	@entropy: seed value, XORed into the current CPU's s1 state
+ *
+ *	Add some additional seeding to the random32() pool.
+ *	Note: this pool is per cpu so it only affects current CPU.
+ */
+void srandom32(u32 entropy)
+{
+	struct rnd_state *state = &net_rand_state[smp_processor_id()];
+	__set_random32(state, state->s1 ^ entropy);
+}
+EXPORT_SYMBOL(srandom32);
+
+static int __init random32_reseed(void);
+
+/*
+ * Generate some initially weak seeding values to allow
+ * to start the random32() engine.
+ */
+int __init random32_init(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct rnd_state *state = &net_rand_state[i];
+		__set_random32(state, i + jiffies);
+	}
+	random32_reseed();
+	return 0;
+}
+
+/*
+ * Generate better values after random number generator
+ * is fully initialized.
+ */
+static int __init random32_reseed(void)
+{
+	int i;
+	unsigned long seed;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct rnd_state *state = &net_rand_state[i];
+
+		get_random_bytes(&seed, sizeof(seed));
+		__set_random32(state, seed);
+	}
+	return 0;
+}
diff --git a/datapath/linux-2.4/compat-2.4/rcupdate.c b/datapath/linux-2.4/compat-2.4/rcupdate.c
new file mode 100644
index 00000000..62066d2f
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/rcupdate.c
@@ -0,0 +1,160 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "compat24.h"
+
+#ifdef CONFIG_SMP
+#error "SMP configurations not supported for RCU backport."
+#endif
+
+static int default_blimit = 10;
+static int blimit;
+static int qhimark = 10000;
+static int qlowmark = 100;
+
+static struct rcu_head *head, **tail;
+static int qlen = 0;
+
+static struct tq_struct rcu_task;
+
+/*
+ * Invoke up to blimit queued RCU callbacks from the global list.
+ * Callbacks beyond the batch limit stay queued for the next pass.
+ */
+static void rcu_task_routine(void *unused)
+{
+	struct rcu_head *list, *next;
+	struct rcu_head **list_tail;
+	int count = 0;
+
+	/* Detach the whole pending list so callbacks run with irqs enabled. */
+	local_irq_disable();
+	list = head;
+	list_tail = tail;
+	head = NULL;
+	tail = &head;
+	local_irq_enable();
+
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+		if (++count >= blimit)
+			break;
+	}
+
+	local_irq_disable();
+	qlen -= count;
+	if (list) {
+		/*
+		 * The batch limit was hit with callbacks still pending.  Put
+		 * them back at the front of the queue, ahead of anything
+		 * queued while we ran, instead of dropping them.
+		 */
+		*list_tail = head;
+		if (!head)
+			tail = list_tail;
+		head = list;
+	}
+	local_irq_enable();
+	if (blimit == INT_MAX && qlen <= qlowmark)
+		blimit = default_blimit;
+
+	/* Leftover or newly queued callbacks need another pass. */
+	if (head)
+		schedule_task(&rcu_task);
+}
+
+
+static inline void force_quiescent_state(void)
+{
+	current->need_resched = 1;
+}
+
+/**
+ * call_rcu - Queue an RCU callback for invocation after a grace period.
+ * @rcu: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ *
+ * The update function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void call_rcu(struct rcu_head *rcu, void (*func)(struct rcu_head *rcu))
+{
+	unsigned long flags;
+
+	/* FIXME?  Following may be mildly expensive, may be worthwhile to
+	   optimize common case. */
+	schedule_task(&rcu_task);
+
+	rcu->func = func;
+	rcu->next = NULL;
+	local_irq_save(flags);
+	*tail = rcu;
+	tail = &rcu->next;
+	if (unlikely(++qlen > qhimark)) {
+		blimit = INT_MAX;
+		force_quiescent_state();
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(call_rcu);
+
+void rcu_init(void)
+{
+	head = NULL;
+	tail = &head;
+	blimit = default_blimit;
+	rcu_task.routine = rcu_task_routine;
+}
+
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(struct rcu_head *head)
+{
+	struct rcu_synchronize *rcu;
+
+	rcu = container_of(head, struct rcu_synchronize, head);
+	complete(&rcu->completion);
+}
+
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ *
+ * If your read-side code is not protected by rcu_read_lock(), do -not-
+ * use synchronize_rcu().
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+
+	/* Wait for it */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL(synchronize_rcu);
diff --git a/datapath/linux-2.4/compat-2.4/string.c b/datapath/linux-2.4/compat-2.4/string.c
new file mode 100644
index 00000000..e15c16bd
--- /dev/null
+++ b/datapath/linux-2.4/compat-2.4/string.c
@@ -0,0 +1,30 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ */ + +#include +#include + +#ifndef __HAVE_ARCH_STRCSPN +/** + * strcspn - Calculate the length of the initial substring of @s which does not contain letters in @reject + * @s: The string to be searched + * @reject: The string to avoid + */ +size_t strcspn(const char *s, const char *reject) +{ + const char *p; + const char *r; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) { + for (r = reject; *r != '\0'; ++r) { + if (*p == *r) + return count; + } + ++count; + } + return count; +} +EXPORT_SYMBOL(strcspn); +#endif diff --git a/datapath/linux-2.4/config/config-linux-2.4.35-kvm b/datapath/linux-2.4/config/config-linux-2.4.35-kvm new file mode 100644 index 00000000..d88f754d --- /dev/null +++ b/datapath/linux-2.4/config/config-linux-2.4.35-kvm @@ -0,0 +1,600 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MELAN is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_X86_TSC is not set +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_HAS_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_F00F_WORKS_OK=y +# 
CONFIG_X86_MCE is not set +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +# CONFIG_HIGHMEM is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +# CONFIG_SMP is not set +# CONFIG_X86_UP_APIC is not set +# CONFIG_X86_TSC_DISABLE is not set +CONFIG_X86_TSC=y + +# +# General setup +# +CONFIG_NET=y +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_ISA=y +CONFIG_PCI_NAMES=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +# CONFIG_OOM_KILLER is not set +CONFIG_PM=y +# CONFIG_APM is not set + +# +# ACPI Support +# +# CONFIG_ACPI is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_BLK_STATS is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set 
+# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +CONFIG_BRIDGE=y + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_IDEDISK_STROKE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_BLK_DEV_GENERIC is not set +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_BLK_DEV_ADMA100 is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_ATIIXP is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +CONFIG_BLK_DEV_RZ1000=y +# CONFIG_BLK_DEV_SC1200 is not set +# 
CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_CHIPSETS is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION_BOOT is not set +# CONFIG_FUSION_ISENSE is not set +# CONFIG_FUSION_CTL is not set +# CONFIG_FUSION_LAN is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +# CONFIG_TULIP is not set +# CONFIG_DE4X5 is not set +# CONFIG_DGRS is not set +# CONFIG_DM9102 is not set +CONFIG_EEPRO100=y +# CONFIG_EEPRO100_PIO is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 
Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_R8169 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_SK98LIN is not set +# CONFIG_TIGON3 is not set +# CONFIG_FDDI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_SCx200 is not set +# CONFIG_AMD_RNG is not set +# CONFIG_INTEL_RNG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# 
Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set + +# +# Direct Rendering Manager (XFree86 DRI support) +# +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_REISERFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +# CONFIG_FAT_FS is not set +# CONFIG_CRAMFS is not set +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_JFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_EXT2_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_XFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SUNRPC is not set +# CONFIG_LOCKD is not set +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_ZISOFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# 
CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VIDEO_SELECT is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# Support for USB gadgets +# +# CONFIG_USB_GADGET is not set + +# +# Bluetooth support +# +# CONFIG_BLUEZ is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SPINLOCK is not set +CONFIG_FRAME_POINTER=y +CONFIG_LOG_BUF_SHIFT=0 + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Library routines +# +CONFIG_CRC32=y +# CONFIG_ZLIB_INFLATE is not set +# CONFIG_ZLIB_DEFLATE is not set diff --git a/datapath/linux-2.4/kbuild.inc b/datapath/linux-2.4/kbuild.inc new file mode 100644 index 00000000..faa25e6f --- /dev/null +++ b/datapath/linux-2.4/kbuild.inc @@ -0,0 +1,246 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + 
+################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +# Kernel Search Path +# All the places we look for kernel source, in priority order (first existing entry wins) +KSP := /lib/modules/$(BUILD_KERNEL)/build \ + /lib/modules/$(BUILD_KERNEL)/source \ + /usr/src/linux-$(BUILD_KERNEL) \ + /usr/src/linux-$(shell echo $(BUILD_KERNEL) | sed 's/-.*//') \ + /usr/src/kernel-headers-$(BUILD_KERNEL) \ + /usr/src/kernel-source-$(BUILD_KERNEL) \ + /usr/src/linux-$(shell echo $(BUILD_KERNEL) | sed 's/\([0-9]*\.[0-9]*\)\..*/\1/') \ + /usr/src/linux + +# prune the list down to only values that exist +# and have an include/linux sub-directory +test_dir = $(shell [ -e $(dir)/include/linux ] && echo $(dir)) +KSP := $(foreach dir, $(KSP), $(test_dir)) + +# use the first valid entry in the search path unless KSRC was already set +ifeq (,$(KSRC)) + KSRC := $(firstword $(KSP)) +endif + +CFLAGS += $(CFLAGS_EXTRA) + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? 
+K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.4) + $(error Linux kernel source in $(KSRC) not 2.4) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + +# pick a compiler (K_VERSION is always 2.4 past the check above, so the 2.6 branch is never taken) +ifeq ($(K_VERSION),2.6) + CC := gcc cc +else + # SUBLEVEL 20 kernels require GCC 2.95; others try gcc-3.4, gcc-3.3, gcc-2.95 in that order + K_SUBLEVEL:=$(shell sed -n 's/SUBLEVEL = // p' $(KSRC)/Makefile) + ifeq ($(K_SUBLEVEL),20) + CC := gcc-2.95 + else + CC := gcc-3.4 gcc-3.3 gcc-2.95 + endif +endif +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need 
to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ifeq ($(ARCH),) + # Set the architecture if it hasn't been already set for cross-compilation + ARCH := $(shell uname -m | sed 's/i.86/i386/') +endif +ifeq ($(ARCH),alpha) + CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +CFLAGS += -I$(KSRC)/include -I. +CFLAGS += -I$(srcdir)/compat-2.4 -I$(srcdir)/compat-2.4/include +CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h: if it defines __module__bigmem, also define __module_bigmem + ifneq (,$(shell $(CC) $(CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel release string (UTS_RELEASE) - used below to locate /lib/modules/$(KVER)/build +KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[4-9]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. 
\ + *** This driver is not supported on kernel versions older than 2.4.0) +endif + +# look for CONFIG_SMP in the kernel's autoconf.h +SMP := $(shell $(CC) $(CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + CFLAGS += -D__SMP__ +endif + +########################################################################### +# 2.4.x + +# Makefile for 2.4.x kernel +TARGET = openflow_mod.o unit_mod.o compat24_mod.o + +CFLAGS += -Wno-sign-compare -fno-strict-aliasing +CFLAGS := -I $(srcdir)/compat-2.4/include $(CFLAGS) +CFLAGS := -I $(srcdir)/compat-2.4/include-$(ARCH) $(CFLAGS) + +default: $(TARGET) + +openflow_mod.o: $(filter-out $(TARGET), $(CFILES:.c=.o)) + $(LD) $(LDFLAGS) -r $^ -o $@ +unit_mod.o: $(UNIT_CFILES:.c=.o) + $(LD) $(LDFLAGS) -r $^ -o $@ +compat24_mod.o: $(COMPAT24_CFILES:.c=.o) + $(LD) $(LDFLAGS) -r $^ -o $@ + +ALL_CFILES = $(CFILES) $(UNIT_CFILES) $(COMPAT24_CFILES) +$(ALL_CFILES:.c=.o): $(HFILES) Makefile + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | \ + awk 'BEGIN {FS="."} NR==1 {print $$2}') + +.PHONY: clean + +clean: + rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c)\ + $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(COMPAT24_CFILES:.c=.o)\ + $(UNIT_CFILES:.c=.o) $(MANFILE).gz .*cmd .tmp_versions\ + compat24_mod.o unit_mod.o tmp/ diff --git a/datapath/linux-2.4/kernel-src.inc.in b/datapath/linux-2.4/kernel-src.inc.in new file mode 100644 index 00000000..399cfefe --- /dev/null 
+++ b/datapath/linux-2.4/kernel-src.inc.in @@ -0,0 +1 @@ +KSRC=@KSRC24@ diff --git a/datapath/linux-2.6-uml/.gitignore b/datapath/linux-2.6-uml/.gitignore new file mode 100644 index 00000000..fd7d3c88 --- /dev/null +++ b/datapath/linux-2.6-uml/.gitignore @@ -0,0 +1,15 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/flow.c +/forward.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/tmp diff --git a/datapath/linux-2.6-uml/Makefile.in b/datapath/linux-2.6-uml/Makefile.in new file mode 100644 index 00000000..f6130517 --- /dev/null +++ b/datapath/linux-2.6-uml/Makefile.in @@ -0,0 +1,51 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRCUML@ +export KVERSION = 2.6 +export VMDIR = @VMDIR@ +export VERSION = @VERSION@ + +ARCH = um +export ARCH + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(top_srcdir)/include + +# Files shared between 2.4 and 2.6 builds + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../datapath_t.c + +HFILES = ../openflow.h ../chain.h ../crc32.h ../flow.h ../forward.h \ + ../table.h ../datapath_t.h + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = openflow.c $(SIMLINKFILES) + +# Testing files used for both 2.6 and 2.4 kernels. 
Are symlinked +# locally +SHARED_T_FILES = ../table_t.c ../crc_t.c ../unit.c +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +# General rule to create symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + +all: default + +check: all diff --git a/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm b/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm new file mode 100644 index 00000000..687e8841 --- /dev/null +++ b/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm @@ -0,0 +1,896 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.23-rc3 +# Wed Sep 26 08:31:01 2007 +# +CONFIG_DEFCONFIG_LIST="arch/$ARCH/defconfig" +CONFIG_GENERIC_HARDIRQS=y +CONFIG_UML=y +CONFIG_MMU=y +CONFIG_NO_IOMEM=y +# CONFIG_TRACE_IRQFLAGS_SUPPORT is not set +CONFIG_LOCKDEP_SUPPORT=y +# CONFIG_STACKTRACE_SUPPORT is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_BUG=y +CONFIG_IRQ_RELEASE_METHOD=y + +# +# UML-specific options +# +# CONFIG_STATIC_LINK is not set +CONFIG_MODE_SKAS=y + +# +# Host processor type and features +# +# CONFIG_M386 is not set +CONFIG_M486=y +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set 
+CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_F00F_BUG=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_ALIGNMENT_16=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_UML_X86=y +# CONFIG_64BIT is not set +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_HOST_VMSPLIT_3G=y +# CONFIG_HOST_VMSPLIT_3G_OPT is not set +# CONFIG_HOST_VMSPLIT_2G is not set +# CONFIG_HOST_VMSPLIT_1G is not set +CONFIG_TOP_ADDR=0xC0000000 +# CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_STUB_CODE=0xbfffe000 +CONFIG_STUB_DATA=0xbffff000 +CONFIG_STUB_START=0xbfffe000 +CONFIG_ARCH_HAS_SC_SIGNALS=y +CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_LD_SCRIPT_DYN=y +CONFIG_NET=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOSTFS=m +# CONFIG_HPPFS is not set +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_NEST_LEVEL=0 +CONFIG_HIGHMEM=y +CONFIG_KERNEL_STACK_ORDER=0 +CONFIG_UML_REAL_TIME_CLOCK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=128 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y 
+CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_LSF=y +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_COW_COMMON=y +# CONFIG_MMAPPER is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_ATA_OVER_ETH is not set + +# +# Character Devices +# +CONFIG_STDERR_CONSOLE=y +CONFIG_STDIO_CONSOLE=y +CONFIG_SSL=y +CONFIG_NULL_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y +# CONFIG_NOCONFIG_CHAN is not set +CONFIG_CON_ZERO_CHAN="fd:0,fd:1" +CONFIG_CON_CHAN="xterm" +CONFIG_SSL_CHAN="pty" +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y 
+CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=y +CONFIG_UML_WATCHDOG=m +# CONFIG_UML_SOUND is not set +# CONFIG_SOUND is not set +# CONFIG_HOSTAUDIO is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_UML_RANDOM is not set + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Networking +# + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +# CONFIG_IP_ROUTE_VERBOSE is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is 
not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +# CONFIG_NF_CT_PROTO_UDPLITE is not set +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_TRACE is not set +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT 
is not set +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_U32 is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration 
(EXPERIMENTAL) +# +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: Netfilter Configuration +# +# CONFIG_DECNET_NF_GRABULATOR is not set + +# +# Bridge: Netfilter Configuration +# +# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m +CONFIG_IP_DCCP_ACKVEC=y + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_TFRC_LIB=m +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_TIPC=m +CONFIG_TIPC_ADVANCED=y +CONFIG_TIPC_ZONES=3 +CONFIG_TIPC_CLUSTERS=1 +CONFIG_TIPC_NODES=255 +CONFIG_TIPC_SLAVE_NODES=0 +CONFIG_TIPC_PORTS=8191 +CONFIG_TIPC_LOG=0 +# CONFIG_TIPC_DEBUG is not set +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +# CONFIG_ATM_MPOA is not set +CONFIG_ATM_BR2684=m +CONFIG_ATM_BR2684_IPFILTER=y +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_IPX_INTERN=y +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y +CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m + +# +# QoS 
and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +# CONFIG_NET_SCH_RR is not set +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +# CONFIG_NET_CLS_POLICE is not set +CONFIG_NET_CLS_IND=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=m +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# UML Network Devices +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_UML_NET_PCAP=y +CONFIG_UML_NET_SLIRP=y +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# 
Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_ATM_DRIVERS=y +# CONFIG_ATM_DUMMY is not set +# CONFIG_ATM_TCP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +CONFIG_CONNECTOR=m + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=m +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# 
CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY is not set +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=m +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_ECB is not set +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_PCBC=m 
+CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_TEA=m +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_DMA=y + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_MD is not set +# CONFIG_INPUT is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +CONFIG_DEBUG_LIST=y 
+CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +CONFIG_RCU_TORTURE_TEST=m +# CONFIG_FAULT_INJECTION is not set +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set +# CONFIG_DEBUG_STACK_USAGE is not set diff --git a/datapath/linux-2.6-uml/kbuild.inc b/datapath/linux-2.6-uml/kbuild.inc new file mode 100644 index 00000000..07276c78 --- /dev/null +++ b/datapath/linux-2.6-uml/kbuild.inc @@ -0,0 +1,210 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + +################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +CC := gcc cc +CFLAGS += $(CFLAGS_EXTRA) + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? (we assume 2.2 isn't in use anymore) +K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.6) + $(error Linux kernel source is not 2.6) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing
version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + + +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ARCH := $(shell uname -m | sed 's/i.86/i386/') +ifeq ($(ARCH),alpha) + CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +CFLAGS += -I$(KSRC)/include -I. +CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h + ifneq (,$(shell $(CC) $(CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel version - we use this to find the correct install path +KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[6]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. 
\ + *** This driver is not supported on kernel versions older than 2.6.0) +endif + +# look for SMP in config.h +SMP := $(shell $(CC) $(CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + CFLAGS += -D__SMP__ +endif + +########################################################################### +# Makefile for 2.6.x kernel +all: $(TARGET) +TARGET = openflow_mod.ko unit_mod.ko + +$(UNIT_CFILES): + $(foreach UNIT_CFILE, $(UNIT_CFILES), $(shell ln -s $(patsubst %,../t/%,$(UNIT_CFILE)) $(UNIT_CFILE))) + +ifneq ($(PATCHLEVEL),) +EXTRA_CFLAGS += $(CFLAGS_EXTRA) +obj-m += openflow_mod.o unit_mod.o +openflow_mod-objs := $(CFILES:.c=.o) +unit_mod-objs := $(UNIT_CFILES:.c=.o) +else +default: +ifeq ($(KOBJ),$(KSRC)) + $(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) modules +else + $(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) modules +endif +endif + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | \ + awk 'BEGIN {FS="."} NR==1 {print $$2}') + +.PHONY: clean + +clean: + rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c) \ + $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(MANFILE).gz .*cmd \ + .tmp_versions t/ tmp/ diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore new file mode 100644 index 00000000..098a312e --- /dev/null +++ b/datapath/linux-2.6/.gitignore @@ -0,0 +1,18 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/dp_dev.c +/flow.c +/forward.c 
+/forward_t.c +/datapath_t.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/tmp diff --git a/datapath/linux-2.6/Makefile.in b/datapath/linux-2.6/Makefile.in new file mode 100644 index 00000000..5a7cb55f --- /dev/null +++ b/datapath/linux-2.6/Makefile.in @@ -0,0 +1,65 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRC26@ +export KVERSION = 2.6 +export VMDIR = @VMDIR@ +export VERSION = @VERSION@ + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(srcdir)/datapath/ -I $(top_srcdir)/include + +# Files shared between 2.4 and 2.6 builds + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../datapath_t.c ../dp_dev.c \ + compat-2.6/genetlink.c \ + compat-2.6/random32.c + +HFILES = ../datapath.h ../chain.h ../crc32.h ../flow.h ../forward.h \ + ../table.h ../datapath_t.h \ + compat-2.6/compat26.h \ + compat-2.6/include/linux/ip.h \ + compat-2.6/include/linux/ipv6.h \ + compat-2.6/include/linux/lockdep.h \ + compat-2.6/include/linux/mutex.h \ + compat-2.6/include/linux/netlink.h \ + compat-2.6/include/linux/random.h \ + compat-2.6/include/linux/skbuff.h \ + compat-2.6/include/linux/tcp.h \ + compat-2.6/include/linux/timer.h \ + compat-2.6/include/linux/types.h \ + compat-2.6/include/linux/udp.h \ + compat-2.6/include/net/checksum.h \ + compat-2.6/include/net/genetlink.h + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = datapath.c $(SIMLINKFILES) + +# Testing files used for both 2.6 and 2.4 kernels.
Are symlinked +# locally +SHARED_T_FILES = ../table_t.c ../crc_t.c ../forward_t.c ../unit.c + +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +# General rule to create symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + +all: default + +check: all diff --git a/datapath/linux-2.6/compat-2.6/compat26.h b/datapath/linux-2.6/compat-2.6/compat26.h new file mode 100644 index 00000000..80132324 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/compat26.h @@ -0,0 +1,25 @@ +#ifndef __COMPAT26_H +#define __COMPAT26_H 1 + +#include <linux/version.h> + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) +/*---------------------------------------------------------------------------- + * In 2.6.24, a namespace argument became required for dev_get_by_name. */ +#define net_init NULL + +#define dev_get_by_name(net, name) \ + dev_get_by_name((name)) + +#endif /* linux kernel <= 2.6.23 */ + + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,22) +/*---------------------------------------------------------------------------- + * In 2.6.23, the last argument was dropped from kmem_cache_create.
*/ +#define kmem_cache_create(n, s, a, f, c) \ + kmem_cache_create((n), (s), (a), (f), (c), NULL) + +#endif /* linux kernel <= 2.6.22 */ + +#endif /* compat26.h */ diff --git a/datapath/linux-2.6/compat-2.6/genetlink.c b/datapath/linux-2.6/compat-2.6/genetlink.c new file mode 100644 index 00000000..c0e6ae9f --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/genetlink.c @@ -0,0 +1,15 @@ +#include "net/genetlink.h" + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + grp->id = 1; + grp->family = family; + + return 0; +} + +#endif /* kernel < 2.6.23 */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ip.h b/datapath/linux-2.6/compat-2.6/include/linux/ip.h new file mode 100644 index 00000000..79158735 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ip.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_IP_WRAPPER_H +#define __LINUX_IP_WRAPPER_H 1 + +#include_next <linux/ip.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +#include <linux/skbuff.h> + +static inline struct iphdr *ip_hdr(const struct sk_buff *skb) +{ + return (struct iphdr *)skb_network_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h new file mode 100644 index 00000000..e735a780 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_IPV6_WRAPPER_H +#define __LINUX_IPV6_WRAPPER_H 1 + +#include_next <linux/ipv6.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +#include <linux/skbuff.h> + +static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) +{ + return (struct ipv6hdr *)skb_network_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h
b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h new file mode 100644 index 00000000..1c839423 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h @@ -0,0 +1,450 @@ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * see Documentation/lockdep-design.txt for more details. + */ +#ifndef __LINUX_LOCKDEP_WRAPPER_H +#define __LINUX_LOCKDEP_WRAPPER_H + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) +#include_next <linux/lockdep.h> /* native header only exists from 2.6.18 on */ +#else /* older kernels: provide the definitions ourselves */ + +struct task_struct; +struct lockdep_map; + +#ifdef CONFIG_LOCKDEP + +#include <linux/linkage.h> +#include <linux/list.h> +#include <linux/debug_locks.h> +#include <linux/stacktrace.h> + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + *
Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[4]; +#endif +}; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[4]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class 
*class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache; + const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; +#endif +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; + int distance; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. + * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. 
[Note: hardirq and softirq contexts + * are separated from each other too.] + * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key, 0) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key, 0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + lockdep_init_map(&(lock)->dep_map, #key, key, sub) +#define lockdep_set_subclass(lock, sub) \ + lockdep_init_map(&(lock)->dep_map, #lock, \ + (lock)->dep_map.key, sub) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +#define lockdep_depth(tsk) (0) + +#endif /* !LOCKDEP */ + +#ifdef CONFIG_LOCK_STAT + +extern void lock_contended(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); + +#define LOCK_CONTENDED(_lock, try, lock) \ +do { \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + lock(_lock); \ + } \ + lock_acquired(&(_lock)->dep_map); \ +} while (0) + +#else /* CONFIG_LOCK_STAT */ + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) + +#define LOCK_CONTENDED(_lock, try, lock) \ + lock(_lock) + +#endif /* CONFIG_LOCK_STAT */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && 
defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +static inline void early_init_irq_lock_class(void) +{ +} +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +extern void print_irqtrace_events(struct task_struct *curr); +#else +static inline void early_boot_irqs_off(void) +{ +} +static inline void early_boot_irqs_on(void) +{ +} +static inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) + */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# 
define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* linux kernel < 2.6.18 */ + +#endif /* __LINUX_LOCKDEP_WRAPPER_H */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/mutex.h b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h new file mode 100644 index 00000000..cb5b2738 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h @@ -0,0 +1,59 @@ +#ifndef __LINUX_MUTEX_WRAPPER_H +#define __LINUX_MUTEX_WRAPPER_H + + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + +#include <asm/semaphore.h> + +struct mutex { + struct semaphore sema; +}; + +#define mutex_init(mutex) init_MUTEX(&(mutex)->sema) +#define mutex_destroy(mutex) do { } while (0) + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) } + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt.
+ */ +static inline void mutex_lock(struct mutex *lock) +{ + down(&lock->sema); +} + +static inline int mutex_lock_interruptible(struct mutex *lock) +{ + return down_interruptible(&lock->sema); +} + +#define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) + +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! + */ +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sema); +} + +static inline void mutex_unlock(struct mutex *lock) +{ + up(&lock->sema); +} +#else + +#include_next <linux/mutex.h> + +#endif /* linux version < 2.6.16 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netlink.h b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h new file mode 100644 index 00000000..f1588af0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_NETLINK_WRAPPER_H +#define __LINUX_NETLINK_WRAPPER_H 1 + +#include <linux/skbuff.h> +#include_next <linux/netlink.h> +#include <net/netlink.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) + +#define nlmsg_new(s, f) nlmsg_new_proper((s), (f)) +static inline struct sk_buff *nlmsg_new_proper(int size, gfp_t flags) +{ + return alloc_skb(size, flags); +} + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h new file mode 100644 index 00000000..4e4932c9 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/random.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_RANDOM_WRAPPER_H +#define __LINUX_RANDOM_WRAPPER_H 1 + +#include_next <linux/random.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#ifdef __KERNEL__ +u32 random32(void); +void srandom32(u32 seed); +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git
a/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h new file mode 100644 index 00000000..67726747 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h @@ -0,0 +1,63 @@ +#ifndef __LINUX_SKBUFF_WRAPPER_H +#define __LINUX_SKBUFF_WRAPPER_H 1 + +#include_next + +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +/* Emulate Linux 2.6.17 and later behavior, in which kfree_skb silently ignores + * null pointer arguments. */ +#define kfree_skb(skb) kfree_skb_maybe_null(skb) +static inline void kfree_skb_maybe_null(struct sk_buff *skb) +{ + if (likely(skb != NULL)) + (kfree_skb)(skb); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#define mac_header mac.raw +#define network_header nh.raw + + +/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at + * least test against it: see update_csum() in forward.c. */ +#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_COMPLETE CHECKSUM_HW + +static inline unsigned char *skb_transport_header(const struct sk_buff *skb) +{ + return skb->h.raw; +} + +static inline void skb_set_transport_header(struct sk_buff *skb, + const int offset) +{ + skb->h.raw = skb->data + offset; +} + +static inline unsigned char *skb_network_header(const struct sk_buff *skb) +{ + return skb->nh.raw; +} + +static inline void skb_set_network_header(struct sk_buff *skb, const int offset) +{ + skb->nh.raw = skb->data + offset; +} + +static inline unsigned char *skb_mac_header(const struct sk_buff *skb) +{ + return skb->mac.raw; +} + +static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) +{ + skb->mac.raw = skb->data + offset; +} + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/tcp.h b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h new file mode 100644 index 00000000..528f16af --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h @@ -0,0 +1,18 @@ 
+#ifndef __LINUX_TCP_WRAPPER_H +#define __LINUX_TCP_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) +{ + return (struct tcphdr *)skb_transport_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/timer.h b/datapath/linux-2.6/compat-2.6/include/linux/timer.h new file mode 100644 index 00000000..d37fcadd --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/timer.h @@ -0,0 +1,90 @@ +#ifndef __LINUX_TIMER_WRAPPER_H +#define __LINUX_TIMER_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +extern unsigned long volatile jiffies; + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. 
+ */ +static inline unsigned long __round_jiffies(unsigned long j, int cpu) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + */ + if (rem < HZ/4) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. 
+ */ +static inline unsigned long round_jiffies(unsigned long j) +{ + return __round_jiffies(j, 0); // FIXME: hard-codes cpu 0, so no per-CPU skew is applied; presumably should pass the current CPU +} + +#endif /* linux kernel < 2.6.20 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h new file mode 100644 index 00000000..c1f375eb --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h @@ -0,0 +1,14 @@ +#ifndef __LINUX_TYPES_WRAPPER_H +#define __LINUX_TYPES_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#endif /* linux kernel < 2.6.20 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/udp.h b/datapath/linux-2.6/compat-2.6/include/linux/udp.h new file mode 100644 index 00000000..ffab1873 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/udp.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_UDP_WRAPPER_H +#define __LINUX_UDP_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +static inline struct udphdr *udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif /* __KERNEL__ */ + + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/net/checksum.h b/datapath/linux-2.6/compat-2.6/include/net/checksum.h new file mode 100644 index 00000000..c64c6bd0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/net/checksum.h @@ -0,0 +1,16 @@ +#ifndef __NET_CHECKSUM_WRAPPER_H +#define __NET_CHECKSUM_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} + +#endif /* linux kernel < 2.6.20 */ + +#endif /* checksum.h */ diff --git a/datapath/linux-2.6/compat-2.6/include/net/genetlink.h b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h new file mode 100644 index 
00000000..57a47316 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h @@ -0,0 +1,123 @@ +#ifndef __NET_GENERIC_NETLINK_WRAPPER_H +#define __NET_GENERIC_NETLINK_WRAPPER_H 1 + + +#include +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +#include + +/*---------------------------------------------------------------------------- + * In 2.6.23, registering of multicast groups was added. Our compatibility + * layer just supports registering a single group, since that's all we + * need. + */ + +/** + * struct genl_multicast_group - generic netlink multicast group + * @name: name of the multicast group, names are per-family + * @id: multicast group ID, assigned by the core, to use with + * genlmsg_multicast(). + * @list: list entry for linking + * @family: pointer to family, need not be set before registering + */ +struct genl_multicast_group +{ + struct genl_family *family; /* private */ + struct list_head list; /* private */ + char name[GENL_NAMSIZ]; + u32 id; +}; + +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp); +#endif /* linux kernel < 2.6.23 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + +#define genlmsg_multicast(s, p, g, f) \ + genlmsg_multicast_flags((s), (p), (g), (f)) + +static inline int genlmsg_multicast_flags(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags) +{ + int err; + + NETLINK_CB(skb).dst_group = group; + + err = netlink_broadcast(genl_sock, skb, pid, group, flags); + if (err > 0) + err = 0; + + 
return err; +} +#endif /* linux kernel < 2.6.19 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#define genlmsg_put(skb, p, seq, fam, flg, c) \ + genlmsg_put((skb), (p), (seq), (fam)->id, (fam)->hdrsize, \ + (flg), (c), (fam)->version) + +/** + * genlmsg_put_reply - Add generic netlink header to a reply message + * @skb: socket buffer holding the message + * @info: receiver info + * @family: generic netlink family + * @flags: netlink message flags + * @cmd: generic netlink command + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put_reply(struct sk_buff *skb, + struct genl_info *info, struct genl_family *family, + int flags, u8 cmd) +{ + return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + flags, cmd); +} + +/** + * genlmsg_reply - reply to a request + * @skb: netlink message to be sent back + * @info: receiver information + */ +static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) +{ + return genlmsg_unicast(skb, info->snd_pid); +} + +/** + * genlmsg_new - Allocate a new generic netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. 
+ */ +static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) +{ + return nlmsg_new(genlmsg_total_size(payload), flags); +} +#endif /* linux kernel < 2.6.20 */ + +#endif /* genetlink.h */ diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c new file mode 100644 index 00000000..981b55c1 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/random32.c @@ -0,0 +1,146 @@ +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. 
+ +*/ + +#include +#include +#include +#include +#include + +#include "compat26.h" + +struct rnd_state { + u32 s1, s2, s3; +}; + +static struct rnd_state net_rand_state[NR_CPUS]; + +static u32 __random32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} + +static void __set_random32(struct rnd_state *state, unsigned long s) +{ + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); +} + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + return __random32(&net_rand_state[smp_processor_id()]); +} +EXPORT_SYMBOL(random32); + +/** + * srandom32 - add entropy to pseudo random number generator + * @entropy: seed value + * + * Add some additional seeding to the random32() pool. + * Note: this pool is per cpu so it only affects current CPU. + */ +void srandom32(u32 entropy) +{ + struct rnd_state *state = &net_rand_state[smp_processor_id()]; + __set_random32(state, state->s1 ^ entropy); +} +EXPORT_SYMBOL(srandom32); + +static int __init random32_reseed(void); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. 
+ */ +int __init random32_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + __set_random32(state, i + jiffies); + } + random32_reseed(); + return 0; +} + +/* + * Reseed every per-CPU state from get_random_bytes() once the random + * number generator is fully initialized. + */ +static int __init random32_reseed(void) +{ + int i; + unsigned long seed; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + + get_random_bytes(&seed, sizeof(seed)); + __set_random32(state, seed); + } + return 0; +} + +#endif /* kernel < 2.6.19 */ diff --git a/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm new file mode 100644 index 00000000..f287cf72 --- /dev/null +++ b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm @@ -0,0 +1,1408 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.23-rc9 +# Fri Oct 19 15:08:37 2007 +# +CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y 
+CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_LSF=y +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Processor type and features +# +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_SMP=y +CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set +# CONFIG_M386 is not set +CONFIG_M486=y +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# 
CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_F00F_BUG=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_ALIGNMENT_16=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +# CONFIG_HPET_TIMER is not set +CONFIG_NR_CPUS=8 +# CONFIG_SCHED_SMT is not set +CONFIG_SCHED_MC=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +# CONFIG_X86_MCE is not set +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y 
+CONFIG_SPARSEMEM_STATIC=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +CONFIG_IRQBALANCE=y +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +CONFIG_HOTPLUG_CPU=y +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND_SMP_POSSIBLE=y +CONFIG_SUSPEND=y +CONFIG_HIBERNATION_SMP_POSSIBLE=y +# CONFIG_HIBERNATION is not set +# CONFIG_ACPI is not set +CONFIG_APM=y +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_DEBUG is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +CONFIG_BINFMT_MISC=m + +# +# Networking +# 
+CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +# CONFIG_IP_ROUTE_VERBOSE is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=m 
+CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +# CONFIG_NF_CT_PROTO_UDPLITE is not set +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_TRACE is not set +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m 
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_U32 is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration (EXPERIMENTAL) +# +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: 
Netfilter Configuration +# +# CONFIG_DECNET_NF_GRABULATOR is not set + +# +# Bridge: Netfilter Configuration +# +# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m +CONFIG_IP_DCCP_ACKVEC=y + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_TFRC_LIB=m +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_TIPC=m +CONFIG_TIPC_ADVANCED=y +CONFIG_TIPC_ZONES=3 +CONFIG_TIPC_CLUSTERS=1 +CONFIG_TIPC_NODES=255 +CONFIG_TIPC_SLAVE_NODES=0 +CONFIG_TIPC_PORTS=8191 +CONFIG_TIPC_LOG=0 +# CONFIG_TIPC_DEBUG is not set +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +# CONFIG_ATM_MPOA is not set +CONFIG_ATM_BR2684=m +CONFIG_ATM_BR2684_IPFILTER=y +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_IPX_INTERN=y +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +# CONFIG_LTPC is not set +# CONFIG_COPS is not set +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y +CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +# CONFIG_NET_SCH_RR is not set +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y 
+CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +# CONFIG_NET_CLS_POLICE is not set +CONFIG_NET_CLS_IND=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=m +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_CONNECTOR=m +# CONFIG_MTD is not set +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +# CONFIG_PARPORT_SERIAL is not set +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set +# CONFIG_PARPORT_1284 is not set +# CONFIG_PNP is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m 
+CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +# CONFIG_BLK_DEV_IDEDMA_PCI is not set +# CONFIG_IDE_ARM is not set +# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_IDEDMA is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m 
+# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_NET_TULIP is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_PCNET32_NAPI is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_CS89x0 is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +CONFIG_8139CP=y +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +# CONFIG_NET_POCKET is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_ATM_DRIVERS=y +# CONFIG_ATM_DUMMY is not set +# CONFIG_ATM_TCP is not set +# CONFIG_ATM_LANAI is not set +# CONFIG_ATM_ENI is not set +# CONFIG_ATM_FIRESTREAM is not set +# CONFIG_ATM_ZATM is not set +# CONFIG_ATM_NICSTAR is not set +# CONFIG_ATM_IDT77252 is not set +# CONFIG_ATM_AMBASSADOR is not set +# CONFIG_ATM_HORIZON is not set +# CONFIG_ATM_IA is not set +# CONFIG_ATM_FORE200E_MAYBE is not set +# CONFIG_ATM_HE is not set +# CONFIG_FDDI is not set +CONFIG_HIPPI=y +# CONFIG_ROADRUNNER is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE 
is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_INPORT is not set +# CONFIG_MOUSE_LOGIBM is not set +# CONFIG_MOUSE_PC110PAD is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_SERIAL_8250_PCI=y 
+CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +# CONFIG_TIPAR is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=y +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# ISA-based Watchdog Cards +# +# CONFIG_PCWATCHDOG is not set +# CONFIG_MIXCOMWD is not set +# CONFIG_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set +# 
CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=m +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_VIDEO_SELECT is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +# CONFIG_HID is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# 
CONFIG_NEW_LEDS is not set +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# +# CONFIG_AUXDISPLAY is not set +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set + +# +# Userspace I/O +# +# CONFIG_UIO is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=m +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# 
+CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set +CONFIG_INSTRUMENTATION=y +# CONFIG_PROFILING is not set +# CONFIG_KPROBES is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is 
not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_LOCK_STAT=y +# CONFIG_DEBUG_LOCKDEP is not set +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +CONFIG_DEBUG_KOBJECT=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_LIST=y +CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +CONFIG_RCU_TORTURE_TEST=m +# CONFIG_FAULT_INJECTION is not set +CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_RODATA=y +CONFIG_4KSTACKS=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY is not set +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=m +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_ECB is not set +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not 
set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_TEA=m +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y diff --git a/datapath/linux-2.6/kbuild.inc b/datapath/linux-2.6/kbuild.inc new file mode 100644 index 00000000..c3102d61 --- /dev/null +++ b/datapath/linux-2.6/kbuild.inc @@ -0,0 +1,214 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + +################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. 
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +ifeq (,$(CC)) + CC := gcc cc +endif + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? (we assume 2.2 isn't in use anymore) +K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.6) + $(error Linux kernel source is not 2.6) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + EXTRA_CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := 
$(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + + +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ifeq ($(ARCH),) + # Set the architecture if it hasn't been already set for cross-compilation + ARCH := $(shell uname -m | sed 's/i.86/i386/') +endif +ifeq ($(ARCH),alpha) + EXTRA_CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + EXTRA_CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + EXTRA_CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + EXTRA_CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +EXTRA_CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +NOSTDINC_FLAGS += -I$(srcdir)/compat-2.6 -I$(srcdir)/compat-2.6/include +EXTRA_CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h + ifneq (,$(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + EXTRA_CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel version - we use this to find the correct install path +KVER := $(shell $(CC) 
$(EXTRA_CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[6]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. \ + *** This driver is not supported on kernel versions older than 2.6.0) +endif + +# look for SMP in config.h +SMP := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + EXTRA_CFLAGS += -D__SMP__ +endif + +########################################################################### +# Makefile for 2.6.x kernel +all: $(TARGET) +TARGET = openflow_mod.ko unit_mod.ko + +$(UNIT_CFILES): + $(foreach UNIT_CFILE, $(UNIT_CFILES), $(shell ln -s $(patsubst %,../t/%,$(UNIT_CFILE)) $(UNIT_CFILE))) + +ifneq ($(PATCHLEVEL),) +EXTRA_CFLAGS += $(CFLAGS_EXTRA) +obj-m += openflow_mod.o unit_mod.o +openflow_mod-objs := $(CFILES:.c=.o) +unit_mod-objs := $(UNIT_CFILES:.c=.o) +else +default: +ifeq ($(KOBJ),$(KSRC)) + $(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) modules +else + $(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) modules +endif +endif + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | 
\
+         awk 'BEGIN {FS="."} NR==1 {print $$2}')
+
+.PHONY: clean
+
+clean:
+	rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c) \
+	$(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(MANFILE).gz .*cmd \
+	.tmp_versions t/ *.o tmp/
diff --git a/datapath/linux-2.6/kernel-src.inc.in b/datapath/linux-2.6/kernel-src.inc.in
new file mode 100644
index 00000000..531f7bc4
--- /dev/null
+++ b/datapath/linux-2.6/kernel-src.inc.in
@@ -0,0 +1 @@
+KSRC=@KSRC26@
diff --git a/datapath/run-unit-tests b/datapath/run-unit-tests
new file mode 100755
index 00000000..85257fcb
--- /dev/null
+++ b/datapath/run-unit-tests
@@ -0,0 +1,64 @@
+#! /bin/sh -ex
+
+fail () {
+    echo "$@"
+    exit 1
+}
+
+test -n "$VMDIR" || fail "must pass --with-vm to configure to run unit tests"
+
+rm -rf tmp
+mkdir tmp
+cd tmp
+
+ln -s $KSRC/arch/i386/boot/bzImage kernel.bin
+ln -s $VMDIR/hda.dsk hda.dsk
+
+cat > unit.conf < unit.cd/runme <
+
+#define SNAP_OUI_LEN 3
+
+
+/* LLC + SNAP header as it appears immediately after the Ethernet header. */
+struct snap_hdr
+{
+	uint8_t  dsap;  /* Always 0xAA */
+	uint8_t  ssap;  /* Always 0xAA */
+	uint8_t  ctrl;
+	uint8_t  oui[SNAP_OUI_LEN];
+	uint16_t ethertype;
+} __attribute__ ((packed));
+
+/* Extracts the encapsulated EtherType from the SNAP header that follows the
+ * Ethernet header at 'skb->data'.
+ *
+ * The header is accepted only if both the DSAP and the SSAP (ignoring the low
+ * bit of each) equal LLC_SAP_SNAP and the OUI is all-zero, which is the
+ * RFC 1042 encoding in which the trailing 16 bits are an EtherType.
+ *
+ * On success stores the 'ethertype' field, exactly as found in the packet (no
+ * byte-order conversion is done here), in '*ethertype' and returns 0.
+ * Otherwise returns -EINVAL and leaves '*ethertype' untouched.
+ *
+ * This function does not check that the skb actually holds
+ * sizeof(struct ethhdr) + sizeof(struct snap_hdr) bytes. */
+static inline int snap_get_ethertype(struct sk_buff *skb, uint16_t *ethertype)
+{
+	struct snap_hdr *sh = (struct snap_hdr *)(skb->data
+				+ sizeof(struct ethhdr));
+	if (((sh->dsap & 0xFE) != LLC_SAP_SNAP)
+		|| ((sh->ssap & 0xFE) != LLC_SAP_SNAP)
+		|| memcmp(sh->oui, "\0\0\0", SNAP_OUI_LEN))
+		return -EINVAL;
+
+	*ethertype = sh->ethertype;
+
+	return 0;
+}
+
+#endif /* snap.h */
diff --git a/datapath/t/.gitignore b/datapath/t/.gitignore
new file mode 100644
index 00000000..35e75b7a
--- /dev/null
+++ b/datapath/t/.gitignore
@@ -0,0 +1,4 @@
+/Makefile
+/Makefile.in
+/forward_t.h
+/fwdhgen
diff --git a/datapath/table-hash.c b/datapath/table-hash.c
new file mode 100644
index 00000000..57a9f1c0
--- /dev/null
+++ b/datapath/table-hash.c
@@ -0,0 +1,466 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior
+ * University
+ */
+
+#include "table.h"
+#include "crc32.h"
+#include "flow.h"
+#include "datapath.h"
+
+/* NOTE(review): the names of the five system headers below were lost when
+ * this patch was flattened (the text between '<' and '>' was stripped).  They
+ * are reconstructed from what the code uses (kmalloc/kfree, vmalloc/vfree,
+ * VMALLOC_START/VMALLOC_END) -- confirm against the original commit. */
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+static void *kmem_alloc(size_t);
+static void *kmem_zalloc(size_t);
+static void kmem_free(void *, size_t);
+
+/* Exact-match flow table: a direct-mapped hash with one flow per bucket. */
+struct sw_table_hash {
+	struct sw_table swt;
+	spinlock_t lock;          /* Taken by table_hash_insert(). */
+	struct crc32 crc32;       /* Hash function state. */
+	atomic_t n_flows;         /* Number of occupied buckets. */
+	unsigned int bucket_mask; /* Number of buckets minus 1. */
+	struct sw_flow **buckets;
+};
+
+/* Returns the address of the bucket slot in 'swt' (which must be embedded in
+ * a struct sw_table_hash) that 'key' hashes to. */
+static struct sw_flow **find_bucket(struct sw_table *swt,
+				    const struct sw_flow_key *key)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	unsigned int crc = crc32_calculate(&th->crc32, key, sizeof *key);
+	return &th->buckets[crc & th->bucket_mask];
+}
+
+/* Returns the flow stored in 'key''s bucket if its key is byte-for-byte equal
+ * to 'key', otherwise NULL. */
+static struct sw_flow *table_hash_lookup(struct sw_table *swt,
+					 const struct sw_flow_key *key)
+{
+	struct sw_flow *flow = *find_bucket(swt, key);
+	return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL;
+}
+
+/* Tries to insert 'flow' into 'swt'.  Returns 1 if 'flow' is now in the table,
+ * 0 otherwise.
+ *
+ * Flows with any wildcard bits set are refused.  An empty bucket simply
+ * receives 'flow'.  An occupied bucket is taken over only if it holds a flow
+ * with an identical key that flow_del() agrees to release; a bucket occupied
+ * by a different key is a collision and the insert fails. */
+static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	struct sw_flow **bucket;
+	unsigned long int flags;
+	int retval;
+
+	if (flow->key.wildcards != 0)
+		return 0;
+
+	spin_lock_irqsave(&th->lock, flags);
+	bucket = find_bucket(swt, &flow->key);
+	if (*bucket == NULL) {
+		atomic_inc(&th->n_flows);
+		rcu_assign_pointer(*bucket, flow);
+		retval = 1;
+	} else {
+		struct sw_flow *old_flow = *bucket;
+		if (!memcmp(&old_flow->key, &flow->key, sizeof flow->key)
+		    && flow_del(old_flow)) {
+			/* Replacement: n_flows is unchanged. */
+			rcu_assign_pointer(*bucket, flow);
+			flow_deferred_free(old_flow);
+			retval = 1;
+		} else {
+			retval = 0;
+		}
+	}
+	spin_unlock_irqrestore(&th->lock, flags);
+	return retval;
+}
+
+/* Empties '*bucket', which must hold 'flow', and schedules 'flow' for deferred
+ * freeing, provided flow_del() succeeds.  Returns 1 if the flow was removed,
+ * 0 otherwise.
+ *
+ * Caller must update n_flows. */
+static int do_delete(struct sw_flow **bucket, struct sw_flow *flow)
+{
+	if (flow_del(flow)) {
+		rcu_assign_pointer(*bucket, NULL);
+		flow_deferred_free(flow);
+		return 1;
+	}
+	return 0;
+}
+
+/* Deletes the flows in 'swt' that match 'key'.  With no wildcards in 'key'
+ * only the single bucket it hashes to is examined; otherwise every bucket is
+ * scanned and tested with flow_del_matches(), to which 'strict' is passed
+ * through.
+ *
+ * NOTE(review): unlike table_hash_insert(), this does not take th->lock;
+ * presumably callers serialize table modifications -- confirm.
+ *
+ * Returns number of deleted flows. */
+static int table_hash_delete(struct sw_table *swt,
+			     const struct sw_flow_key *key, int strict)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	unsigned int count = 0;
+
+	if (key->wildcards == 0) {
+		struct sw_flow **bucket = find_bucket(swt, key);
+		struct sw_flow *flow = *bucket;
+		if (flow && !memcmp(&flow->key, key, sizeof *key))
+			count = do_delete(bucket, flow);
+	} else {
+		unsigned int i;
+
+		for (i = 0; i <= th->bucket_mask; i++) {
+			struct sw_flow **bucket = &th->buckets[i];
+			struct sw_flow *flow = *bucket;
+			if (flow && flow_del_matches(&flow->key, key, strict))
+				count += do_delete(bucket, flow);
+		}
+	}
+	if (count)
+		atomic_sub(count, &th->n_flows);
+	return count;
+}
+
+/* Scans every bucket of 'swt' and deletes the flows for which flow_timeout()
+ * is true.  If 'dp' has OFP_CHELLO_SEND_FLOW_EXP set in its hello_flags, each
+ * such flow is also reported through dp_send_flow_expired().
+ *
+ * NOTE(review): the expiry message is sent even when do_delete() returned 0
+ * for that flow -- confirm this is intended.
+ *
+ * Returns the number of flows actually deleted. */
+static int table_hash_timeout(struct datapath *dp, struct sw_table *swt)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	unsigned int i;
+	int count = 0;
+
+	for (i = 0; i <= th->bucket_mask; i++) {
+		struct sw_flow **bucket = &th->buckets[i];
+		struct sw_flow *flow = *bucket;
+		if (flow && flow_timeout(flow)) {
+			count += do_delete(bucket, flow);
+			if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP)
+				dp_send_flow_expired(dp, flow);
+		}
+	}
+
+	if (count)
+		atomic_sub(count, &th->n_flows);
+	return count;
+}
+
+/* Frees every flow still in 'swt' (immediately, via flow_free()), then the
+ * bucket array, then the table itself. */
+static void table_hash_destroy(struct sw_table *swt)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	unsigned int i;
+	for (i = 0; i <= th->bucket_mask; i++)
+		if (th->buckets[i])
+			flow_free(th->buckets[i]);
+	kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets);
+	kfree(th);
+}
+
+/* Iteration state for a struct sw_table_hash. */
+struct swt_iterator_hash {
+	struct sw_table_hash *th;
+	unsigned int bucket_i;	/* Bucket holding the current flow. */
+};
+
+/* Advances 'ih' to the first non-empty bucket at or after ih->bucket_i and
+ * returns its flow, or NULL if there is none. */
+static struct sw_flow *next_flow(struct swt_iterator_hash *ih)
+{
+	for (;ih->bucket_i <= ih->th->bucket_mask; ih->bucket_i++) {
+		struct sw_flow *f = ih->th->buckets[ih->bucket_i];
+		if (f != NULL)
+			return f;
+	}
+
+	return NULL;
+}
+
+/* Initializes 'swt_iter' to the first flow in 'swt' (swt_iter->flow is NULL
+ * for an empty table).  Returns 1 on success, 0 if the iterator state cannot
+ * be allocated, in which case swt_iter->private is NULL. */
+static int table_hash_iterator(struct sw_table *swt,
+			       struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_hash *ih;
+
+	swt_iter->private = ih = kmalloc(sizeof *ih, GFP_KERNEL);
+
+	if (ih == NULL)
+		return 0;
+
+	ih->th = (struct sw_table_hash *) swt;
+
+	ih->bucket_i = 0;
+	swt_iter->flow = next_flow(ih);
+
+	return 1;
+}
+
+/* Moves 'swt_iter' to the next flow; swt_iter->flow becomes NULL once the
+ * table is exhausted.  A no-op on an already exhausted iterator. */
+static void table_hash_next(struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_hash *ih;
+
+	if (swt_iter->flow == NULL)
+		return;
+
+	ih = (struct swt_iterator_hash *) swt_iter->private;
+
+	ih->bucket_i++;
+	swt_iter->flow = next_flow(ih);
+}
+
+/* Releases the state allocated by table_hash_iterator(). */
+static void table_hash_iterator_destroy(struct swt_iterator *swt_iter)
+{
+	kfree(swt_iter->private);
+}
+
+/* Fills in 'stats' for 'swt'; max_flows is the number of buckets. */
+static void table_hash_stats(struct sw_table *swt,
+			     struct sw_table_stats *stats)
+{
+	struct sw_table_hash *th = (struct sw_table_hash *) swt;
+	stats->name = "hash";
+	stats->n_flows = atomic_read(&th->n_flows);
+	stats->max_flows = th->bucket_mask + 1;
+}
+
+/* Creates an exact-match hash table with 'n_buckets' buckets that hashes keys
+ * with a CRC32 based on 'polynomial'.
+ *
+ * 'n_buckets' must be a nonzero power of 2, because bucket indexes are
+ * computed by masking with 'n_buckets - 1'.
+ *
+ * Returns the new table, or NULL if memory cannot be allocated. */
+struct sw_table *table_hash_create(unsigned int polynomial,
+				   unsigned int n_buckets)
+{
+	struct sw_table_hash *th;
+	struct sw_table *swt;
+
+	th = kmalloc(sizeof *th, GFP_KERNEL);
+	if (th == NULL)
+		return NULL;
+
+	/* Zero would pass the power-of-2 test but yield an all-ones mask over
+	 * an empty array, so reject it explicitly. */
+	BUG_ON(!n_buckets || (n_buckets & (n_buckets - 1)));
+	th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets);
+	if (th->buckets == NULL) {
+		printk("failed to allocate %u buckets\n", n_buckets);
+		kfree(th);
+		return NULL;
+	}
+	th->bucket_mask = n_buckets - 1;
+
+	swt = &th->swt;
+	swt->lookup = table_hash_lookup;
+	swt->insert = table_hash_insert;
+	swt->delete = table_hash_delete;
+	swt->timeout = table_hash_timeout;
+	swt->destroy = table_hash_destroy;
+	swt->iterator = table_hash_iterator;
+	swt->iterator_next = table_hash_next;
+	swt->iterator_destroy = table_hash_iterator_destroy;
+	swt->stats = table_hash_stats;
+
+	spin_lock_init(&th->lock);
+	crc32_init(&th->crc32, polynomial);
+	atomic_set(&th->n_flows, 0);
+
+	return swt;
+}
+
+/* Double-hashing table. */
+
+/* Two independent hash tables; a flow lives in whichever one had room. */
+struct sw_table_hash2 {
+	struct sw_table swt;
+	struct sw_table *subtable[2];
+};
+
+/* Looks 'key' up in subtable 0, then subtable 1.  Returns the exactly matching
+ * flow, or NULL. */
+static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
+					  const struct sw_flow_key *key)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
+		if (flow && !memcmp(&flow->key, key, sizeof *key))
+			return flow;
+	}
+	return NULL;
+}
+
+/* Inserts 'flow' into subtable 0, falling back to subtable 1 if that fails.
+ * Returns 1 if inserted, 0 otherwise. */
+static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+
+	if (table_hash_insert(t2->subtable[0], flow))
+		return 1;
+	return table_hash_insert(t2->subtable[1], flow);
+}
+
+/* Deletes matching flows from both subtables; returns the total deleted. */
+static int table_hash2_delete(struct sw_table *swt,
+			      const struct sw_flow_key *key, int strict)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+	return (table_hash_delete(t2->subtable[0], key, strict)
+		+ table_hash_delete(t2->subtable[1], key, strict));
+}
+
+/* Expires timed-out flows in both subtables; returns the total deleted. */
+static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+	return (table_hash_timeout(dp, t2->subtable[0])
+		+ table_hash_timeout(dp, t2->subtable[1]));
+}
+
+/* Destroys both subtables and then the double-hashing table itself. */
+static void table_hash2_destroy(struct sw_table *swt)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+	table_hash_destroy(t2->subtable[0]);
+	table_hash_destroy(t2->subtable[1]);
+	kfree(t2);
+}
+
+/* Iteration state for a struct sw_table_hash2. */
+struct swt_iterator_hash2 {
+	struct sw_table_hash2 *th2;
+	struct swt_iterator ih;	/* Iterator over subtable[table_i]. */
+	uint8_t table_i;	/* Subtable currently being walked. */
+};
+
+/* Initializes 'swt_iter' to the first flow of 'swt', starting in subtable 0
+ * and moving on to subtable 1 if subtable 0 is empty.
+ *
+ * Returns 1 on success.  Returns 0 on allocation failure, leaving
+ * swt_iter->private NULL so that it never refers to freed memory. */
+static int table_hash2_iterator(struct sw_table *swt,
+				struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_hash2 *ih2;
+
+	swt_iter->private = ih2 = kmalloc(sizeof *ih2, GFP_KERNEL);
+	if (ih2 == NULL)
+		return 0;
+
+	ih2->th2 = (struct sw_table_hash2 *) swt;
+	if (!table_hash_iterator(ih2->th2->subtable[0], &ih2->ih))
+		goto error;
+
+	if (ih2->ih.flow != NULL) {
+		swt_iter->flow = ih2->ih.flow;
+		ih2->table_i = 0;
+	} else {
+		table_hash_iterator_destroy(&ih2->ih);
+		ih2->table_i = 1;
+		if (!table_hash_iterator(ih2->th2->subtable[1], &ih2->ih))
+			goto error;
+		swt_iter->flow = ih2->ih.flow;
+	}
+
+	return 1;
+
+error:
+	kfree(ih2);
+	swt_iter->private = NULL;
+	return 0;
+}
+
+/* Moves 'swt_iter' to the next flow, switching from subtable 0 to subtable 1
+ * when the former is exhausted.  swt_iter->flow becomes NULL at the end, or
+ * if the iterator for subtable 1 cannot be allocated. */
+static void table_hash2_next(struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_hash2 *ih2;
+
+	if (swt_iter->flow == NULL)
+		return;
+
+	ih2 = (struct swt_iterator_hash2 *) swt_iter->private;
+	table_hash_next(&ih2->ih);
+
+	if (ih2->ih.flow != NULL) {
+		swt_iter->flow = ih2->ih.flow;
+	} else {
+		if (ih2->table_i == 0) {
+			table_hash_iterator_destroy(&ih2->ih);
+			ih2->table_i = 1;
+			if (!table_hash_iterator(ih2->th2->subtable[1], &ih2->ih)) {
+				ih2->ih.private = NULL;
+				swt_iter->flow = NULL;
+			} else {
+				swt_iter->flow = ih2->ih.flow;
+			}
+		} else {
+			swt_iter->flow = NULL;
+		}
+	}
+}
+
+/* Releases the state allocated by table_hash2_iterator().  Safe to call after
+ * a failed table_hash2_iterator(), when swt_iter->private is NULL. */
+static void table_hash2_iterator_destroy(struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_hash2 *ih2;
+
+	ih2 = (struct swt_iterator_hash2 *) swt_iter->private;
+	if (ih2 == NULL)
+		return;
+	if (ih2->ih.private != NULL)
+		table_hash_iterator_destroy(&ih2->ih);
+	kfree(ih2);
+}
+
+/* Fills in 'stats' with the sums of the two subtables' statistics. */
+static void table_hash2_stats(struct sw_table *swt,
+			      struct sw_table_stats *stats)
+{
+	struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
+	struct sw_table_stats substats[2];
+	int i;
+
+	for (i = 0; i < 2; i++)
+		table_hash_stats(t2->subtable[i], &substats[i]);
+	stats->name = "hash2";
+	stats->n_flows = substats[0].n_flows + substats[1].n_flows;
+	stats->max_flows = substats[0].max_flows + substats[1].max_flows;
+}
+
+/* Creates a double-hashing table made of two hash tables, the first with
+ * 'buckets0' buckets and CRC32 polynomial 'poly0', the second with 'buckets1'
+ * buckets and polynomial 'poly1'.  The bucket counts are subject to the same
+ * constraints as in table_hash_create().
+ *
+ * Returns the new table, or NULL if memory cannot be allocated. */
+struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
+				    unsigned int poly1, unsigned int buckets1)
+
+{
+	struct sw_table_hash2 *t2;
+	struct sw_table *swt;
+
+	t2 = kmalloc(sizeof *t2, GFP_KERNEL);
+	if (t2 == NULL)
+		return NULL;
+
+	t2->subtable[0] = table_hash_create(poly0, buckets0);
+	if (t2->subtable[0] == NULL)
+		goto out_free_t2;
+
+	t2->subtable[1] = table_hash_create(poly1, buckets1);
+	if (t2->subtable[1] == NULL)
+		goto out_free_subtable0;
+
+	swt = &t2->swt;
+	swt->lookup = table_hash2_lookup;
+	swt->insert = table_hash2_insert;
+	swt->delete = table_hash2_delete;
+	swt->timeout = table_hash2_timeout;
+	swt->destroy = table_hash2_destroy;
+	swt->stats = table_hash2_stats;
+
+	swt->iterator = table_hash2_iterator;
+	swt->iterator_next = table_hash2_next;
+	swt->iterator_destroy = table_hash2_iterator_destroy;
+
+	return swt;
+
+out_free_subtable0:
+	table_hash_destroy(t2->subtable[0]);
+out_free_t2:
+	kfree(t2);
+	return NULL;
+}
+
+/* From fs/xfs/linux-2.4/kmem.c. */
+
+/* Allocates 'size' bytes with kmalloc(), falling back to vmalloc() (and
+ * logging the fact) if kmalloc() fails.  Requests larger than
+ * KMALLOC_MAX_SIZE, where that is defined, are refused outright.  Returns
+ * NULL on failure.  Free the result with kmem_free(). */
+static void *
+kmem_alloc(size_t size)
+{
+	void *ptr;
+
+#ifdef KMALLOC_MAX_SIZE
+	if (size > KMALLOC_MAX_SIZE)
+		return NULL;
+#endif
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (!ptr) {
+		ptr = vmalloc(size);
+		if (ptr)
+			printk("openflow: used vmalloc for %lu bytes\n",
+					(unsigned long)size);
+	}
+	return ptr;
+}
+
+/* Like kmem_alloc(), but the returned memory is zero-filled. */
+static void *
+kmem_zalloc(size_t size)
+{
+	void *ptr = kmem_alloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+
+/* Frees memory obtained from kmem_alloc() or kmem_zalloc().  The allocator
+ * that was used is deduced from whether 'ptr' lies in the vmalloc address
+ * range; 'size' is not used. */
+static void
+kmem_free(void *ptr, size_t size)
+{
+	if (((unsigned long)ptr < VMALLOC_START) ||
+		((unsigned long)ptr >= VMALLOC_END)) {
+		kfree(ptr);
+	} else {
+		vfree(ptr);
+	}
+}
diff --git a/datapath/table-linear.c b/datapath/table-linear.c
new file mode 100644
index 00000000..3baede66
--- /dev/null
+++ b/datapath/table-linear.c
@@ -0,0 +1,202 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University
+ */
+
+#include "table.h"
+#include "flow.h"
+#include "datapath.h"
+
+#include
+#include
+#include
+
+/* A table that keeps its flows on a single list and searches it linearly. */
+struct sw_table_linear {
+	struct sw_table swt;
+
+	spinlock_t lock;		/* Taken by table_linear_insert(). */
+	unsigned int max_flows;		/* Capacity limit enforced on insert. */
+	atomic_t n_flows;		/* Number of flows currently listed. */
+	struct list_head flows;		/* The flows, newest first. */
+};
+
+/* Returns the first flow on the list for which flow_matches() accepts 'key',
+ * or NULL if there is none. */
+static struct sw_flow *table_linear_lookup(struct sw_table *swt,
+					 const struct sw_flow_key *key)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+	struct sw_flow *flow;
+	list_for_each_entry_rcu (flow, &tl->flows, u.node) {
+		if (flow_matches(&flow->key, key))
+			return flow;
+	}
+	return NULL;
+}
+
+/* Inserts 'flow'.  An existing flow with the same wildcards that matches
+ * 'flow' is replaced in place and handed to flow_deferred_free(); otherwise
+ * 'flow' is added at the head of the list.  Returns 1 on success, 0 if the
+ * table is already at 'max_flows'. */
+static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+	unsigned long int flags;
+	struct sw_flow *f;
+
+	/* Replace flows that match exactly. */
+	spin_lock_irqsave(&tl->lock, flags);
+	list_for_each_entry_rcu (f, &tl->flows, u.node) {
+		if (f->key.wildcards == flow->key.wildcards
+				&& flow_matches(&f->key, &flow->key)
+				&& flow_del(f)) {
+			list_replace_rcu(&f->u.node, &flow->u.node);
+			spin_unlock_irqrestore(&tl->lock, flags);
+			flow_deferred_free(f);
+			return 1;
+		}
+	}
+
+	/* Table overflow? */
+	if (atomic_read(&tl->n_flows) >= tl->max_flows) {
+		spin_unlock_irqrestore(&tl->lock, flags);
+		return 0;
+	}
+	atomic_inc(&tl->n_flows);
+
+	/* FIXME: need to order rules from most to least specific. */
+	list_add_rcu(&flow->u.node, &tl->flows);
+	spin_unlock_irqrestore(&tl->lock, flags);
+	return 1;
+}
+
+/* Unlinks 'flow' and schedules it for deferred freeing, provided flow_del()
+ * succeeds.  Returns 1 if the flow was removed, 0 otherwise.  The caller
+ * adjusts 'n_flows'. */
+static int do_delete(struct sw_table *swt, struct sw_flow *flow)
+{
+	if (flow_del(flow)) {
+		list_del_rcu(&flow->u.node);
+		flow_deferred_free(flow);
+		return 1;
+	}
+	return 0;
+}
+
+/* Deletes every flow for which flow_del_matches() accepts 'key' under
+ * 'strict'.  Returns the number of flows removed.
+ * NOTE(review): unlike table_linear_insert(), this does not take 'lock';
+ * presumably the caller serializes deletions -- confirm. */
+static int table_linear_delete(struct sw_table *swt,
+				const struct sw_flow_key *key, int strict)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+	struct list_head *pos, *n;
+	unsigned int count = 0;
+
+	list_for_each_safe_rcu (pos, n, &tl->flows) {
+		struct sw_flow *flow = list_entry(pos, struct sw_flow, u.node);
+		if (flow_del_matches(&flow->key, key, strict))
+			count += do_delete(swt, flow);
+	}
+	if (count)
+		atomic_sub(count, &tl->n_flows);
+	return count;
+}
+
+/* Removes every flow for which flow_timeout() reports expiry and, if the
+ * datapath's hello flags request it, reports each one with
+ * dp_send_flow_expired().  Returns the number of flows removed. */
+static int table_linear_timeout(struct datapath *dp, struct sw_table *swt)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+	struct list_head *pos, *n;
+	int count = 0;
+
+	list_for_each_safe_rcu (pos, n, &tl->flows) {
+		struct sw_flow *flow = list_entry(pos, struct sw_flow, u.node);
+		if (flow_timeout(flow)) {
+			count += do_delete(swt, flow);
+			/* The notification is sent even when do_delete()
+			 * returned 0. */
+			if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP)
+				dp_send_flow_expired(dp, flow);
+		}
+	}
+	if (count)
+		atomic_sub(count, &tl->n_flows);
+	return count;
+}
+
+/* Frees every remaining flow immediately (no deferral) and then the table. */
+static void table_linear_destroy(struct sw_table *swt)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+
+	while (!list_empty(&tl->flows)) {
+		struct sw_flow *flow = list_entry(tl->flows.next,
+						  struct sw_flow, u.node);
+		list_del(&flow->u.node);
+		flow_free(flow);
+	}
+	kfree(tl);
+}
+
+/* Linear table's private data is just a pointer to the table */
+
+/* Begins iterating: 'swt_iter->flow' becomes the first flow on the list, or
+ * NULL when the flow counter reads zero.  Always returns 1, since nothing is
+ * allocated. */
+static int table_linear_iterator(struct sw_table *swt,
+				 struct swt_iterator *swt_iter)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+
+	swt_iter->private = tl;
+
+	if (atomic_read(&tl->n_flows) == 0)
+		swt_iter->flow = NULL;
+	else
+		swt_iter->flow = list_entry(tl->flows.next,
+					    struct sw_flow, u.node);
+
+	return 1;
+}
+
+/* Advances to the next flow on the list; 'swt_iter->flow' becomes NULL when
+ * the walk gets back to the list head. */
+static void table_linear_next(struct swt_iterator *swt_iter)
+{
+	struct sw_table_linear *tl;
+	struct list_head *next;
+
+	if (swt_iter->flow == NULL)
+		return;
+
+	tl = (struct sw_table_linear *) swt_iter->private;
+
+	next = swt_iter->flow->u.node.next;
+	if (next == &tl->flows)
+		swt_iter->flow = NULL;
+	else
+		swt_iter->flow = list_entry(next, struct sw_flow, u.node);
+}
+
+/* Nothing to release: the iterator's private data is the table itself. */
+static void table_linear_iterator_destroy(struct swt_iterator *swt_iter)
+{}
+
+/* Fills in 'stats' with the table's name, flow count and capacity. */
+static void table_linear_stats(struct sw_table *swt,
+				struct sw_table_stats *stats)
+{
+	struct sw_table_linear *tl = (struct sw_table_linear *) swt;
+	stats->name = "linear";
+	stats->n_flows = atomic_read(&tl->n_flows);
+	stats->max_flows = tl->max_flows;
+}
+
+
+/* Creates an empty linear table that accepts at most 'max_flows' flows.
+ * Returns NULL if allocation fails. */
+struct sw_table *table_linear_create(unsigned int max_flows)
+{
+	struct sw_table_linear *tl;
+	struct sw_table *swt;
+
+	tl = kzalloc(sizeof *tl, GFP_KERNEL);
+	if (tl == NULL)
+		return NULL;
+
+	swt = &tl->swt;
+	swt->lookup = table_linear_lookup;
+	swt->insert = table_linear_insert;
+	swt->delete = table_linear_delete;
+	swt->timeout = table_linear_timeout;
+	swt->destroy = table_linear_destroy;
+	swt->stats = table_linear_stats;
+
+	swt->iterator = table_linear_iterator;
+	swt->iterator_next = table_linear_next;
+	swt->iterator_destroy = table_linear_iterator_destroy;
+
+	tl->max_flows = max_flows;
+	atomic_set(&tl->n_flows, 0);
+	INIT_LIST_HEAD(&tl->flows);
+	spin_lock_init(&tl->lock);
+
+	return swt;
+}
diff --git a/datapath/table-mac.c b/datapath/table-mac.c
new file mode 100644
index 00000000..06f68a36
--- /dev/null
+++ b/datapath/table-mac.c
@@ -0,0 +1,272 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University
+ */
+
+#include "table.h"
+#include "crc32.h"
+#include "flow.h"
+#include "openflow.h"
+#include "datapath.h"
+
+#include
+
+/* A table of flows that match on Ethernet source address only, hashed into
+ * chained buckets. */
+struct sw_table_mac {
+	struct sw_table swt;
+	spinlock_t lock;		/* Taken by table_mac_insert(). */
+	struct crc32 crc32;		/* Hash state for find_bucket(). */
+	atomic_t n_flows;		/* Number of flows currently stored. */
+	unsigned int max_flows;		/* Capacity limit enforced on insert. */
+	unsigned int bucket_mask; /* Number of buckets minus 1. */
+	struct hlist_head *buckets;
+};
+
+/* Returns the bucket that holds flows whose Ethernet source address equals
+ * 'key->dl_src'.
+ *
+ * Only the 6 address bytes are hashed, the same bytes that lookup and insert
+ * compare.  Hashing the whole key would send a wildcard-free lookup key to a
+ * different bucket than the stored flow, whose 'wildcards' field (and possibly
+ * other ignored fields) differs, so lookups would miss. */
+static struct hlist_head *find_bucket(struct sw_table *swt,
+				      const struct sw_flow_key *key)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+	unsigned int crc = crc32_calculate(&tm->crc32, key->dl_src, 6);
+	return &tm->buckets[crc & tm->bucket_mask];
+}
+
+/* Returns the flow whose Ethernet source address equals 'key->dl_src', or
+ * NULL if there is none.  No other field of 'key' is examined. */
+static struct sw_flow *table_mac_lookup(struct sw_table *swt,
+					const struct sw_flow_key *key)
+{
+	struct hlist_head *bucket = find_bucket(swt, key);
+	struct hlist_node *pos;
+	struct sw_flow *flow;
+	hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode)
+		if (!memcmp(key->dl_src, flow->key.dl_src, 6))
+			return flow;
+	return NULL;
+}
+
+/* Inserts 'flow', replacing (and deferred-freeing) any stored flow with the
+ * same Ethernet source address.  Returns 1 on success, 0 if 'flow' is not a
+ * source-MAC-only flow or the table is already at 'max_flows'. */
+static int table_mac_insert(struct sw_table *swt, struct sw_flow *flow)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	unsigned long int flags;
+	struct sw_flow *f;
+
+	/* MAC table only handles flows that match on Ethernet
+	   source address and wildcard everything else. */
+	if (likely(flow->key.wildcards != (OFPFW_ALL & ~OFPFW_DL_SRC)))
+		return 0;
+	bucket = find_bucket(swt, &flow->key);
+
+	spin_lock_irqsave(&tm->lock, flags);
+	hlist_for_each_entry_rcu (f, pos, bucket, u.hnode) {
+		if (!memcmp(f->key.dl_src, flow->key.dl_src, 6)
+		    && flow_del(f)) {
+			hlist_replace_rcu(&f->u.hnode, &flow->u.hnode);
+			spin_unlock_irqrestore(&tm->lock, flags);
+			flow_deferred_free(f);
+			return 1;
+		}
+	}
+
+	/* Table overflow? */
+	if (atomic_read(&tm->n_flows) >= tm->max_flows) {
+		spin_unlock_irqrestore(&tm->lock, flags);
+		return 0;
+	}
+	atomic_inc(&tm->n_flows);
+
+	hlist_add_head_rcu(&flow->u.hnode, bucket);
+	spin_unlock_irqrestore(&tm->lock, flags);
+	return 1;
+}
+
+/* Unlinks 'flow' and schedules it for deferred freeing, provided flow_del()
+ * succeeds.  Returns 1 if the flow was removed, 0 otherwise.  The caller
+ * adjusts 'n_flows'. */
+static int do_delete(struct sw_table *swt, struct sw_flow *flow)
+{
+	if (flow_del(flow)) {
+		hlist_del_rcu(&flow->u.hnode);
+		flow_deferred_free(flow);
+		return 1;
+	}
+	return 0;
+}
+
+/* Returns number of deleted flows. */
+static int table_mac_delete(struct sw_table *swt,
+			    const struct sw_flow_key *key, int strict)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+
+	if (key->wildcards == (OFPFW_ALL & ~OFPFW_DL_SRC)) {
+		/* Source-MAC-only key: at most one flow can match, and the
+		 * hash finds it directly. */
+		struct sw_flow *flow = table_mac_lookup(swt, key);
+		if (flow && do_delete(swt, flow)) {
+			atomic_dec(&tm->n_flows);
+			return 1;
+		}
+		return 0;
+	} else {
+		/* Any other key: every bucket has to be scanned. */
+		unsigned int i;
+		int count = 0;
+		for (i = 0; i <= tm->bucket_mask; i++) {
+			struct hlist_head *bucket = &tm->buckets[i];
+			struct hlist_node *pos;
+			struct sw_flow *flow;
+			hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode)
+				if (flow_del_matches(&flow->key, key, strict))
+					count += do_delete(swt, flow);
+		}
+		if (count)
+			atomic_sub(count, &tm->n_flows);
+		return count;
+	}
+}
+
+/* Removes every flow for which flow_timeout() reports expiry and, if the
+ * datapath's hello flags request it, reports each one with
+ * dp_send_flow_expired().  Returns the number of flows removed. */
+static int table_mac_timeout(struct datapath *dp, struct sw_table *swt)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+	unsigned int i;
+	int count = 0;
+
+	for (i = 0; i <= tm->bucket_mask; i++) {
+		struct hlist_head *bucket = &tm->buckets[i];
+		struct hlist_node *pos;
+		struct sw_flow *flow;
+		hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode) {
+			if (flow_timeout(flow)) {
+				count += do_delete(swt, flow);
+				if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP)
+					dp_send_flow_expired(dp, flow);
+			}
+		}
+	}
+	if (count)
+		atomic_sub(count, &tm->n_flows);
+	return count;
+}
+
+/* Frees every remaining flow immediately (no deferral), then the bucket
+ * array and the table. */
+static void table_mac_destroy(struct sw_table *swt)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+	unsigned int i;
+	for (i = 0; i <= tm->bucket_mask; i++) {
+		struct hlist_head *hlist = &tm->buckets[i];
+		while (!hlist_empty(hlist)) {
+			struct sw_flow *flow = hlist_entry(hlist->first,
+					struct sw_flow, u.hnode);
+			hlist_del(&flow->u.hnode);
+			flow_free(flow);
+		}
+	}
+	kfree(tm->buckets);
+	kfree(tm);
+}
+
+/* Iteration state for a MAC table. */
+struct swt_iterator_mac {
+	struct sw_table_mac *tm;	/* Table being iterated. */
+	unsigned int bucket_i;		/* Bucket currently being walked. */
+};
+
+/* Starting at bucket 'im->bucket_i', returns the first flow of the first
+ * non-empty bucket, leaving 'im->bucket_i' on that bucket.  Returns NULL if
+ * all remaining buckets are empty. */
+static struct sw_flow *next_head_flow(struct swt_iterator_mac *im)
+{
+	for (; im->bucket_i <= im->tm->bucket_mask; im->bucket_i++) {
+		struct hlist_node *first = im->tm->buckets[im->bucket_i].first;
+		if (first != NULL) {
+			struct sw_flow *f = hlist_entry(first,
+							struct sw_flow,
+							u.hnode);
+			return f;
+		}
+	}
+	return NULL;
+}
+
+/* Begins iterating over 'swt'.  Allocates the iteration state and sets
+ * 'swt_iter->flow' to the first flow, or NULL when the flow counter reads
+ * zero.  Returns 1 on success, 0 if the state could not be allocated. */
+static int table_mac_iterator(struct sw_table *swt,
+			      struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_mac *im;
+
+	swt_iter->private = im = kmalloc(sizeof *im, GFP_KERNEL);
+	if (im == NULL)
+		return 0;
+
+	im->tm = (struct sw_table_mac *) swt;
+
+	if (atomic_read(&im->tm->n_flows) == 0)
+		swt_iter->flow = NULL;
+	else {
+		im->bucket_i = 0;
+		swt_iter->flow = next_head_flow(im);
+	}
+
+	return 1;
+}
+
+/* Advances to the next flow in the current bucket's chain or, at the end of
+ * the chain, to the head of the next non-empty bucket. */
+static void table_mac_next(struct swt_iterator *swt_iter)
+{
+	struct swt_iterator_mac *im;
+	struct hlist_node *next;
+
+	if (swt_iter->flow == NULL)
+		return;
+
+	im = (struct swt_iterator_mac *) swt_iter->private;
+
+	next = swt_iter->flow->u.hnode.next;
+	if (next != NULL) {
+		swt_iter->flow = hlist_entry(next, struct sw_flow, u.hnode);
+	} else {
+		im->bucket_i++;
+		swt_iter->flow = next_head_flow(im);
+	}
+}
+
+/* Releases the state allocated by table_mac_iterator(). */
+static void table_mac_iterator_destroy(struct swt_iterator *swt_iter)
+{
+	kfree(swt_iter->private);
+}
+
+/* Fills in 'stats' with the table's name, flow count and capacity. */
+static void table_mac_stats(struct sw_table *swt, struct sw_table_stats *stats)
+{
+	struct sw_table_mac *tm = (struct sw_table_mac *) swt;
+	stats->name = "mac";
+	stats->n_flows = atomic_read(&tm->n_flows);
+	stats->max_flows = tm->max_flows;
+}
+
+/* Creates an empty MAC table with 'n_buckets' buckets that accepts at most
+ * 'max_flows' flows.  BUG_ON() fires if 'n_buckets' has more than one bit
+ * set, because bucket indexes are formed with a mask of n_buckets - 1.
+ * Returns NULL if allocation fails. */
+struct sw_table *table_mac_create(unsigned int n_buckets,
+				  unsigned int max_flows)
+{
+	struct sw_table_mac *tm;
+	struct sw_table *swt;
+
+	tm = kzalloc(sizeof *tm, GFP_KERNEL);
+	if (tm == NULL)
+		return NULL;
+
+	BUG_ON(n_buckets & (n_buckets - 1));
+
+	tm->buckets = kzalloc(n_buckets * sizeof *tm->buckets, GFP_KERNEL);
+	if (tm->buckets == NULL) {
+		printk("failed to allocate %u buckets\n", n_buckets);
+		kfree(tm);
+		return NULL;
+	}
+	tm->bucket_mask = n_buckets - 1;
+
+	swt = &tm->swt;
+	swt->lookup = table_mac_lookup;
+	swt->insert = table_mac_insert;
+	swt->delete = table_mac_delete;
+	swt->timeout = table_mac_timeout;
+	swt->destroy = table_mac_destroy;
+	swt->stats = table_mac_stats;
+
+	swt->iterator = table_mac_iterator;
+	swt->iterator_next = table_mac_next;
+	swt->iterator_destroy = table_mac_iterator_destroy;
+
+	crc32_init(&tm->crc32, 0x04C11DB7); /* Ethernet CRC. */
+	atomic_set(&tm->n_flows, 0);
+	tm->max_flows = max_flows;
+	spin_lock_init(&tm->lock);
+
+	return swt;
+}
diff --git a/datapath/table.h b/datapath/table.h
new file mode 100644
index 00000000..9a303670
--- /dev/null
+++ b/datapath/table.h
@@ -0,0 +1,74 @@
+/* Individual switching tables.  Generally grouped together in a chain (see
+ * chain.h). */
+
+#ifndef TABLE_H
+#define TABLE_H 1
+
+struct sw_flow;
+struct sw_flow_key;
+struct datapath;
+
+/* Iterator through the flows stored in a table. */
+struct swt_iterator {
+	struct sw_flow *flow;	/* Current flow, for use by client. */
+	void *private;
+};
+
+/* Table statistics. */
+struct sw_table_stats {
+	const char *name;	/* Human-readable name. */
+	unsigned long int n_flows; /* Number of active flows. */
+	unsigned long int max_flows; /* Flow capacity. */
+};
+
+/* A single table of flows.
+ *
+ * All functions, except destroy, must be called holding the
+ * rcu_read_lock.  destroy must be fully serialized.
+ */
+struct sw_table {
+	/* Searches 'table' for a flow matching 'key', which must not have any
+	 * wildcard fields.  Returns the flow if successful, a null pointer
+	 * otherwise. */
+	struct sw_flow *(*lookup)(struct sw_table *table,
+				  const struct sw_flow_key *key);
+
+	/* Inserts 'flow' into 'table', replacing any duplicate flow.  Returns
+	 * 1 if successful, 0 otherwise.  Failure can be due to an
+	 * over-capacity table or because the flow is not one of the kind that
+	 * the table accepts.
+	 *
+	 * If successful, 'flow' becomes owned by 'table', otherwise it is
+	 * retained by the caller. */
+	int (*insert)(struct sw_table *table, struct sw_flow *flow);
+
+	/* Deletes from 'table' any and all flows that match 'key'.  If
+	 * 'strict' set, wildcards must match.  Returns the number of flows
+	 * that were deleted. */
+	int (*delete)(struct sw_table *table, const struct sw_flow_key *key,
+		      int strict);
+
+	/* Performs timeout processing on all the flow entries in 'table'.
+	 * Returns the number of flow entries deleted through expiration. */
+	int (*timeout)(struct datapath *dp, struct sw_table *table);
+
+	/* Destroys 'table', which must not have any users. */
+	void (*destroy)(struct sw_table *table);
+
+	/* Flow iteration.  'iterator' initializes an iterator positioned on
+	 * the table's first flow ('flow' is NULL if there is none) and
+	 * returns nonzero on success, 0 on failure.  'iterator_next' advances
+	 * it, setting 'flow' to NULL at the end.  'iterator_destroy' releases
+	 * an iterator that was successfully initialized. */
+	int (*iterator)(struct sw_table *, struct swt_iterator *);
+	void (*iterator_next)(struct swt_iterator *);
+	void (*iterator_destroy)(struct swt_iterator *);
+
+	/* Dumps statistics for 'table' into 'stats'. */
+	void (*stats)(struct sw_table *table, struct sw_table_stats *stats);
+};
+
+struct sw_table *table_mac_create(unsigned int n_buckets,
+				  unsigned int max_flows);
+struct sw_table *table_hash_create(unsigned int polynomial,
+				   unsigned int n_buckets);
+struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
+				    unsigned int poly1, unsigned int buckets1);
+struct sw_table *table_linear_create(unsigned int max_flows);
+
+#endif /* table.h */
diff --git a/datapath/table_t.c b/datapath/table_t.c
new file mode 100644
index 00000000..3f92a118
--- /dev/null
+++ b/datapath/table_t.c
@@ -0,0 +1,879 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "flow.h"
+#include "table.h"
+#include "openflow.h"
+#include "unit.h"
+
+/* Returns the human-readable name that 'table' reports in its statistics. */
+static const char *
+table_name(struct sw_table *table)
+{
+	struct sw_table_stats stats;
+	table->stats(table, &stats);
+	return stats.name;
+}
+
+/* Returns the flow capacity that 'table' reports in its statistics. */
+static unsigned long int
+table_max_flows(struct sw_table *table)
+{
+	struct sw_table_stats stats;
+	table->stats(table, &stats);
+	return stats.max_flows;
+}
+
+/* Like flow_alloc(), but the returned flow is zero-filled apart from its
+ * 'actions' pointer, which is preserved.  Returns NULL on allocation
+ * failure. */
+static struct sw_flow *flow_zalloc(int n_actions, gfp_t flags)
+{
+	struct sw_flow *flow = flow_alloc(n_actions, flags);
+	if (flow) {
+		struct ofp_action *actions = flow->actions;
+		memset(flow, 0, sizeof *flow);
+		flow->actions = actions;
+	}
+	return flow;
+}
+
+/* Inserts one flow into 'swt', checks that it (and only it) can be looked up,
+ * deletes it again and checks that it is gone.  Always destroys 'swt' unless
+ * 'swt' is null.  Both test flows use 'wildcards'. */
+static void
+simple_insert_delete(struct sw_table *swt, uint16_t wildcards)
+{
+	struct sw_flow *a_flow, *b_flow;
+	struct sw_flow *found;
+	struct sw_flow_key a_key;
+
+	/* Checked before allocating so that nothing leaks on this path. */
+	if (!swt) {
+		unit_fail("table creation failed");
+		return;
+	}
+
+	a_flow = flow_zalloc(0, GFP_KERNEL);
+	b_flow = flow_zalloc(0, GFP_KERNEL);
+	if (!a_flow || !b_flow) {
+		unit_fail("flow allocation failed");
+		goto out;
+	}
+
+	printk("simple_insert_delete: testing %s table\n", table_name(swt));
+	*((uint32_t*)a_flow->key.dl_src) = 0x12345678;
+	*((uint32_t*)b_flow->key.dl_src) = 0x87654321;
+
+	a_flow->key.nw_src = 0xdeadbeef;
+	b_flow->key.nw_src = 0x001dd0d0;
+
+	a_flow->key.wildcards = wildcards;
+	b_flow->key.wildcards = wildcards;
+
+	if (!(swt->insert(swt, a_flow))) {
+		/* The table did not take ownership; 'a_flow' is freed below. */
+		unit_fail("insert failed");
+		goto out;
+	}
+
+	/* Keep a private copy of the key: once the flow is deleted the table
+	 * may free it, so it must not be read through 'a_flow' afterwards. */
+	a_key = a_flow->key;
+
+	found = swt->lookup(swt, &a_key);
+	if(found != a_flow)
+		unit_fail("%p != %p", found, a_flow);
+	a_flow = NULL;		/* Owned by the table from here on. */
+
+	if (swt->lookup(swt, &b_flow->key))
+		unit_fail("lookup should not succeed (1)");
+
+	swt->delete(swt, &a_key, 0);
+	if (swt->lookup(swt, &a_key))
+		unit_fail("lookup should not succeed (3)");
+
+out:
+	if (a_flow)
+		flow_free(a_flow);
+	if (b_flow)
+		flow_free(b_flow);
+	swt->destroy(swt);
+}
+
+/* Inserts 'inserts' random flows with 'wildcards' into 'swt' and checks that
+ * the number of refused insertions lies within
+ * ['min_collisions', 'max_collisions'].  Always destroys 'swt' unless 'swt'
+ * is null. */
+static void
+multiple_insert_destroy(struct sw_table *swt, int inserts, uint16_t wildcards,
+			int min_collisions, int max_collisions)
+{
+	int i;
+	int col = 0;
+
+	if (!swt) {
+		unit_fail("table creation failed");
+		return;
+	}
+
+	printk("inserting %d flows into %s table with max %lu flows: ",
+	       inserts, table_name(swt), table_max_flows(swt));
+	for(i = 0; i < inserts; ++i){
+		struct sw_flow *a_flow = flow_zalloc(0, GFP_KERNEL);
+		if (a_flow == NULL) {
+			/* Without the flow the collision count would be
+			 * meaningless, so give up on the whole test. */
+			unit_fail("could only allocate %d flows", i);
+			swt->destroy(swt);
+			return;
+		}
+		*((uint32_t*)&(a_flow->key.dl_src[0])) = random32();
+		a_flow->key.nw_src = random32();
+		a_flow->key.wildcards = wildcards;
+
+		if(!swt->insert(swt, a_flow)) {
+			col++;
+			flow_free(a_flow);
+		}
+	}
+	printk("%d failures\n", col);
+	if (min_collisions <= col && col <= max_collisions)
+		printk("\tmin = %d <= %d <= %d = max, OK.\n",
+		       min_collisions, col, max_collisions);
+	else {
+		if (col < min_collisions)
+			unit_fail("too few collisions (%d < %d)",
+				  col, min_collisions);
+		else if (col > max_collisions)
+			unit_fail("too many collisions (%d > %d)",
+				  col, max_collisions);
+		printk("(This is statistically possible "
+		       "but should not occur often.)\n");
+	}
+
+	swt->destroy(swt);
+}
+
+/* Fills every field of 'key' with random data, except 'wildcards', which is
+ * set to the argument.
+ *
+ * NOTE(review): each MAC field receives two 32-bit stores (8 bytes), whereas
+ * table-mac.c compares dl_src over 6 bytes.  If the arrays are 6 bytes long
+ * the second store spills 2 bytes into whatever follows, which would make
+ * the statement order below significant -- confirm against struct
+ * sw_flow_key in flow.h before reordering anything here. */
+static void
+set_random_key(struct sw_flow_key *key, uint16_t wildcards)
+{
+	key->nw_src = random32();
+	key->nw_dst = random32();
+	key->in_port = (uint16_t) random32();
+	key->dl_vlan = (uint16_t) random32();
+	key->dl_type = (uint16_t) random32();
+	key->tp_src = (uint16_t) random32();
+	key->tp_dst = (uint16_t) random32();
+	key->wildcards = wildcards;
+	*((uint32_t*)key->dl_src) = random32();
+	*(((uint32_t*)key->dl_src) + 1) = random32();
+	*((uint32_t*)key->dl_dst) = random32();
+	*(((uint32_t*)key->dl_dst) + 1) = random32();
+	key->nw_proto = (uint8_t) random32();
+}
+
+/* A flow key that can be kept on a list. */
+struct flow_key_entry {
+	struct sw_flow_key key;
+	struct list_head node;
+};
+
+/*
+ * Allocates memory for 'n_keys' flow_key_entrys. Initializes the allocated
+ * keys with random values, setting their wildcard values to 'wildcards', and
+ * places them all in a list. Returns a pointer to a flow_key_entry that
+ * serves solely as the list's head (its key has not been set). If allocation
+ * fails, returns NULL.
Returned pointer should be freed with vfree (which
+ * frees the memory associated with the keys as well.)
+ */
+
+static struct flow_key_entry *
+allocate_random_keys(int n_keys, uint16_t wildcards)
+{
+	struct flow_key_entry *block;
+	int idx;
+
+	if (n_keys < 0)
+		return NULL;
+
+	/* Slot 0 is the list head; slots 1..n_keys carry the random keys. */
+	block = vmalloc((n_keys+1) * sizeof *block);
+	if (!block) {
+		unit_fail("cannot allocate memory for %u keys",
+			  n_keys);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&block[0].node);
+	for (idx = 1; idx <= n_keys; idx++) {
+		set_random_key(&block[idx].key, wildcards);
+		list_add(&block[idx].node, &block[0].node);
+	}
+
+	return block;
+}
+
+/*
+ * Walks the flow_key_entry list 'keys', building a flow from each key and
+ * offering it to table 'swt'.  Every entry visited leaves 'keys': entries
+ * whose flow the table accepted move to 'added', the others are dropped.  The
+ * walk stops once 'n_flows' flows have been accepted, or at the end of 'keys'
+ * when 'n_flows' is -1.  Returns the number of accepted flows, or -1 if a flow
+ * could not be allocated.
+ */
+
+static int
+insert_flows(struct sw_table *swt, struct list_head *keys, struct list_head *added, int n_flows)
+{
+	struct flow_key_entry *entry, *following;
+	int n_accepted = 0;
+
+	list_for_each_entry_safe (entry, following, keys, node) {
+		struct sw_flow *candidate = flow_zalloc(0, GFP_KERNEL);
+		int accepted;
+
+		if (!candidate) {
+			unit_fail("Could only allocate %u flows", n_accepted);
+			return -1;
+		}
+		candidate->key = entry->key;
+
+		accepted = swt->insert(swt, candidate);
+
+		/* Whatever the outcome, the entry is done with 'keys'. */
+		list_del(&entry->node);
+		if (!accepted) {
+			flow_free(candidate);
+			continue;
+		}
+
+		list_add(&entry->node, added);
+		n_accepted++;
+		if (n_flows != -1 && n_accepted == n_flows)
+			break;
+	}
+
+	return n_accepted;
+}
+
+/*
+ * Finds and returns the flow_key_entry in list 'keys' matching the passed in
+ * flow's key. If not found, returns NULL.
+ */
+
+static struct flow_key_entry *
+find_flow(struct list_head *keys, struct sw_flow *flow)
+{
+	struct flow_key_entry *entry;
+
+	list_for_each_entry(entry, keys, node) {
+		int differs = memcmp(&entry->key, &flow->key,
+				     sizeof(struct sw_flow_key));
+		if (differs == 0)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Returns 0 if table 'swt' finds a flow for every flow_key_entry in list
+ * 'keys', -1 as soon as one lookup comes back empty.
+ */
+
+static int
+check_lookup(struct sw_table *swt, struct list_head *keys)
+{
+	struct flow_key_entry *entry;
+
+	list_for_each_entry(entry, keys, node) {
+		struct sw_flow *hit = swt->lookup(swt, &entry->key);
+		if (!hit)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns 0 if table 'swt' finds NO flow for any flow_key_entry in list
+ * 'keys', -1 as soon as one lookup succeeds.
+ */
+
+static int
+check_no_lookup(struct sw_table *swt, struct list_head *keys)
+{
+	struct flow_key_entry *entry;
+
+	list_for_each_entry(entry, keys, node) {
+		struct sw_flow *hit = swt->lookup(swt, &entry->key);
+		if (hit)
+			return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Compares an iterator's view of the 'swt' table to the list of
+ * flow_key_entrys in 'to_find'. flow_key_entrys that are matched are removed
+ * from the 'to_find' list and placed in the 'found' list. Returns -1 if the
+ * iterator cannot be initialized or it encounters a flow with a key not in
+ * 'to_find'. Else returns the number of flows found by the iterator
+ * (i.e. there might still be flow keys in the 'to_find' list that were not
+ * encountered by the iterator.
it is up to the caller to determine if that is
+ * acceptable behavior)
+ */
+
+static int
+check_iteration(struct sw_table *swt, struct list_head *to_find, struct list_head *found)
+{
+	struct swt_iterator it;
+	int n_matched = 0;
+
+	rcu_read_lock();
+	if (!swt->iterator(swt, &it)) {
+		rcu_read_unlock();
+		unit_fail("Could not initialize iterator");
+		return -1;
+	}
+
+	for (; it.flow != NULL; swt->iterator_next(&it)) {
+		struct flow_key_entry *match = find_flow(to_find, it.flow);
+
+		if (!match) {
+			unit_fail("UNKNOWN ITERATOR FLOW %p",
+				  it.flow);
+			/* Fall through to the common teardown below. */
+			n_matched = -1;
+			break;
+		}
+
+		n_matched++;
+		list_del(&match->node);
+		list_add(&match->node, found);
+	}
+
+	swt->iterator_destroy(&it);
+	rcu_read_unlock();
+
+	return n_matched;
+}
+
+/*
+ * Asks table 'swt' to delete the keys of selected flow_key_entrys in list
+ * 'keys': all of them when 'del_all' is 1, otherwise every third one starting
+ * with the first.  Each selected entry moves from 'keys' to 'deleted',
+ * whatever the table reports.  Returns the total number of flows the table
+ * reports as deleted.
+ */
+
+static int
+delete_flows(struct sw_table *swt, struct list_head *keys,
+	     struct list_head *deleted, uint8_t del_all)
+{
+	struct flow_key_entry *entry, *following;
+	int position = 0;
+	int n_total = 0;
+
+	list_for_each_entry_safe (entry, following, keys, node) {
+		int selected = (del_all == 1) || (position % 3 == 0);
+		int n_removed;
+
+		position++;
+		if (!selected)
+			continue;
+
+		n_removed = swt->delete(swt, &entry->key, 0);
+		if (n_removed > 1) {
+			unit_fail("%d flows deleted for one entry", n_removed);
+			unit_fail("\tfuture 'errors' could just be product duplicate flow_key_entries");
+			unit_fail("THIS IS VERY UNLIKELY...SHOULDN'T HAPPEN OFTEN");
+		}
+		n_total += n_removed;
+
+		list_del(&entry->node);
+		list_add(&entry->node, deleted);
+	}
+
+	return n_total;
+}
+
+/*
+ * Checks that both iteration and lookups are consistent with the caller's view
+ * of the table.
In particular, checks that all keys in flow_key_entry list
+ * 'deleted' do not show up in lookup or iteration, and keys in flow_key_entry
+ * list 'added' do show up. 'tmp' should be an empty list that can be used for
+ * iteration. References to list_head pointers are needed for 'added' and 'tmp'
+ * because iteration will cause the list_heads to change. Function thus
+ * switches 'added' to point to the list of added keys after the iteration.
+ */
+
+static int
+check_lookup_and_iter(struct sw_table *swt, struct list_head *deleted,
+		      struct list_head **added, struct list_head **tmp)
+{
+	struct list_head *drained;
+	int n_iterated;
+
+	if (check_no_lookup(swt, deleted) < 0) {
+		unit_fail("Uninserted flows returning lookup");
+		return -1;
+	}
+
+	if (check_lookup(swt, *added) < 0) {
+		unit_fail("Inserted flows not returning lookup");
+		return -1;
+	}
+
+	n_iterated = check_iteration(swt, *added, *tmp);
+
+	/* The iteration moved the entries it saw from '*added' to '*tmp';
+	 * exchange the two so that '*added' names the populated list again. */
+	drained = *added;
+	*added = *tmp;
+	*tmp = drained;
+
+	if (!list_empty(*tmp)) {
+		unit_fail("WARNING: not all flows in 'added' found by iterator");
+		unit_fail("\tcould be a product of duplicate flow_key_entrys, though should be VERY rare.");
+		/* To avoid reoccurence */
+		INIT_LIST_HEAD(*tmp);
+	}
+
+	return n_iterated;
+}
+
+/*
+ * Verifies iteration and lookup after inserting 'n_flows', then after deleting
+ * some flows, and once again after deleting all flows in table 'swt'.
+ */
+
+static int
+iterator_test(struct sw_table *swt, int n_flows, uint16_t wildcards)
+{
+	struct flow_key_entry *allocated, h1, h2;
+	struct list_head *added, *deleted, *tmp;
+	int ret, n_del, success;
+
+	INIT_LIST_HEAD(&h1.node);
+	INIT_LIST_HEAD(&h2.node);
+
+	success = -1;
+
+	allocated = allocate_random_keys(n_flows, wildcards);
+	if(allocated == NULL)
+		return success;
+
+	/* Nothing is in the table yet, so every key starts out "deleted". */
+	deleted = &allocated->node;
+	added = &h1.node;
+	tmp = &h2.node;
+
+	ret = insert_flows(swt, deleted, added, -1);
+	if (ret < 0)
+		goto iterator_test_destr;
+
+	/* From here on only the flows the table accepted are counted. */
+	n_flows = ret;
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after insertion");
+		goto iterator_test_destr;
+	} else if (ret != n_flows) {
+		unit_fail("Iterator only found %d of %d flows",
+			  ret, n_flows);
+		goto iterator_test_destr;
+	}
+
+	n_del = delete_flows(swt, added, deleted, 0);
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after some deletion");
+		goto iterator_test_destr;
+	} else if (ret + n_del != n_flows) {
+		unit_fail("iterator after deletion inconsistent");
+		unit_fail("\tn_del = %d, n_found = %d, n_flows = %d",
+			  n_del, ret, n_flows);
+		goto iterator_test_destr;
+	}
+
+	n_flows -= n_del;
+
+	n_del = delete_flows(swt, added, deleted, 1);
+	if (n_del != n_flows) {
+		unit_fail("Not all flows deleted - only %d of %d",
+			  n_del, n_flows);
+		goto iterator_test_destr;
+	}
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after all deletion");
+		goto iterator_test_destr;
+	} else if (ret != 0) {
+		unit_fail("Empty table iterator failed. %d flows found",
+			  ret);
+		goto iterator_test_destr;
+	}
+
+	success = 0;
+
+iterator_test_destr:
+	/* Empty the table with an all-wildcards delete; the list-head entry's
+	 * otherwise unused key serves as scratch space for it. */
+	allocated->key.wildcards = OFPFW_ALL;
+	swt->delete(swt, &allocated->key, 0);
+	vfree(allocated);
+	return success;
+}
+
+
+/*
+ * Checks lookup and iteration consistency after adding one flow, adding the
+ * flow again, and then deleting the flow from table 'swt'.
+ */
+
+static int
+add_test(struct sw_table *swt, uint16_t wildcards)
+{
+	struct flow_key_entry *allocated, h1, h2;
+	struct list_head *added, *deleted, *tmp, *tmp2;
+	int ret, success = -1;
+
+	INIT_LIST_HEAD(&h1.node);
+	INIT_LIST_HEAD(&h2.node);
+
+	allocated = allocate_random_keys(2, wildcards);
+	if (allocated == NULL)
+		return success;
+
+	deleted = &allocated->node;
+	added = &h1.node;
+	tmp = &h2.node;
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup before table modification");
+		goto add_test_destr;
+	} else if (ret != 0) {
+		unit_fail("Iterator on empty table found %d flows",
+			  ret);
+		goto add_test_destr;
+	}
+
+	if (insert_flows(swt, deleted, added, 1) != 1) {
+		unit_fail("Cannot add one flow to table");
+		goto add_test_destr;
+	}
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after single add");
+		goto add_test_destr;
+	} else if (ret != 1) {
+		unit_fail("Iterator on single add found %d flows",
+			  ret);
+		goto add_test_destr;
+	}
+
+	/* Re-adding flow */
+	if (insert_flows(swt, added, tmp, 1) != 1) {
+		unit_fail("Cannot insert same flow twice");
+		goto add_test_destr;
+	}
+
+	/* insert_flows() moved the entry to 'tmp'; swap so that 'added' names
+	 * the list that holds it. */
+	tmp2 = added;
+	added = tmp;
+	tmp = tmp2;
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after double add");
+		goto add_test_destr;
+	} else if (ret != 1) {
+		unit_fail("Iterator on double add found %d flows",
+			  ret);
+		goto add_test_destr;
+	}
+
+	ret = delete_flows(swt, added, deleted, 1);
+	if (ret != 1) {
+		unit_fail("Unexpected %d flows deleted", ret);
+		goto add_test_destr;
+	}
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after delete.");
+		goto add_test_destr;
+	} else if (ret != 0) {
+		unit_fail("unexpected %d flows found delete", ret);
+		goto add_test_destr;
+	}
+
+	success = 0;
+
+add_test_destr:
+	/* Empty the table with an all-wildcards delete. */
+	allocated->key.wildcards = OFPFW_ALL;
+	swt->delete(swt, &allocated->key, 0);
+	vfree(allocated);
+	return success;
+}
+
+/*
+ * Checks lookup and iteration consistency after each deleting a non-existent
+ * flow, adding and then deleting a flow, adding the flow again, and then
+ * deleting the flow twice in table 'swt'.
+ */
+
+static int
+delete_test(struct sw_table *swt, uint16_t wildcards)
+{
+	struct flow_key_entry *allocated, h1, h2;
+	struct list_head *added, *deleted, *tmp, *tmp2;
+	int i, ret, success = -1;
+
+	INIT_LIST_HEAD(&h1.node);
+	INIT_LIST_HEAD(&h2.node);
+
+	allocated = allocate_random_keys(2, wildcards);
+	if (allocated == NULL)
+		return success;
+
+	/* Not really added...*/
+
+	added = &allocated->node;
+	deleted = &h1.node;
+	tmp = &h2.node;
+
+	ret = delete_flows(swt, added, deleted, 1);
+	if (ret != 0) {
+		unit_fail("Deleting non-existent keys from table returned unexpected value %d",
+			  ret);
+		goto delete_test_destr;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+		if (ret < 0) {
+			if (i == 0)
+				unit_fail("Loop %d. Bad lookup before modification.", i);
+			else
+				unit_fail("Loop %d. Bad lookup after delete.", i);
+			goto delete_test_destr;
+		} else if (ret != 0) {
+			if(i == 0)
+				unit_fail("Loop %d. Unexpected %d flows found before modification",
+					  i, ret);
+			else
+				unit_fail("Loop %d. Unexpected %d flows found after delete",
+					  i, ret);
+			goto delete_test_destr;
+		}
+
+		/* The third pass only verifies the state left by the second. */
+		if(i == 2)
+			break;
+
+		if (insert_flows(swt, deleted, added, 1) != 1) {
+			unit_fail("loop %d: cannot add flow to table", i);
+			goto delete_test_destr;
+		}
+
+		ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+		if (ret < 0) {
+			unit_fail("loop %d: bad lookup after single add.", i);
+			goto delete_test_destr;
+		} else if (ret != 1) {
+			unit_fail("loop %d: unexpected %d flows found after single add",
+				  i, ret);
+			goto delete_test_destr;
+		}
+
+		ret = delete_flows(swt, added, deleted, 1);
+		if (ret != 1) {
+			unit_fail("loop %d: deleting inserted key from table returned unexpected value %d",
+				  i, ret);
+			goto delete_test_destr;
+		}
+	}
+
+
+	/* Second delete of keys that are no longer in the table: nothing may
+	 * be reported as deleted. */
+	ret = delete_flows(swt, deleted, tmp, 1);
+	if (ret != 0) {
+		unit_fail("Double delete returned unexpected value %d", ret);
+		goto delete_test_destr;
+	}
+
+	/* delete_flows() moved the entries to 'tmp'; swap so that 'deleted'
+	 * names the list that holds them and 'tmp' is the empty one.  (This
+	 * used to assign 'deleted = tmp2', which left both pointers on the
+	 * emptied list and made the check below vacuous.) */
+	tmp2 = deleted;
+	deleted = tmp;
+	tmp = tmp2;
+
+	ret = check_lookup_and_iter(swt, deleted, &added, &tmp);
+	if (ret < 0) {
+		unit_fail("Bad lookup after double delete.");
+		goto delete_test_destr;
+	} else if (ret != 0) {
+		unit_fail("Unexpected %d flows found after double delete", ret);
+		goto delete_test_destr;
+	}
+
+	success = 0;
+
+delete_test_destr:
+	/* Empty the table with an all-wildcards delete. */
+	allocated->key.wildcards = OFPFW_ALL;
+	swt->delete(swt, &allocated->key, 0);
+	vfree(allocated);
+	return success;
+}
+
+/*
+ * Randomly adds and deletes from a set of size 'n_flows', looping for 'i'
+ * iterations.
+ */ + +static int +complex_add_delete_test(struct sw_table *swt, int n_flows, int i, uint16_t wildcards) +{ + struct flow_key_entry *allocated, h1, h2; + struct list_head *added, *deleted, *tmp; + int cnt, ret, n_added, n_deleted, success = -1; + uint8_t del_all; + + INIT_LIST_HEAD(&h1.node); + INIT_LIST_HEAD(&h2.node); + + allocated = allocate_random_keys(n_flows, wildcards); + if (allocated == NULL) + return success; + + deleted = &allocated->node; + added = &h1.node; + tmp = &h2.node; + + n_deleted = n_flows; + n_added = 0; + + for (;i > 0; i--) { + if (n_deleted != 0 && random32() % 2 == 0) { + cnt = random32() % n_deleted; + cnt = insert_flows(swt, deleted, added, cnt); + if (cnt < 0) + goto complex_test_destr; + n_deleted -= cnt; + n_added += cnt; + } else { + if (random32() % 7 == 0) + del_all = 1; + else + del_all = 0; + cnt = delete_flows(swt, added, deleted, del_all); + n_deleted += cnt; + n_added -= cnt; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup on iteration %d.", i); + goto complex_test_destr; + } + } + + delete_flows(swt, added, deleted, 1); + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup on end deletion."); + goto complex_test_destr; + } else if (ret != 0) { + unit_fail("Unexpected %d flows found on end deletion", ret); + goto complex_test_destr; + } + + success = 0; + +complex_test_destr: + allocated->key.wildcards = OFPFW_ALL; + swt->delete(swt, &allocated->key, 0); + vfree(allocated); + return success; + +} + +void run_table_t(void) +{ + int mac_buckets, mac_max, linear_max, hash_buckets, hash2_buckets1; + int hash2_buckets2, num_flows, num_iterations; + int i; + + struct sw_table *swt; + + /* Most basic operations. 
*/ + simple_insert_delete(table_mac_create(2048, 65536), + OFPFW_ALL & ~OFPFW_DL_SRC); + simple_insert_delete(table_linear_create(2048), 0); + simple_insert_delete(table_hash_create(0x04C11DB7, 2048), 0); + simple_insert_delete(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 0); + + /* MAC table operations. */ + multiple_insert_destroy(table_mac_create(2048, 65536), 1024, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), 2048, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), 65535, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), + 131072, OFPFW_ALL & ~OFPFW_DL_SRC, 65536, 65536); + + /* Linear table operations. */ + multiple_insert_destroy(table_linear_create(2048), 1024, 0, 0, 0); + multiple_insert_destroy(table_linear_create(2048), 2048, 0, 0, 0); + multiple_insert_destroy(table_linear_create(2048), 8192, 0, + 8192 - 2048, 8192 - 2048); + + /* Hash table operations. */ + multiple_insert_destroy(table_hash_create(0x04C11DB7, 2048), 1024, 0, + 100, 300); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 2048), 2048, 0, + 500, 1000); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 1 << 20), 8192, 0, + 0, 50); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 1 << 20), 65536, 0, + 1500, 3000); + + /* Hash table 2, two hash functions. */ + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 1024, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 2048, 0, 50, 200); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x1EDC6F41, 1<<20), 8192, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x1EDC6F41, 1<<20), 65536, 0, 0, 20); + + /* Hash table 2, one hash function. 
*/ + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x04C11DB7, 2048), 1024, 0, 0, 50); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x04C11DB7, 2048), 2048, 0, 100, 300); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 8192, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 65536, 0, 0, 100); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 1<<16, 0, 0, 100); + + mac_buckets = 1024; + mac_max = 2048; + linear_max = 2048; + hash_buckets = 2048; + hash2_buckets1 = 1024; + hash2_buckets2 = 1024; + + num_flows = 2300; + num_iterations = 100; + + printk("\nTesting on each table type:\n"); + printk(" iteration_test on 0 flows\n"); + printk(" iteration_test on %d flows\n", num_flows); + printk(" add_test\n"); + printk(" delete_test\n"); + printk(" complex_add_delete_test with %d flows and %d iterations\n\n", + num_flows, num_iterations); + + for (i = 0; i < 4; i++) { + unsigned int mask = i == 0 ? 
: 0; + + if (unit_failed()) + return; + + mask = 0; + switch (i) { + case 0: + swt = table_mac_create(mac_buckets, mac_max); + mask = OFPFW_ALL & ~OFPFW_DL_SRC; + break; + case 1: + swt = table_linear_create(linear_max); + break; + case 2: + swt = table_hash_create (0x04C11DB7, hash_buckets); + break; + case 3: + swt = table_hash2_create(0x04C11DB7, hash2_buckets1, + 0x1EDC6F41, hash2_buckets2); + break; + default: + BUG(); + return; + } + + if (swt == NULL) { + unit_fail("failed to allocate table %d", i); + return; + } + printk("Testing %s table with %d buckets and %d max flows...\n", + table_name(swt), mac_buckets, mac_max); + iterator_test(swt, 0, mask); + iterator_test(swt, num_flows, mask); + add_test(swt, mask); + delete_test(swt, mask); + complex_add_delete_test(swt, num_flows, num_iterations, mask); + swt->destroy(swt); + } +} + diff --git a/datapath/tests/.gitignore b/datapath/tests/.gitignore new file mode 100644 index 00000000..35e75b7a --- /dev/null +++ b/datapath/tests/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/forward_t.h +/fwdhgen diff --git a/datapath/tests/Makefile.am b/datapath/tests/Makefile.am new file mode 100644 index 00000000..fc5fd8f8 --- /dev/null +++ b/datapath/tests/Makefile.am @@ -0,0 +1,7 @@ +noinst_HEADERS = forward_t.h + +forward_t.h: gen_forward_t.pl example.pcap + perl $(srcdir)/gen_forward_t.pl $(srcdir)/example.pcap > forward_t.h.tmp + mv forward_t.h.tmp forward_t.h + +EXTRA_DIST = gen_forward_t.pl example.pcap diff --git a/datapath/tests/example.pcap b/datapath/tests/example.pcap new file mode 100644 index 0000000000000000000000000000000000000000..7bda45893ee5616ff72e8046f28cdab28835a275 GIT binary patch literal 82 zcmca|c+)~A1{MYwNB}Yzfw-k2!2Jj>7lRg%4Z{DyfMLNIjg?b4*ccc&7}!{V3c>sX OD;R} output.h\n"; + print "where input.pcap is a packet capture in pcap format\n"; + print "and output.c is a C header file containing the packets\n"; + exit(1); +} +my ($in_file_name) = $ARGV[0]; +open(INPUT, '<', $in_file_name) or die 
"$in_file_name: open: $!\n"; + +my ($file_header); +if (read(INPUT, $file_header, 24) != 24) { + die "$in_file_name: could not read pcap header\n"; +} + +my ($s, $l); +if (substr($file_header, 0, 4) eq pack('V', 0xa1b2c3d4)) { + ($s, $l) = ('v', 'V'); +} elsif (substr($file_header, 0, 4) eq pack('N', 0xa1b2c3d4)) { + ($s, $l) = ('n', 'N'); +} else { + die "$in_file_name: not a pcap file\n"; +} + +print <<'EOF'; +#ifndef DP_TEST_PACKETS_H +#define DP_TEST_PACKETS_H 1 + +struct pkt { + unsigned char *data; + unsigned int len; +}; +EOF + +my ($n_packets) = 0; +for (;;) { + my ($pkt_hdr) = must_read(16); + last if $pkt_hdr eq ''; + + my ($ts_sec, $ts_usec, $incl_len, $orig_len) = unpack("${l}4", $pkt_hdr); + print STDERR "warning: captured less than len %u\n" + if $incl_len < $orig_len; + + my ($pkt) = must_read($incl_len); + die "$in_file_name: unexpected end of file\n" if !$pkt; + + print "\nstatic unsigned char p${n_packets}[] = {"; + my ($line_bytes) = 0; + for my $c (map(ord($_), split(//, $pkt))) { + if ($line_bytes++ % 13 == 0) { + print "\n"; + } + printf " 0x%02x,", $c; + } + print "\n};\n"; + $n_packets++; +} + +print "\nstatic int num_packets = $n_packets;\n"; +print "\nstatic struct pkt packets[] = {\n"; +for my $i (0..$n_packets - 1) { + print " { p$i, sizeof p$i },\n"; +} +print "};\n"; + +print "\n#endif\n"; + +sub must_read { + my ($rq_bytes) = @_; + my ($data); + my ($nbytes) = read(INPUT, $data, $rq_bytes); + die "$in_file_name: read: $!\n" if !defined $nbytes; + die "$in_file_name: unexpected end of file\n" + if $nbytes && $nbytes != $rq_bytes; + return $data; +} diff --git a/datapath/tests/ofp_pcap.c b/datapath/tests/ofp_pcap.c new file mode 100644 index 00000000..e1b0d226 --- /dev/null +++ b/datapath/tests/ofp_pcap.c @@ -0,0 +1,97 @@ +/* A cheap knock-off of the pcap library to remove that dependency. 
*/ + +#include +#include +#include +#include "ofp_pcap.h" + +int +ofp_pcap_open(struct ofp_pcap *p, const char *fname, char *errbuf) +{ + FILE *fp; + struct pcap_file_header hdr; + size_t amt_read; + + fp = fopen(fname, "r"); + + memset((char *)p, 0, sizeof(*p)); + + amt_read = fread((char *)&hdr, 1, sizeof(hdr), fp); + if (amt_read != sizeof(hdr)) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + goto error; + } + + if (hdr.magic != TCPDUMP_MAGIC) { + hdr.magic = SWAPLONG(hdr.magic); + hdr.version_major = SWAPSHORT(hdr.version_major); + hdr.version_minor = SWAPSHORT(hdr.version_minor); + hdr.thiszone = SWAPLONG(hdr.thiszone); + hdr.sigfigs = SWAPLONG(hdr.sigfigs); + hdr.snaplen = SWAPLONG(hdr.snaplen); + hdr.linktype = SWAPLONG(hdr.linktype); + + p->swapped = 1; + } + + p->fp = fp; + p->errbuf = errbuf; + p->bufsize = hdr.snaplen+sizeof(struct pcap_pkthdr); + p->buf = malloc(p->bufsize); + if (!p->buf) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "error allocating buffer"); + goto error; + } + + if (hdr.version_major < OFP_PCAP_VERSION_MAJOR) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "archaic file format"); + goto error; + } + + return 0; + +error: + if (p->buf) + free(p->buf); + return 1; +} + +char * +ofp_pcap_next(struct ofp_pcap *p, struct pcap_pkthdr *hdr) +{ + size_t amt_read; + + amt_read = fread(hdr, 1, sizeof(*hdr), p->fp); + if (amt_read != sizeof(*hdr)) { + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + return NULL; + } + + if (p->swapped) { + hdr->caplen = SWAPLONG(hdr->caplen); + hdr->len = SWAPLONG(hdr->len); + hdr->ts.tv_sec = SWAPLONG(hdr->ts.tv_sec); + hdr->ts.tv_usec = SWAPLONG(hdr->ts.tv_usec); + } + + if (hdr->caplen > p->bufsize) { + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + return NULL; + } + + amt_read = fread((char *)p->buf, 1, hdr->caplen, p->fp); + if (amt_read != hdr->caplen){ + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + return NULL; 
+ } + + return p->buf; +} + +void +ofp_pcap_close(struct ofp_pcap *p) +{ + fclose(p->fp); + free(p->buf); +} + diff --git a/datapath/tests/ofp_pcap.h b/datapath/tests/ofp_pcap.h new file mode 100644 index 00000000..6bd2dcb3 --- /dev/null +++ b/datapath/tests/ofp_pcap.h @@ -0,0 +1,64 @@ +#ifndef OFP_PCAP_H +#define OFP_PCAP_H + +#include +#include +#include + +#define OFP_PCAP_VERSION_MAJOR 2 +#define OFP_PCAP_VERSION_MINOR 4 + +#define TCPDUMP_MAGIC 0xa1b2c3d4 + +#define OFP_LINKTYPE_ETHERNET 1 + +#define OFP_PCAP_ERRBUF_SIZE 256 + +/* Swap the byte order regardless of the architecture */ +#define SWAPLONG(x) \ + ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24)) +#define SWAPSHORT(x) \ + ((((x)&0xff)<<8) | (((x)&0xff00)>>8)) + +struct ofp_pcap { + FILE *fp; /* File pointer to currently processed file */ + int swapped; /* Indicate whether endian-ness needs to change */ + char *buf; /* Buffer to hold packet data */ + size_t bufsize; /* Size of buffer */ + char *errbuf; /* Pointer to buffer to hold error message */ +}; + +struct pcap_file_header { + uint32_t magic; /* Magic number */ + uint16_t version_major; /* Version number major */ + uint16_t version_minor; /* Version number minor */ + int32_t thiszone; /* Gmt to local correction */ + uint32_t sigfigs; /* Accuracy of timestamps */ + uint32_t snaplen; /* Max length saved portion of each pkt */ + uint32_t linktype; /* Data link type (LINKTYPE_*) */ +}; + +/* + * This is a timeval as stored in disk in a dumpfile. + * It has to use the same types everywhere, independent of the actual + * `struct timeval' + */ +struct pcap_timeval { + int32_t tv_sec; /* Seconds */ + int32_t tv_usec; /* Microseconds */ +}; + +/* + * How a `pcap_pkthdr' is actually stored in the dumpfile. 
+ */ +struct pcap_pkthdr { + struct pcap_timeval ts; /* Time stamp */ + uint32_t caplen; /* Length of portion present */ + uint32_t len; /* Length this packet (off wire) */ +}; + +int ofp_pcap_open(struct ofp_pcap *p, const char *fname, char *errbuf); +char *ofp_pcap_next(struct ofp_pcap *p, struct pcap_pkthdr *hdr); +void ofp_pcap_close(struct ofp_pcap *p); + +#endif /* ofp_pcap.h */ diff --git a/datapath/unit-exports.c b/datapath/unit-exports.c new file mode 100644 index 00000000..275f01ad --- /dev/null +++ b/datapath/unit-exports.c @@ -0,0 +1,26 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "table.h" +#include "flow.h" +#include "crc32.h" +#include "forward.h" +#include + +EXPORT_SYMBOL(flow_alloc); +EXPORT_SYMBOL(flow_free); +EXPORT_SYMBOL(flow_cache); + +EXPORT_SYMBOL(table_mac_create); +EXPORT_SYMBOL(table_hash_create); +EXPORT_SYMBOL(table_hash2_create); +EXPORT_SYMBOL(table_linear_create); + +EXPORT_SYMBOL(crc32_init); +EXPORT_SYMBOL(crc32_calculate); + +EXPORT_SYMBOL(flow_extract); +EXPORT_SYMBOL(execute_setter); diff --git a/datapath/unit.c b/datapath/unit.c new file mode 100644 index 00000000..dfc12797 --- /dev/null +++ b/datapath/unit.c @@ -0,0 +1,100 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include +#include +#include +#include + +#include "unit.h" + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +static char run[1024]; +module_param_string(run, run, sizeof run, 0); +MODULE_PARM_DESC(run_tests, "run=\"test1,[test2,...]\"\n"); +#else +static char *run; +MODULE_PARM(run, "s"); +#endif + +static int test_failed; +static const char *test_name; + +void unit_fail_function(const char *function, const char *msg, ...) 
+{ + va_list args; + + printk("%s: FAIL: %s: ", test_name, function); + va_start(args, msg); + vprintk(msg, args); + va_end(args); + printk("\n"); + test_failed = 1; +} + +int unit_failed(void) +{ + return test_failed; +} + +static int run_test(const char *name, size_t len) +{ + static const struct test { + const char *name; + void (*func)(void); + } tests[] = { +#define UNIT_TEST(NAME) {#NAME, run_##NAME}, + UNIT_TESTS +#undef UNIT_TEST + }; + + const struct test *p; + + for (p = tests; p < &tests[ARRAY_SIZE(tests)]; p++) + if (len == strlen(p->name) + && !memcmp(name, p->name, len)) { + test_name = p->name; + test_failed = 0; + p->func(); + printk("%s: %s\n", test_name, + test_failed ? "FAIL" : "PASS"); + return !test_failed; + } + printk("unknown unit test %.*s\n", (int) len, name); + return 0; +} + +int unit_init(void) +{ + int n_pass = 0, n_fail = 0; + char *p = run ?: ""; + for (;;) { + static const char white_space[] = " \t\r\n\v,"; + int len; + + p += strspn(p, white_space); + if (!*p) + break; + + len = strcspn(p, white_space); + if (run_test(p, len)) + n_pass++; + else + n_fail++; + p += len; + } + + if (n_pass + n_fail == 0) + printk("no tests specified (use run=\"test1 [test2...]\")\n"); + else + printk("%d tests passed, %d failed\n", n_pass, n_fail); + + return -ENODEV; +} + +module_init(unit_init); +MODULE_LICENSE("GPL"); diff --git a/datapath/unit.h b/datapath/unit.h new file mode 100644 index 00000000..6d180a8b --- /dev/null +++ b/datapath/unit.h @@ -0,0 +1,21 @@ +#ifndef UNIT_H +#define UNIT_H 1 + +/* List of unit tests. */ +#define UNIT_TESTS \ + UNIT_TEST(table_t) \ + UNIT_TEST(crc_t) \ + UNIT_TEST(forward_t) + +/* Prototype a function run_ for each of the unit tests. */ +#define UNIT_TEST(NAME) void run_##NAME(void); +UNIT_TESTS +#undef UNIT_TEST + +void unit_fail_function(const char *function, const char *msg, ...) + __attribute__((format(printf, 2, 3))); +#define unit_fail(...) 
unit_fail_function(__func__, __VA_ARGS__) + +int unit_failed(void); + +#endif /* unit.h */ diff --git a/include/.gitignore b/include/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/include/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 00000000..0406acb1 --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,25 @@ +noinst_HEADERS = \ + buffer.h \ + command-line.h \ + compiler.h \ + dynamic-string.h \ + dpif.h \ + fatal-signal.h \ + fault.h \ + flow.h \ + hash.h \ + ip.h \ + list.h \ + mac.h \ + Makefile.am \ + netlink.h \ + ofp-print.h \ + openflow.h \ + openflow-netlink.h \ + packets.h \ + socket-util.h \ + util.h \ + vconn.h \ + vlog-socket.h \ + vlog.h \ + xtoxll.h diff --git a/include/buffer.h b/include/buffer.h new file mode 100644 index 00000000..98750244 --- /dev/null +++ b/include/buffer.h @@ -0,0 +1,63 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BUFFER_H +#define BUFFER_H 1 + +#include + +/* Buffer for holding arbitrary data. A buffer is automatically reallocated as + * necessary if it grows too large for the available memory. */ +struct buffer { + void *base; /* First byte of area malloc()'d area. */ + size_t allocated; /* Number of bytes allocated. */ + + void *data; /* First byte actually in use. */ + size_t size; /* Number of bytes in use. */ + + struct buffer *next; /* Next in a list of buffers. */ +}; + +void buffer_use(struct buffer *, void *, size_t); + +void buffer_init(struct buffer *, size_t); +void buffer_uninit(struct buffer *); +void buffer_reinit(struct buffer *, size_t); + +struct buffer *buffer_new(size_t); +void buffer_delete(struct buffer *); + +void *buffer_at(const struct buffer *, size_t offset, size_t size); +void *buffer_at_assert(const struct buffer *, size_t offset, size_t size); +void *buffer_tail(const struct buffer *); +void *buffer_end(const struct buffer *); + +void *buffer_put_uninit(struct buffer *, size_t); +void buffer_put(struct buffer *, const void *, size_t); + +size_t buffer_headroom(struct buffer *); +size_t buffer_tailroom(struct buffer *); +void buffer_reserve_tailroom(struct buffer *, size_t); + +void buffer_clear(struct buffer *); +void buffer_pull(struct buffer *, size_t); + +#endif /* buffer.h */ diff --git a/include/command-line.h b/include/command-line.h new file mode 100644 index 00000000..57d3e9bd --- /dev/null +++ b/include/command-line.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef COMMAND_LINE_H +#define COMMAND_LINE_H 1 + +/* Utilities for command-line parsing. */ + +struct option; +char *long_options_to_short_options(const struct option *options); + +#endif /* command-line.h */ diff --git a/include/compiler.h b/include/compiler.h new file mode 100644 index 00000000..bfd3f167 --- /dev/null +++ b/include/compiler.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef COMPILER_H +#define COMPILER_H 1 + +#define NO_RETURN __attribute__((__noreturn__)) +#define UNUSED __attribute__((__unused__)) +#define PACKED __attribute__((__packed__)) +#define PRINTF_FORMAT(FMT, ARG1) __attribute__((__format__(printf, FMT, ARG1))) +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#endif /* compiler.h */ diff --git a/include/dpif.h b/include/dpif.h new file mode 100644 index 00000000..795a5007 --- /dev/null +++ b/include/dpif.h @@ -0,0 +1,55 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DPIF_H +#define DPIF_H 1 + +/* Operations for the datapath running in the local kernel. The interface can + * generalize to multiple types of local datapaths, but the implementation only + * supports the openflow kernel module via netlink. */ + +#include +#include + +struct buffer; +struct ofp_match; + +/* A datapath interface. Opaque. 
*/ +struct dpif +{ + int dp_idx; + struct nl_sock *sock; +}; + +int dpif_open(int dp_idx, bool subscribe, struct dpif *); +void dpif_close(struct dpif *); +int dpif_recv_openflow(struct dpif *, struct buffer **, bool wait); +int dpif_send_openflow(struct dpif *, struct buffer *, bool wait); +int dpif_add_dp(struct dpif *); +int dpif_del_dp(struct dpif *); +int dpif_add_port(struct dpif *, const char *netdev); +int dpif_del_port(struct dpif *, const char *netdev); +int dpif_show(struct dpif *); +int dpif_dump_tables(struct dpif *); +int dpif_dump_flows(struct dpif *, int table, struct ofp_match *); +int dpif_benchmark_nl(struct dpif *, uint32_t, uint32_t); + +#endif /* dpif.h */ diff --git a/include/dynamic-string.h b/include/dynamic-string.h new file mode 100644 index 00000000..5d21c092 --- /dev/null +++ b/include/dynamic-string.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DYNAMIC_STRING_H +#define DYNAMIC_STRING_H 1 + +#include +#include +#include "compiler.h" + +struct ds { + char *string; /* Null-terminated string. */ + size_t length; /* Bytes used, not including null terminator. */ + size_t allocated; /* Bytes allocated, not including null terminator. */ +}; + +#define DS_EMPTY_INITIALIZER { NULL, 0, 0 } + +void ds_init(struct ds *); +void ds_reserve(struct ds *, size_t min_length); +void ds_put_format(struct ds *, const char *, ...) PRINTF_FORMAT(2, 3); +void ds_put_format_valist(struct ds *, const char *, va_list) + PRINTF_FORMAT(2, 0); +char *ds_cstr(struct ds *); +void ds_destroy(struct ds *); + +#endif /* dynamic-string.h */ diff --git a/include/fatal-signal.h b/include/fatal-signal.h new file mode 100644 index 00000000..7d716da4 --- /dev/null +++ b/include/fatal-signal.h @@ -0,0 +1,28 @@ +/* Utility functions for hooking process termination signals. + * + * Hooks registered with this module are called by handlers for signals that + * terminate the process normally (e.g. SIGTERM, SIGINT). They are not called + * for signals that indicate program errors (e.g. SIGFPE, SIGSEGV). They are + * useful for cleanup, such as deleting temporary files. + * + * The hooks are not called upon normal process termination via exit(). Use + * atexit() to hook normal process termination. + * + * These functions will only work properly for single-threaded processes. */ + +#ifndef FATAL_SIGNAL_H +#define FATAL_SIGNAL_H 1 + +/* Basic interface. */ +void fatal_signal_add_hook(void (*)(void *aux), void *aux); +void fatal_signal_block(void); +void fatal_signal_unblock(void); + +/* Convenience functions for unlinking files upon termination. 
+ * + * These functions also unlink the files upon normal process termination via + * exit(). */ +void fatal_signal_add_file_to_unlink(const char *); +void fatal_signal_remove_file_to_unlink(const char *); + +#endif /* fatal-signal.h */ diff --git a/include/fault.h b/include/fault.h new file mode 100644 index 00000000..c1e8ff18 --- /dev/null +++ b/include/fault.h @@ -0,0 +1,28 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef FAULT_H +#define FAULT_H 1 + +void register_fault_handlers(void); +void log_backtrace(void); + +#endif /* fault.h */ diff --git a/include/flow.h b/include/flow.h new file mode 100644 index 00000000..63db5679 --- /dev/null +++ b/include/flow.h @@ -0,0 +1,33 @@ +#ifndef FLOW_H +#define FLOW_H 1 + +#include +#include "util.h" + +struct buffer; + +/* Identification data for a flow. + All fields are in network byte order. 
+ In decreasing order by size, so that flow structures can be hashed or + compared bytewise. */ +struct flow { + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint16_t in_port; /* Input switch port. */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ + uint8_t dl_src[6]; /* Ethernet source address. */ + uint8_t dl_dst[6]; /* Ethernet destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t reserved; /* One byte of padding. */ +}; +BUILD_ASSERT_DECL(sizeof (struct flow) == 32); + +void flow_extract(const struct buffer *, uint16_t in_port, struct flow *); +void flow_print(FILE *, const struct flow *); +int flow_compare(const struct flow *, const struct flow *); +unsigned long int flow_hash(const struct flow *, uint32_t basis); + +#endif /* flow.h */ diff --git a/include/hash.h b/include/hash.h new file mode 100644 index 00000000..2a17c0e1 --- /dev/null +++ b/include/hash.h @@ -0,0 +1,12 @@ +#ifndef HASH_H +#define HASH_H 1 + +#include +#include + +#define HASH_FNV_BASIS UINT32_C(2166136261) +#define HASH_FNV_PRIME UINT32_C(16777619) + +uint32_t hash_fnv(const void *, size_t, uint32_t basis); + +#endif /* hash.h */ diff --git a/include/ip.h b/include/ip.h new file mode 100644 index 00000000..2fa8aa98 --- /dev/null +++ b/include/ip.h @@ -0,0 +1,11 @@ +#ifndef IP_H +#define IP_H 1 + +#define IP_FMT "%"PRIu8".%"PRIu8".%"PRIu8".%"PRIu8 +#define IP_ARGS(ip) /* 'ip' is a pointer to the 4 address bytes; it is parenthesized so any pointer expression is safe. */ \ + ((uint8_t *) (ip))[0], \ + ((uint8_t *) (ip))[1], \ + ((uint8_t *) (ip))[2], \ + ((uint8_t *) (ip))[3] + +#endif /* ip.h */ diff --git a/include/list.h b/include/list.h new file mode 100644 index 00000000..6bf934bd --- /dev/null +++ b/include/list.h @@ -0,0 +1,53 @@ +#ifndef LIST_H +#define LIST_H 1 + +/* Doubly linked list. */ + +#include +#include +#include "util.h" + +/* Doubly linked list head or element.
*/ +struct list + { + struct list *prev; /* Previous list element. */ + struct list *next; /* Next list element. */ + }; + +#define LIST_INITIALIZER(LIST) { LIST, LIST } + +void list_init(struct list *); + +/* List insertion. */ +void list_insert(struct list *, struct list *); +void list_splice(struct list *before, struct list *first, struct list *last); +void list_push_front(struct list *, struct list *); +void list_push_back(struct list *, struct list *); + +/* List removal. */ +struct list *list_remove(struct list *); +struct list *list_pop_front(struct list *); +struct list *list_pop_back(struct list *); + +/* List elements. */ +struct list *list_front(struct list *); +struct list *list_back(struct list *); + +/* List properties. */ +size_t list_size(const struct list *); +bool list_is_empty(const struct list *); + +#define LIST_ELEM__(ELEM, STRUCT, MEMBER, LIST) \ + (ELEM != LIST ? CONTAINER_OF(ELEM, STRUCT, MEMBER) : NULL) +#define LIST_FOR_EACH(ITER, STRUCT, MEMBER, LIST) \ + for (ITER = LIST_ELEM__((LIST)->next, STRUCT, MEMBER, LIST); \ + ITER != NULL; \ + ITER = LIST_ELEM__((ITER)->MEMBER.next, STRUCT, MEMBER, LIST)) +#define LIST_FOR_EACH_SAFE(ITER, NEXT, STRUCT, MEMBER, LIST) \ + for (ITER = LIST_ELEM__((LIST)->next, STRUCT, MEMBER, LIST); \ + (ITER != NULL \ + ? 
(NEXT = LIST_ELEM__((ITER)->MEMBER.next, STRUCT, MEMBER, LIST), 1) \ + : 0), \ + ITER = NEXT) + +#endif /* list.h */ diff --git a/include/mac.h b/include/mac.h new file mode 100644 index 00000000..a8516df7 --- /dev/null +++ b/include/mac.h @@ -0,0 +1,41 @@ +#ifndef MAC_H +#define MAC_H 1 + +#include +#include +#include +#include "packets.h" + +static inline bool mac_is_multicast(const uint8_t mac[ETH_ADDR_LEN]) +{ + return mac[0] & 0x80; +} + +static inline bool mac_is_private(const uint8_t mac[ETH_ADDR_LEN]) +{ + return mac[0] & 0x40; +} + +static inline bool mac_is_broadcast(const uint8_t mac[ETH_ADDR_LEN]) +{ + return (mac[0] & mac[1] & mac[2] & mac[3] & mac[4] & mac[5]) == 0xff; +} + +static inline bool mac_is_zero(const uint8_t mac[ETH_ADDR_LEN]) +{ + return (mac[0] | mac[1] | mac[2] | mac[3] | mac[4] | mac[5]) == 0; +} + +static inline bool mac_equals(const uint8_t a[ETH_ADDR_LEN], + const uint8_t b[ETH_ADDR_LEN]) +{ + return !memcmp(a, b, ETH_ADDR_LEN); +} + +#define MAC_FMT \ + "%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8 +#define MAC_ARGS(mac) \ + (mac)[0], (mac)[1], (mac)[2], (mac)[3], (mac)[4], (mac)[5] + + +#endif /* mac.h */ diff --git a/include/netlink.h b/include/netlink.h new file mode 100644 index 00000000..e5e66c28 --- /dev/null +++ b/include/netlink.h @@ -0,0 +1,148 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NETLINK_H +#define NETLINK_H 1 + +/* Netlink interface. + * + * Netlink is a datagram-based network protocol primarily for communication + * between user processes and the kernel, and mainly on Linux. Netlink is + * specified in RFC 3549, "Linux Netlink as an IP Services Protocol". + * + * Netlink is not suitable for use in physical networks of heterogeneous + * machines because host byte order is used throughout. */ + +#include +#include +#include +#include +#include +#include + +#ifndef NLA_ALIGNTO +struct nlattr +{ + __u16 nla_len; + __u16 nla_type; +}; + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +#endif + +struct buffer; +struct nl_sock; + +/* Netlink sockets. 
*/ + +int nl_sock_create(int protocol, int multicast_group, + size_t so_sndbuf, size_t so_rcvbuf, + struct nl_sock **); +void nl_sock_destroy(struct nl_sock *); + +int nl_sock_send(struct nl_sock *, const struct buffer *, bool wait); +int nl_sock_sendv(struct nl_sock *sock, const struct iovec iov[], size_t n_iov, + bool wait); +int nl_sock_recv(struct nl_sock *, struct buffer **, bool wait); +int nl_sock_transact(struct nl_sock *, const struct buffer *request, + struct buffer **reply); + +int nl_sock_fd(const struct nl_sock *); + +/* Netlink messages. */ + +/* Accessing headers and data. */ +struct nlmsghdr *nl_msg_nlmsghdr(const struct buffer *); +struct genlmsghdr *nl_msg_genlmsghdr(const struct buffer *); +bool nl_msg_nlmsgerr(const struct buffer *, int *error); +void nl_msg_reserve(struct buffer *, size_t); + +/* Appending headers and raw data. */ +void nl_msg_put_nlmsghdr(struct buffer *, struct nl_sock *, + size_t expected_payload, + uint32_t type, uint32_t flags); +void nl_msg_put_genlmsghdr(struct buffer *, struct nl_sock *, + size_t expected_payload, int family, uint32_t flags, + uint8_t cmd, uint8_t version); +void nl_msg_put(struct buffer *, const void *, size_t); +void *nl_msg_put_uninit(struct buffer *, size_t); + +/* Appending attributes. */ +void *nl_msg_put_unspec_uninit(struct buffer *, uint16_t type, size_t); +void nl_msg_put_unspec(struct buffer *, uint16_t type, const void *, size_t); +void nl_msg_put_flag(struct buffer *, uint16_t type); +void nl_msg_put_u8(struct buffer *, uint16_t type, uint8_t value); +void nl_msg_put_u16(struct buffer *, uint16_t type, uint16_t value); +void nl_msg_put_u32(struct buffer *, uint16_t type, uint32_t value); +void nl_msg_put_u64(struct buffer *, uint16_t type, uint64_t value); +void nl_msg_put_string(struct buffer *, uint16_t type, const char *value); +void nl_msg_put_nested(struct buffer *, uint16_t type, struct buffer *); + +/* Netlink attribute types. 
*/ +enum nl_attr_type +{ + NL_A_NO_ATTR = 0, + NL_A_UNSPEC, + NL_A_U8, + NL_A_U16, + NL_A_U32, + NL_A_U64, + NL_A_STRING, + NL_A_FLAG, + NL_A_NESTED, + N_NL_ATTR_TYPES +}; + +/* Netlink attribute parsing. */ +const void* nl_attr_get(const struct nlattr *); +size_t nl_attr_get_size(const struct nlattr *); +const void* nl_attr_get_unspec(const struct nlattr *, size_t size); +bool nl_attr_get_flag(const struct nlattr *); +uint8_t nl_attr_get_u8(const struct nlattr *); +uint16_t nl_attr_get_u16(const struct nlattr *); +uint32_t nl_attr_get_u32(const struct nlattr *); +uint64_t nl_attr_get_u64(const struct nlattr *); +const char *nl_attr_get_string(const struct nlattr *); + +/* Netlink attribute policy. + * + * Specifies how to parse a single attribute from a Netlink message payload. + * + * See Nl_policy for example. + */ +struct nl_policy +{ + enum nl_attr_type type; + size_t min_len, max_len; + bool optional; +}; + +bool nl_policy_parse(const struct buffer *, const struct nl_policy[], + struct nlattr *[], size_t n_attrs); + +/* Miscellaneous. */ + +int nl_lookup_genl_family(const char *name, int *number); + +#endif /* netlink.h */ diff --git a/include/ofp-print.h b/include/ofp-print.h new file mode 100644 index 00000000..ad383a43 --- /dev/null +++ b/include/ofp-print.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* OpenFlow protocol pretty-printer. */ + +#ifndef __OFP_PRINT_H__ +#define __OFP_PRINT_H__ 1 + +#include + +struct ofp_flow_mod; +struct ofp_table; + +#ifdef __cplusplus +extern "C" { +#endif + +void ofp_print(FILE *, const void *, size_t, int verbosity); +void ofp_print_table(FILE *stream, const struct ofp_table* ot); +void ofp_print_flow_mod(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_flow_expired(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_data_hello(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_packet(FILE *stream, const void *data, size_t len, size_t total_len); +void ofp_print_port_status(FILE *stream, const void *oh, size_t len, int verbosity); + +#ifdef __cplusplus +} +#endif + +#endif /* ofp-print.h */ diff --git a/include/openflow-netlink.h b/include/openflow-netlink.h new file mode 100644 index 00000000..31bd71eb --- /dev/null +++ b/include/openflow-netlink.h @@ -0,0 +1,83 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef OPENFLOW_NETLINK_H +#define OPENFLOW_NETLINK_H 1 + +#include + +#define DP_GENL_FAMILY_NAME "OpenFlow" + +/* Attributes that can be attached to the datapath's netlink messages. */ +enum { + DP_GENL_A_UNSPEC, + DP_GENL_A_OFPHEADER, /* OFP header information */ + DP_GENL_A_DP_IDX, /* Datapath Ethernet device name. */ + DP_GENL_A_PORTNAME, /* Device name for datapath port. */ + DP_GENL_A_MC_GROUP, /* Generic netlink multicast group. */ + DP_GENL_A_OPENFLOW, /* OpenFlow packet. 
*/ + + DP_GENL_A_DP_INFO, /* OpenFlow datapath information */ + + DP_GENL_A_FLOW, /* OpenFlow flow entry */ + DP_GENL_A_NUMFLOWS, /* Number of flows */ + DP_GENL_A_TABLEIDX, /* Flow table index */ + + DP_GENL_A_TABLE, /* OpenFlow table entry */ + DP_GENL_A_NUMTABLES, /* Number of tables in a table query */ + + DP_GENL_A_NPACKETS, /* Number of packets to send up netlink */ + DP_GENL_A_PSIZE, /* Size of packets to send up netlink */ + + __DP_GENL_A_MAX, + DP_GENL_A_MAX = __DP_GENL_A_MAX - 1 +}; + +/* Commands that can be executed on the datapath's netlink interface. */ +enum dp_genl_command { + DP_GENL_C_UNSPEC, + DP_GENL_C_ADD_DP, /* Create datapath. */ + DP_GENL_C_DEL_DP, /* Destroy datapath. */ + DP_GENL_C_QUERY_DP, /* Get multicast group for datapath. */ + DP_GENL_C_SHOW_DP, /* Show information about datapath. */ + DP_GENL_C_ADD_PORT, /* Add port to datapath. */ + DP_GENL_C_DEL_PORT, /* Remove port from datapath. */ + DP_GENL_C_OPENFLOW, /* Encapsulated OpenFlow protocol. */ + + DP_GENL_C_QUERY_FLOW, /* Request flow entries. */ + DP_GENL_C_QUERY_TABLE, /* Request table entries. */ + + DP_GENL_C_BENCHMARK_NL, /* Benchmark netlink connection */ + + __DP_GENL_C_MAX, + DP_GENL_C_MAX = __DP_GENL_C_MAX - 1 +}; + +/* Table */ +enum { + TBL_MACONLY, + TBL_HASH, + TBL_LINEAR, + __TBL_MAX, + TBL_MAX = __TBL_MAX - 1 +}; + +#endif /* openflow_netlink_h */ diff --git a/include/openflow.h b/include/openflow.h new file mode 100644 index 00000000..5f76e931 --- /dev/null +++ b/include/openflow.h @@ -0,0 +1,388 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* OpenFlow: protocol between controller and datapath. */ + +#ifndef OPENFLOW_H +#define OPENFLOW_H 1 + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* Maximum length of a OpenFlow packet. */ +#define OFP_MAXLEN (sizeof(struct ofp_data_hello) \ + + (sizeof(struct ofp_phy_port) * OFPP_MAX) + 200) + +#define OFP_VERSION 1 +#define OFP_MAX_TABLE_NAME_LEN 32 +#define OFP_MAX_PORT_NAME_LEN 16 + +#define OFP_TCP_PORT 975 +#define OFP_SSL_PORT 976 + +#define OFP_ETH_ALEN 6 /* Bytes in an Ethernet address. */ + +/* Port numbering. Physical ports are numbered starting from 0. */ +enum ofp_port { + /* Maximum number of physical switch ports. */ + OFPP_MAX = 0x100, + + /* Fake output "ports". */ + OFPP_NORMAL = 0xfffa, /* Process with normal L2/L3 switching */ + OFPP_FLOOD = 0xfffb, /* All physical ports except input port and + those disabled by STP. 
*/ + OFPP_ALL = 0xfffc, /* All physical ports except input port. */ + OFPP_CONTROLLER = 0xfffd, /* Send to controller. */ + OFPP_LOCAL = 0xfffe, /* Local openflow "port". */ /* xxx Want?! */ + OFPP_NONE = 0xffff /* Not associated with a physical port. */ +}; + +enum ofp_type { + OFPT_CONTROL_HELLO, /* 0 Controller/switch message */ + OFPT_DATA_HELLO, /* 1 Controller/switch message */ + OFPT_PACKET_IN, /* 2 Async message */ + OFPT_PACKET_OUT, /* 3 Controller/switch message */ + OFPT_FLOW_MOD, /* 4 Controller/switch message */ + OFPT_FLOW_EXPIRED, /* 5 Async message */ + OFPT_TABLE, /* 6 Controller/switch message */ + OFPT_PORT_MOD, /* 7 Controller/switch message */ + OFPT_PORT_STATUS, /* 8 Async message */ + OFPT_FLOW_STAT_REQUEST, /* 9 Controller/switch message */ + OFPT_FLOW_STAT_REPLY, /* 10 Controller/switch message */ + OFPT_TABLE_STAT_REQUEST, /* 11 Controller/switch message */ + OFPT_TABLE_STAT_REPLY, /* 12 Controller/switch message */ + OFPT_PORT_STAT_REQUEST, /* 13 Controller/switch message */ + OFPT_PORT_STAT_REPLY /* 14 Controller/switch message */ +}; + +/* Header on all OpenFlow packets. */ +struct ofp_header { + uint8_t version; /* Always 1. */ + uint8_t type; /* One of the OFPT_ constants. */ + uint16_t length; /* Length including this ofp_header. */ + uint32_t xid; /* Transaction id associated with this packet. + Replies use the same id as was in the request + to facilitate pairing. */ +}; + +#define OFP_DEFAULT_MISS_SEND_LEN 128 +#define OFP_MISS_SEND_LEN_UNCHANGED 0xffff + +/* Flag to indicate that datapath should notify the controller of + * expired flow entries. + */ +#define OFP_CHELLO_SEND_FLOW_EXP 0x0001 + +/* Controller hello (controller -> datapath). */ +struct ofp_control_hello { + struct ofp_header header; + uint32_t version; /* Max supported protocol version (?) */ + uint16_t flags; + uint16_t miss_send_len; /* Max bytes of new flow that datapath should + send to the controller.
A value of + OFP_MISS_SEND_LEN_UNCHANGED leaves the + currently configured value unchanged. */ +}; + +/* Capabilities supported by the datapath. */ +enum ofp_capabilities { + OFPC_FLOW_STATS = 1 << 0, /* Flow statistics. */ + OFPC_TABLE_STATS = 1 << 1, /* Table statistics. */ + OFPC_PORT_STATS = 1 << 2, /* Port statistics. */ + OFPC_STP = 1 << 3, /* 802.1D spanning tree. */ + OFPC_MULTI_PHY_TX = 1 << 4 /* Supports transmitting through multiple + physical interfaces */ +}; + +/* Flags to indicate behavior of the physical port */ +enum ofp_port_flags { + OFPPFL_NO_FLOOD = 1 << 0, /* Do not include this port when flooding */ +}; + +/* Features of physical ports available in a datapath. */ +enum ofp_port_features { + OFPPF_10MB_HD = 1 << 0, /* 10 Mb half-duplex rate support. */ + OFPPF_10MB_FD = 1 << 1, /* 10 Mb full-duplex rate support. */ + OFPPF_100MB_HD = 1 << 2, /* 100 Mb half-duplex rate support. */ + OFPPF_100MB_FD = 1 << 3, /* 100 Mb full-duplex rate support. */ + OFPPF_1GB_HD = 1 << 4, /* 1 Gb half-duplex rate support. */ + OFPPF_1GB_FD = 1 << 5, /* 1 Gb full-duplex rate support. */ + OFPPF_10GB_FD = 1 << 6, /* 10 Gb full-duplex rate support. */ +}; + + +/* Description of a physical port */ +struct ofp_phy_port { + uint16_t port_no; + uint8_t hw_addr[OFP_ETH_ALEN]; + uint8_t name[OFP_MAX_PORT_NAME_LEN]; /* Null-terminated */ + uint32_t flags; /* Bitmap of "ofp_port_flags". */ + uint32_t speed; /* Current speed in Mbps */ + uint32_t features; /* Bitmap of supported "ofp_port_features"s. */ +}; + +/* Datapath hello (datapath -> controller). */ +struct ofp_data_hello { + struct ofp_header header; + uint64_t datapath_id; /* Datapath unique ID */ + + /* Table info. */ + uint32_t n_exact; /* Max exact-match table entries. */ + uint32_t n_mac_only; /* Max mac-only table entries. */ + uint32_t n_compression; /* Max entries compressed on service port. */ + uint32_t n_general; /* Max entries of arbitrary form. */ + + /* Buffer limits.
A datapath that cannot buffer reports 0.*/ + uint32_t buffer_mb; /* Space for buffering packets, in MB. */ + uint32_t n_buffers; /* Max packets buffered at once. */ + + /* Features. */ + uint32_t capabilities; /* Bitmap of supported "ofp_capabilities". */ + uint32_t actions; /* Bitmap of supported "ofp_action_type"s. */ + + /* Miscellany */ + uint16_t miss_send_len; /* Currently configured value for max bytes + of new flow that datapath will send to the + controller. */ + uint8_t pad[2]; /* Align to 32-bits */ + + /* Port info.*/ + struct ofp_phy_port ports[0]; /* Port definitions. The number of ports + is inferred from the length field in + the header. */ +}; + +/* What changed about the physical port */ +enum ofp_port_reason { + OFPPR_ADD, /* The port was added */ + OFPPR_DELETE, /* The port was removed */ + OFPPR_MOD /* Some attribute of the port has changed */ +}; + +/* A physical port has changed in the datapath */ +struct ofp_port_status { + struct ofp_header header; + uint8_t reason; /* One of OFPPR_* */ + uint8_t pad[3]; /* Align to 32-bits */ + struct ofp_phy_port desc; +}; + +/* Modify behavior of the physical port */ +struct ofp_port_mod { + struct ofp_header header; + struct ofp_phy_port desc; +}; + +/* Why is this packet being sent to the controller? */ +enum ofp_reason { + OFPR_NO_MATCH, /* No matching flow. */ + OFPR_ACTION /* Action explicitly output to controller. */ +}; + +/* Packet received on port (datapath -> controller). */ +struct ofp_packet_in { + struct ofp_header header; + uint32_t buffer_id; /* ID assigned by datapath. */ + uint16_t total_len; /* Full length of frame. */ + uint16_t in_port; /* Port on which frame was received. */ + uint8_t reason; /* Reason packet is being sent (one of OFPR_*) */ + uint8_t pad; + uint8_t data[0]; /* Ethernet frame, halfway through 32-bit word, + so the IP header is 32-bit aligned. The + amount of data is inferred from the length + field in the header.
Because of padding, + offsetof(struct ofp_packet_in, data) == + sizeof(struct ofp_packet_in) - 2. */ +}; + +enum ofp_action_type { + OFPAT_OUTPUT, /* Output to switch port. */ + OFPAT_SET_DL_VLAN, /* VLAN. */ + OFPAT_SET_DL_SRC, /* Ethernet source address. */ + OFPAT_SET_DL_DST, /* Ethernet destination address. */ + OFPAT_SET_NW_SRC, /* IP source address. */ + OFPAT_SET_NW_DST, /* IP destination address. */ + OFPAT_SET_TP_SRC, /* TCP/UDP source port. */ + OFPAT_SET_TP_DST /* TCP/UDP destination port. */ +}; + +/* An output action sends packets out 'port'. When the 'port' is the + * OFPP_CONTROLLER, 'max_len' indicates the max number of bytes to + * send. A 'max_len' of zero means the entire packet should be sent. */ +struct ofp_action_output { + uint16_t max_len; + uint16_t port; +}; + +/* The VLAN id is 12-bits, so we'll use the entire 16 bits to indicate + * special conditions. All ones is used to indicate that no VLAN id was + * set, or if used as an action, that the VLAN header should be + * stripped. + */ +#define OFP_VLAN_NONE 0xffff + +struct ofp_action { + uint16_t type; /* One of OFPAT_* */ + union { + struct ofp_action_output output; /* OFPAT_OUTPUT: output struct. */ + uint16_t vlan_id; /* OFPAT_SET_DL_VLAN: VLAN id. */ + uint8_t dl_addr[OFP_ETH_ALEN]; /* OFPAT_SET_DL_SRC/DST */ + uint32_t nw_addr; /* OFPAT_SET_NW_SRC/DST */ + uint16_t tp; /* OFPAT_SET_TP_SRC/DST */ + } arg; +}; + +/* Send packet (controller -> datapath). */ +struct ofp_packet_out { + struct ofp_header header; + uint32_t buffer_id; /* ID assigned by datapath (-1 if none). */ + uint16_t in_port; /* Packet's input port (OFPP_NONE if none). */ + uint16_t out_port; /* Output port (if buffer_id == -1). */ + union { + struct ofp_action actions[0]; /* buffer_id != -1 */ + uint8_t data[0]; /* buffer_id == -1 */ + } u; +}; + +enum ofp_flow_mod_command { + OFPFC_ADD, /* New flow. */ + OFPFC_DELETE, /* Delete all matching flows. */ + OFPFC_DELETE_STRICT /* Strictly match wildcards. 
*/ +}; + +/* Flow wildcards. */ +enum ofp_flow_wildcards { + OFPFW_IN_PORT = 1 << 0, /* Switch input port. */ + OFPFW_DL_VLAN = 1 << 1, /* VLAN. */ + OFPFW_DL_SRC = 1 << 2, /* Ethernet source address. */ + OFPFW_DL_DST = 1 << 3, /* Ethernet destination address. */ + OFPFW_DL_TYPE = 1 << 4, /* Ethernet frame type. */ + OFPFW_NW_SRC = 1 << 5, /* IP source address. */ + OFPFW_NW_DST = 1 << 6, /* IP destination address. */ + OFPFW_NW_PROTO = 1 << 7, /* IP protocol. */ + OFPFW_TP_SRC = 1 << 8, /* TCP/UDP source port. */ + OFPFW_TP_DST = 1 << 9, /* TCP/UDP destination port. */ + OFPFW_ALL = (1 << 10) - 1 +}; + +/* Values below this cutoff are 802.3 packets and the two bytes + * following MAC addresses are used as a frame length. Otherwise, the + * two bytes are used as the Ethernet type. + */ +#define OFP_DL_TYPE_ETH2_CUTOFF 0x0600 + +/* Value of dl_type to indicate that the frame does not include an + * Ethernet type. + */ +#define OFP_DL_TYPE_NOT_ETH_TYPE 0x05ff + +/* Fields to match against flows */ +struct ofp_match { + uint16_t wildcards; /* Wildcard fields. */ + uint16_t in_port; /* Input switch port. */ + uint8_t dl_src[OFP_ETH_ALEN]; /* Ethernet source address. */ + uint8_t dl_dst[OFP_ETH_ALEN]; /* Ethernet destination address. */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t pad[3]; /* Align to 32-bits */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ +}; + +/* Value used in "max_idle" to indicate that the entry is permanent */ +#define OFP_FLOW_PERMANENT 0 + +/* Flow setup and teardown (controller -> datapath). */ +struct ofp_flow_mod { + struct ofp_header header; + struct ofp_match match; /* Fields to match */ + + /* Flow actions. */ + uint16_t command; /* One of OFPFC_*. 
*/ + uint16_t max_idle; /* Idle time before discarding (seconds). */ + uint32_t buffer_id; /* Buffered packet to apply to (or -1). */ + uint32_t group_id; /* Flow group ID (for QoS). */ + struct ofp_action actions[0]; /* The number of actions is inferred from + the length field in the header. */ +}; + +/* Flow expiration (datapath -> controller). */ +struct ofp_flow_expired { + struct ofp_header header; + struct ofp_match match; /* Description of fields */ + + uint32_t duration; /* Time flow was alive in seconds. */ + uint64_t packet_count; + uint64_t byte_count; +}; + +/* Statistics about flows that match the "match" field */ +struct ofp_flow_stats { + struct ofp_match match; /* Description of fields */ + uint32_t duration; /* Time flow has been alive in seconds. Only + used for non-aggregated results. */ + uint64_t packet_count; + uint64_t byte_count; +}; + +enum { + OFPFS_INDIV, /* Send an entry for each matching flow */ + OFPFS_AGGREGATE /* Aggregate matching flows */ +}; + +/* Current flow statistics request */ +struct ofp_flow_stat_request { + struct ofp_header header; + struct ofp_match match; /* Fields to match */ + uint8_t type; /* One of OFPFS_ */ + uint8_t pad[3]; /* Align to 32-bits */ +}; + +/* Current flow statistics reply */ +struct ofp_flow_stat_reply { + struct ofp_header header; + + /* If request was of type OFPFS_INDIV, this will contain an array of + * flow statistic entries. The number of matching flows is likely + * much larger than can fit in a single OpenFlow message, so a + * a response with no flows included is sent to indicate the end. + * If it was a OFPFS_AGGREGATE request, only a single flow stats + * entry will be contained in the response. 
+ */ + struct ofp_flow_stats flows[0]; +}; + +/* Table attributes collected at runtime */ +struct ofp_table { + struct ofp_header header; + char name[OFP_MAX_TABLE_NAME_LEN]; + uint16_t table_id; + unsigned long int n_flows; + unsigned long int max_flows; +}; + +#endif /* openflow.h */ diff --git a/include/packets.h b/include/packets.h new file mode 100644 index 00000000..52a7a380 --- /dev/null +++ b/include/packets.h @@ -0,0 +1,136 @@ +#ifndef PACKETS_H +#define PACKETS_H 1 + +#include +#include "util.h" + +/* Ethernet frames. */ +#define ETH_ADDR_LEN 6 + +#define ETH_TYPE_IP 0x0800 +#define ETH_TYPE_ARP 0x0806 +#define ETH_TYPE_VLAN 0x8100 + +#define ETH_HEADER_LEN 14 +#define ETH_PAYLOAD_MIN 46 +#define ETH_TOTAL_MIN (ETH_HEADER_LEN + ETH_PAYLOAD_MIN) +struct eth_header { + uint8_t eth_dst[ETH_ADDR_LEN]; + uint8_t eth_src[ETH_ADDR_LEN]; + uint16_t eth_type; +}; +BUILD_ASSERT_DECL(ETH_HEADER_LEN == sizeof(struct eth_header)); + +#define LLC_DSAP_SNAP 0xaa +#define LLC_SSAP_SNAP 0xaa +#define LLC_CNTL_SNAP 3 + +#define LLC_HEADER_LEN 3 +struct llc_header { + uint8_t llc_dsap; + uint8_t llc_ssap; + uint8_t llc_cntl; +}; +BUILD_ASSERT_DECL(LLC_HEADER_LEN == sizeof(struct llc_header)); + +#define SNAP_ORG_ETHERNET "\0\0" /* The compiler adds a null byte, so + sizeof(SNAP_ORG_ETHERNET) == 3. */ +#define SNAP_HEADER_LEN 5 +struct snap_header { + uint8_t snap_org[3]; + uint16_t snap_type; +} __attribute__((packed)); +BUILD_ASSERT_DECL(SNAP_HEADER_LEN == sizeof(struct snap_header)); + +#define LLC_SNAP_HEADER_LEN (LLC_HEADER_LEN + SNAP_HEADER_LEN) +struct llc_snap_header { + struct llc_header llc; + struct snap_header snap; +}; +BUILD_ASSERT_DECL(LLC_SNAP_HEADER_LEN == sizeof(struct llc_snap_header)); + +#define VLAN_VID 0x0fff + +#define VLAN_HEADER_LEN 4 +struct vlan_header { + uint16_t vlan_tci; /* Lowest 12 bits are VLAN ID. 
*/ + uint16_t vlan_next_type; +}; +BUILD_ASSERT_DECL(VLAN_HEADER_LEN == sizeof(struct vlan_header)); + +#define IP_VER(ip_ihl_ver) ((ip_ihl_ver) >> 4) +#define IP_IHL(ip_ihl_ver) ((ip_ihl_ver) & 15) + +#define IP_TYPE_TCP 6 +#define IP_TYPE_UDP 17 + +#define IP_HEADER_LEN 20 +struct ip_header { + uint8_t ip_ihl_ver; + uint8_t ip_tos; + uint16_t ip_tot_len; + uint16_t ip_id; + uint16_t ip_frag_off; + uint8_t ip_ttl; + uint8_t ip_proto; + uint16_t ip_csum; + uint32_t ip_src; + uint32_t ip_dst; +}; +BUILD_ASSERT_DECL(IP_HEADER_LEN == sizeof(struct ip_header)); + +#define UDP_HEADER_LEN 8 +struct udp_header { + uint16_t udp_src; + uint16_t udp_dst; + uint16_t udp_len; + uint16_t udp_csum; +}; +BUILD_ASSERT_DECL(UDP_HEADER_LEN == sizeof(struct udp_header)); + +#define TCP_FIN 0x01 +#define TCP_SYN 0x02 +#define TCP_RST 0x04 +#define TCP_PSH 0x08 +#define TCP_ACK 0x10 +#define TCP_URG 0x20 + +#define TCP_FLAGS(tcp_ctl) (htons(tcp_ctl) & 0x003f) +#define TCP_OFFSET(tcp_ctl) (htons(tcp_ctl) >> 12) + +#define TCP_HEADER_LEN 20 +struct tcp_header { + uint16_t tcp_src; + uint16_t tcp_dst; + uint32_t tcp_seq; + uint32_t tcp_ack; + uint16_t tcp_ctl; + uint16_t tcp_winsz; + uint16_t tcp_csum; + uint16_t tcp_urg; +}; +BUILD_ASSERT_DECL(TCP_HEADER_LEN == sizeof(struct tcp_header)); + +#define ARP_HRD_ETHERNET 1 +#define ARP_PRO_IP 0x0800 +#define ARP_OP_REQUEST 1 +#define ARP_OP_REPLY 2 + +#define ARP_ETH_HEADER_LEN 28 +struct arp_eth_header { + /* Generic members. */ + uint16_t ar_hrd; /* Hardware type. */ + uint16_t ar_pro; /* Protocol type. */ + uint8_t ar_hln; /* Hardware address length. */ + uint8_t ar_pln; /* Protocol address length. */ + uint16_t ar_op; /* Opcode. */ + + /* Ethernet+IPv4 specific members. */ + uint8_t ar_sha[ETH_ADDR_LEN]; /* Sender hardware address. */ + uint32_t ar_spa; /* Sender protocol address. */ + uint8_t ar_tha[ETH_ADDR_LEN]; /* Target hardware address. */ + uint32_t ar_tpa; /* Target protocol address. 
*/ +} __attribute__((packed)); +BUILD_ASSERT_DECL(ARP_ETH_HEADER_LEN == sizeof(struct arp_eth_header)); + +#endif /* packets.h */ diff --git a/include/socket-util.h b/include/socket-util.h new file mode 100644 index 00000000..2d167f94 --- /dev/null +++ b/include/socket-util.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef SOCKET_UTIL_H +#define SOCKET_UTIL_H 1 + +#include +#include + +int set_nonblocking(int fd); +int lookup_ip(const char *host_name, struct in_addr *address); + +#endif /* socket-util.h */ diff --git a/include/util.h b/include/util.h new file mode 100644 index 00000000..5a12a091 --- /dev/null +++ b/include/util.h @@ -0,0 +1,84 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef UTIL_H +#define UTIL_H 1 + +#include +#include +#include +#include +#include "compiler.h" + +#ifndef __cplusplus +/* Build-time assertion for use in a statement context. */ +#define BUILD_ASSERT(EXPR) \ + sizeof(struct { unsigned int build_assert_failed : (EXPR) ? 1 : -1; }) + +/* Build-time assertion for use in a declaration context. */ +#define BUILD_ASSERT_DECL(EXPR) \ + extern int (*build_assert(void))[BUILD_ASSERT(EXPR)] +#else /* __cplusplus */ +#include +#define BUILD_ASSERT BOOST_STATIC_ASSERT +#define BUILD_ASSERT_DECL BOOST_STATIC_ASSERT +#endif /* __cplusplus */ + +extern const char *program_name; + +#define ARRAY_SIZE(ARRAY) (sizeof ARRAY / sizeof *ARRAY) +#define ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y) * (Y)) +#define ROUND_DOWN(X, Y) ((X) / (Y) * (Y)) +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X, Y) ((X) > (Y) ? 
(X) : (Y)) + +#define NOT_REACHED() abort() +#define NOT_IMPLEMENTED() abort() +#define NOT_TESTED() ((void) 0) /* XXX should print a message. */ + +/* Given POINTER, the address of the given MEMBER in a STRUCT object, returns + the STRUCT object. */ +#define CONTAINER_OF(POINTER, STRUCT, MEMBER) \ + ((STRUCT *) ((char *) (POINTER) - offsetof (STRUCT, MEMBER))) + +#ifdef __cplusplus +extern "C" { +#endif + +void set_program_name(const char *); + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xrealloc(void *, size_t); +char *xstrdup(const char *); +char *xasprintf(const char *format, ...) PRINTF_FORMAT(1, 2); + +void fatal(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3) NO_RETURN; +void error(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void debug(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void debug_msg(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void hex_dump(FILE *, const void *, size_t, uintptr_t offset, bool ascii); + +#ifdef __cplusplus +} +#endif + +#endif /* util.h */ diff --git a/include/vconn.h b/include/vconn.h new file mode 100644 index 00000000..f6da0ad4 --- /dev/null +++ b/include/vconn.h @@ -0,0 +1,154 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VCONN_H +#define VCONN_H 1 + +#include +#include + +struct buffer; +struct flow; +struct pollfd; + +/* Client interface. */ + +/* Virtual connection to an OpenFlow device. */ +struct vconn { + struct vconn_class *class; +}; + +/* What kind of operation do we want to perform? */ +enum { + WANT_ACCEPT = 1 << 0, /* Want to accept a new connection. */ + WANT_RECV = 1 << 1, /* Want to receive a message. */ + WANT_SEND = 1 << 2 /* Want to send a message. */ +}; + +int vconn_open(const char *name, struct vconn **); +void vconn_close(struct vconn *); +bool vconn_is_passive(const struct vconn *); +void vconn_prepoll(struct vconn *, int want, struct pollfd *); +void vconn_postpoll(struct vconn *, short int *revents); +int vconn_accept(struct vconn *, struct vconn **); +int vconn_recv(struct vconn *, struct buffer **); +int vconn_send(struct vconn *, struct buffer *); +int vconn_send_wait(struct vconn *, struct buffer *); + +struct buffer *make_add_simple_flow(const struct flow *, + uint32_t buffer_id, uint16_t out_port); +struct buffer *make_buffered_packet_out(uint32_t buffer_id, + uint16_t in_port, uint16_t out_port); +struct buffer *make_unbuffered_packet_out(const struct buffer *packet, + uint16_t in_port, uint16_t out_port); + +/* Provider interface. */ + +struct vconn_class { + /* Prefix for connection names, e.g. "nl", "tcp". */ + const char *name; + + /* Attempts to connect to an OpenFlow device. 'name' is the full + * connection name provided by the user, e.g. 
"nl:0", "tcp:1.2.3.4". This + * name is useful for error messages but must not be modified. + * + * 'suffix' is a copy of 'name' following the colon and may be modified. + * + * Returns 0 if successful, otherwise a positive errno value. If + * successful, stores a pointer to the new connection in '*vconnp'. */ + int (*open)(const char *name, char *suffix, struct vconn **vconnp); + + /* Closes 'vconn' and frees associated memory. */ + void (*close)(struct vconn *vconn); + + /* Called by the main loop before calling poll(), this function must + * initialize 'pfd->fd' and 'pfd->events' appropriately so that poll() will + * wake up when the connection becomes available for the operations + * specified in 'want'. The prepoll function may also set bits in 'pfd' to + * allow for internal processing. */ + void (*prepoll)(struct vconn *, int want, struct pollfd *pfd); + + /* Called by the main loop after calling poll(), this function may perform + * any internal processing needed by the connection. It is provided with + * the vconn file descriptor's status in '*revents', as reported by poll(). + * + * The postpoll function should adjust '*revents' to reflect the status of + * the connection from the caller's point of view: that is, upon return + * '*revents & POLLIN' should indicate that a packet is (potentially) ready + * to be read (for an active vconn) or a new connection is ready to be + * accepted (for a passive vconn) and '*revents & POLLOUT' should indicate + * that a packet is (potentially) ready to be written. + * + * This function may be a null pointer in a vconn class that has no use for + * it, that is, if the vconn does not need to do any internal processing + * and poll's revents out properly reflects the vconn's status. */ + void (*postpoll)(struct vconn *, short int *revents); + + /* Tries to accept a new connection on 'vconn', which must be a passive + * vconn. If successful, stores the new connection in '*new_vconnp' and + * returns 0. 
Otherwise, returns a positive errno value. + * + * The accept function must not block waiting for a connection. If no + * connection is ready to be accepted, it should return EAGAIN. + * + * Nonnull iff this is a passive vconn (one that accepts connection and + * does not transfer data). */ + int (*accept)(struct vconn *vconn, struct vconn **new_vconnp); + + /* Tries to receive an OpenFlow message from 'vconn', which must be an + * active vconn. If successful, stores the received message into '*msgp' + * and returns 0. The caller is responsible for destroying the message + * with buffer_delete(). On failure, returns a positive errno value and + * stores a null pointer into '*msgp'. + * + * If the connection has been closed in the normal fashion, returns EOF. + * + * The recv function must not block waiting for a packet to arrive. If no + * packets have been received, it should return EAGAIN. + * + * Nonnull iff this is an active vconn (one that transfers data and does + * not accept connections). */ + int (*recv)(struct vconn *vconn, struct buffer **msgp); + + /* Tries to queue 'msg' for transmission on 'vconn', which must be an + * active vconn. If successful, returns 0, in which case ownership of + * 'msg' is transferred to the vconn. Success does not guarantee that + * 'msg' has been or ever will be delivered to the peer, only that it has + * been queued for transmission. + * + * Returns a positive errno value on failure, in which case the caller + * retains ownership of 'msg'. + * + * The send function must not block. If 'msg' cannot be immediately + * accepted for transmission, it should return EAGAIN. + * + * Nonnull iff this is an active vconn (one that transfers data and does + * not accept connections). 
*/ + int (*send)(struct vconn *vconn, struct buffer *msg); +}; + +extern struct vconn_class tcp_vconn_class; +extern struct vconn_class ptcp_vconn_class; +#ifdef HAVE_NETLINK +extern struct vconn_class netlink_vconn_class; +#endif + +#endif /* vconn.h */ diff --git a/include/vlog-socket.h b/include/vlog-socket.h new file mode 100644 index 00000000..90ec97cb --- /dev/null +++ b/include/vlog-socket.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VLOG_SOCKET_H +#define VLOG_SOCKET_H 1 + +/* Server for Vlog control connection. */ +struct vlog_server; +int vlog_server_listen(const char *path, struct vlog_server **); +void vlog_server_close(struct vlog_server *); +int vlog_server_get_fd(const struct vlog_server *); +void vlog_server_poll(struct vlog_server *); + +/* Client for Vlog control connection. 
*/ +struct vlog_client; +int vlog_client_connect(const char *path, struct vlog_client **); +void vlog_client_close(struct vlog_client *); +int vlog_client_send(struct vlog_client *, const char *request); +int vlog_client_recv(struct vlog_client *, char **reply); +int vlog_client_transact(struct vlog_client *, + const char *request, char **reply); +const char *vlog_client_target(const struct vlog_client *); + +#endif /* vlog-socket.h */ diff --git a/include/vlog.h b/include/vlog.h new file mode 100644 index 00000000..f47324b8 --- /dev/null +++ b/include/vlog.h @@ -0,0 +1,100 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VLOG_H +#define VLOG_H 1 + +#include + +/* Logging importance levels. 
*/ +enum vlog_level { + VLL_EMER, + VLL_ERR, + VLL_WARN, + VLL_DBG, + VLL_N_LEVELS +}; + +const char *vlog_get_level_name(enum vlog_level); +enum vlog_level vlog_get_level_val(const char *name); + +/* Facilities that we can log to. */ +enum vlog_facility { + VLF_SYSLOG, + VLF_CONSOLE, + VLF_N_FACILITIES, + VLF_ANY_FACILITY = -1 +}; + +const char *vlog_get_facility_name(enum vlog_facility); +enum vlog_facility vlog_get_facility_val(const char *name); + +/* Modules that can emit log messages. */ +#define VLOG_MODULES \ + VLOG_MODULE(controller) \ + VLOG_MODULE(ctlpath) \ + VLOG_MODULE(dpif) \ + VLOG_MODULE(dpctl) \ + VLOG_MODULE(fault) \ + VLOG_MODULE(flow) \ + VLOG_MODULE(netlink) \ + VLOG_MODULE(secchan) \ + VLOG_MODULE(socket_util) \ + VLOG_MODULE(vconn_netlink) \ + VLOG_MODULE(vconn_tcp) \ + VLOG_MODULE(vconn) \ + +/* VLM_ constant for each vlog module. */ +enum vlog_module { +#define VLOG_MODULE(NAME) VLM_##NAME, + VLOG_MODULES +#undef VLOG_MODULE + VLM_N_MODULES, + VLM_ANY_MODULE = -1 +}; + +const char *vlog_get_module_name(enum vlog_module); +enum vlog_module vlog_get_module_val(const char *name); + +/* Configuring how each module logs messages. */ +enum vlog_level vlog_get_level(enum vlog_module, enum vlog_facility); +void vlog_set_levels(enum vlog_module, enum vlog_facility, enum vlog_level); +char *vlog_set_levels_from_string(const char *); +char *vlog_get_levels(void); +void vlog_set_verbosity(const char *arg); + +/* Function for actual logging. */ +void vlog_init(void); +void vlog_exit(void); +void vlog(enum vlog_module, enum vlog_level, const char *format, ...) + __attribute__((format(printf, 3, 4))); + +/* Convenience macros. To use these, define THIS_MODULE as a macro that + * expands to the module used by the current source file, e.g. + * #include "vlog.h" + * #define THIS_MODULE VLM_NETLINK + */ +#define VLOG_EMER(...) vlog(THIS_MODULE, VLL_EMER, __VA_ARGS__) +#define VLOG_ERR(...) vlog(THIS_MODULE, VLL_ERR, __VA_ARGS__) +#define VLOG_WARN(...) 
vlog(THIS_MODULE, VLL_WARN, __VA_ARGS__) +#define VLOG_DBG(...) vlog(THIS_MODULE, VLL_DBG, __VA_ARGS__) + +#endif /* vlog.h */ diff --git a/include/xtoxll.h b/include/xtoxll.h new file mode 100644 index 00000000..22070941 --- /dev/null +++ b/include/xtoxll.h @@ -0,0 +1,19 @@ +#ifndef XTOXLL_H +#define XTOXLL_H 1 + +#include +#include + +static inline uint64_t +htonll(uint64_t n) +{ + return htonl(1) == 1 ? n : ((uint64_t) htonl(n) << 32) | htonl(n >> 32); +} + +static inline uint64_t +ntohll(uint64_t n) +{ + return htonl(1) == 1 ? n : ((uint64_t) ntohl(n) << 32) | ntohl(n >> 32); +} + +#endif /* xtonll.h */ diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/lib/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/lib/Makefile.am b/lib/Makefile.am new file mode 100644 index 00000000..d9a490c5 --- /dev/null +++ b/lib/Makefile.am @@ -0,0 +1,27 @@ +include ../Make.vars + +noinst_LTLIBRARIES = libopenflow.la + +libopenflow_la_SOURCES = \ + buffer.c \ + command-line.c \ + dynamic-string.c \ + fatal-signal.c \ + fault.c \ + flow.c \ + hash.c \ + list.c \ + ofp-print.c \ + socket-util.c \ + util.c \ + vconn-tcp.c \ + vconn.c \ + vlog-socket.c \ + vlog.c + +if HAVE_NETLINK +libopenflow_la_SOURCES += \ + dpif.c \ + netlink.c \ + vconn-netlink.c +endif diff --git a/lib/buffer.c b/lib/buffer.c new file mode 100644 index 00000000..0ce1045e --- /dev/null +++ b/lib/buffer.c @@ -0,0 +1,192 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "buffer.h" +#include +#include +#include +#include "util.h" + +/* Initializes 'b' as an empty buffer that contains the 'allocated' bytes of + * memory starting at 'base'. + * + * 'base' should ordinarily be the first byte of a region obtained from + * malloc(), but in circumstances where it can be guaranteed that 'b' will + * never need to be expanded or freed, it can be a pointer into arbitrary + * memory. */ +void +buffer_use(struct buffer *b, void *base, size_t allocated) +{ + b->base = b->data = base; + b->allocated = allocated; + b->size = 0; + b->next = NULL; +} + +/* Initializes 'b' as a buffer with an initial capacity of 'size' bytes. */ +void +buffer_init(struct buffer *b, size_t size) +{ + buffer_use(b, size ? xmalloc(size) : NULL, size); +} + +/* Frees memory that 'b' points to. 
*/
+void
+buffer_uninit(struct buffer *b)
+{
+    if (b) {
+        free(b->base);
+    }
+}
+
+/* Frees memory that 'b' points to and re-initializes 'b' as an empty buffer
+ * with an initial capacity of 'size' bytes. */
+void
+buffer_reinit(struct buffer *b, size_t size)
+{
+    buffer_uninit(b);
+    buffer_init(b, size);
+}
+
+/* Creates and returns a new buffer with an initial capacity of 'size'
+ * bytes. */
+struct buffer *
+buffer_new(size_t size)
+{
+    struct buffer *b = xmalloc(sizeof *b);
+    buffer_init(b, size);
+    return b;
+}
+
+/* Frees memory that 'b' points to, as well as 'b' itself.  A null 'b' is
+ * ignored. */
+void
+buffer_delete(struct buffer *b)
+{
+    if (b) {
+        buffer_uninit(b);
+        free(b);
+    }
+}
+
+/* Returns the number of bytes of headroom in 'b', that is, the number of bytes
+ * of unused space in buffer 'b' before the data that is in use.  (Most
+ * commonly, the data in a buffer is at its beginning, and thus the buffer's
+ * headroom is 0.) */
+size_t
+buffer_headroom(struct buffer *b)
+{
+    /* Subtract as 'char *': arithmetic on 'void *' is a GNU extension. */
+    return (char *) b->data - (char *) b->base;
+}
+
+/* Returns the number of bytes that may be appended to the tail end of buffer
+ * 'b' before the buffer must be reallocated. */
+size_t
+buffer_tailroom(struct buffer *b)
+{
+    return (char *) buffer_end(b) - (char *) buffer_tail(b);
+}
+
+/* Ensures that 'b' has room for at least 'size' bytes at its tail end,
+ * reallocating and copying its data if necessary.
+ *
+ * When the buffer grows, its capacity increases by MAX('size', 64) bytes and
+ * the old storage is released with free(), so 'b' must not have been set up
+ * by buffer_use() over memory that did not come from malloc(). */
+void
+buffer_reserve_tailroom(struct buffer *b, size_t size)
+{
+    if (size > buffer_tailroom(b)) {
+        size_t headroom = buffer_headroom(b);
+        size_t new_allocated = b->allocated + MAX(size, 64);
+        char *new_base = xmalloc(new_allocated);
+
+        /* A buffer created with a capacity of 0 has a null 'base'; passing a
+         * null pointer to memcpy() is undefined even for a length of 0. */
+        if (b->allocated) {
+            memcpy(new_base, b->base, b->allocated);
+        }
+        free(b->base);
+        b->base = new_base;
+        b->allocated = new_allocated;
+        b->data = new_base + headroom;
+    }
+}
+
+/* Appends 'size' bytes of data to the tail end of 'b', reallocating and
+ * copying its data if necessary.  Returns a pointer to the first byte of the
+ * new data, which is left uninitialized.
*/
+void *
+buffer_put_uninit(struct buffer *b, size_t size)
+{
+    void *p;
+    buffer_reserve_tailroom(b, size);
+    p = buffer_tail(b);
+    b->size += size;
+    return p;
+}
+
+/* Appends the 'size' bytes of data in 'p' to the tail end of 'b'.  Data in 'b'
+ * is reallocated and copied if necessary. */
+void
+buffer_put(struct buffer *b, const void *p, size_t size)
+{
+    memcpy(buffer_put_uninit(b, size), p, size);
+}
+
+/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
+ * byte 'offset'.  Otherwise, returns a null pointer. */
+void *
+buffer_at(const struct buffer *b, size_t offset, size_t size)
+{
+    /* Compare without forming 'offset + size', which could wrap around and
+     * make an out-of-range request look valid. */
+    return (offset <= b->size && size <= b->size - offset
+            ? (char *) b->data + offset
+            : NULL);
+}
+
+/* Returns a pointer to byte 'offset' in 'b', which must contain at least
+ * 'offset + size' bytes of data. */
+void *
+buffer_at_assert(const struct buffer *b, size_t offset, size_t size)
+{
+    /* Same wrap-proof range check as buffer_at(). */
+    assert(offset <= b->size && size <= b->size - offset);
+    return ((char *) b->data) + offset;
+}
+
+/* Returns the byte following the last byte of data in use in 'b'. */
+void *
+buffer_tail(const struct buffer *b)
+{
+    return (char *) b->data + b->size;
+}
+
+/* Returns the byte following the last byte allocated for use (but not
+ * necessarily in use) by 'b'. */
+void *
+buffer_end(const struct buffer *b)
+{
+    return (char *) b->base + b->allocated;
+}
+
+/* Clears any data from 'b', which also resets its headroom to 0. */
+void
+buffer_clear(struct buffer *b)
+{
+    b->data = b->base;
+    b->size = 0;
+}
+
+/* Removes 'size' bytes from the head end of 'b', which must contain at least
+ * 'size' bytes of data. */
+void
+buffer_pull(struct buffer *b, size_t size)
+{
+    assert(b->size >= size);
+    /* Advance as 'char *': arithmetic on 'void *' is a GNU extension. */
+    b->data = (char *) b->data + size;
+    b->size -= size;
+}
diff --git a/lib/command-line.c b/lib/command-line.c
new file mode 100644
index 00000000..fbd8e8eb
--- /dev/null
+++ b/lib/command-line.c
@@ -0,0 +1,53 @@
+/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "command-line.h" +#include +#include +#include "util.h" +#include "vlog.h" + +/* Given the GNU-style long options in 'options', returns a string that may be + * passed to getopt() with the corresponding short options. The caller is + * responsible for freeing the string. 
*/
+char *
+long_options_to_short_options(const struct option options[])
+{
+    /* Each distinct short option needs at most 3 bytes (the character plus up
+     * to two colons), and there are at most UCHAR_MAX distinct characters. */
+    char short_options[UCHAR_MAX * 3 + 1];
+    /* Marks characters already emitted.  Skipping repeats keeps the worst
+     * case within 'short_options' no matter how long 'options' is. */
+    unsigned char seen[UCHAR_MAX + 1] = { 0 };
+    char *p = short_options;
+
+    for (; options->name; options++) {
+        const struct option *o = options;
+        if (o->flag == NULL && o->val > 0 && o->val <= UCHAR_MAX
+            && !seen[o->val]) {
+            seen[o->val] = 1;
+            *p++ = o->val;
+            if (o->has_arg == required_argument) {
+                *p++ = ':';
+            } else if (o->has_arg == optional_argument) {
+                *p++ = ':';
+                *p++ = ':';
+            }
+        }
+    }
+    *p = '\0';
+
+    return xstrdup(short_options);
+}
+
diff --git a/lib/dpif.c b/lib/dpif.c
new file mode 100644
index 00000000..6bd6fef7
--- /dev/null
+++ b/lib/dpif.c
@@ -0,0 +1,519 @@
+/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include "dpif.h" + +#include +#include +#include +#include +#include +#include + +#include "buffer.h" +#include "mac.h" +#include "netlink.h" +#include "ofp-print.h" +#include "openflow-netlink.h" +#include "openflow.h" +#include "util.h" +#include "xtoxll.h" + +#include "vlog.h" +#define THIS_MODULE VLM_dpif + +/* The Generic Netlink family number used for OpenFlow. */ +static int openflow_family; + +static int lookup_openflow_multicast_group(int dp_idx, int *multicast_group); +static int send_mgmt_command(struct dpif *, int command, + const char *netdev); + +/* Opens the local datapath numbered 'dp_idx', initializing 'dp'. If + * 'subscribe' is true, listens for asynchronous messages (packet-in, etc.) + * from the datapath; otherwise, 'dp' will receive only replies to explicitly + * initiated requests. */ +int +dpif_open(int dp_idx, bool subscribe, struct dpif *dp) +{ + struct nl_sock *sock; + int multicast_group = 0; + int retval; + + retval = nl_lookup_genl_family(DP_GENL_FAMILY_NAME, &openflow_family); + if (retval) { + return retval; + } + + if (subscribe) { + retval = lookup_openflow_multicast_group(dp_idx, &multicast_group); + if (retval) { + return retval; + } + } + + /* Specify a large so_rcvbuf size because we occasionally need to be able + * to retrieve large collections of flow records. */ + retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, + 4 * 1024u * 1024, &sock); + if (retval) { + return retval; + } + + dp->dp_idx = dp_idx; + dp->sock = sock; + return 0; +} + +/* Closes 'dp'. */ +void +dpif_close(struct dpif *dp) +{ + nl_sock_destroy(dp->sock); +} + +static const struct nl_policy openflow_policy[] = { + [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 }, + [DP_GENL_A_OPENFLOW] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct ofp_header), + .max_len = OFP_MAXLEN }, +}; + +/* Tries to receive an openflow message from the kernel on 'sock'. If + * successful, stores the received message into '*msgp' and returns 0. 
The
+ * caller is responsible for destroying the message with buffer_delete().  On
+ * failure, returns a positive errno value and stores a null pointer into
+ * '*msgp'.
+ *
+ * Only Netlink messages with embedded OpenFlow messages are accepted.  Other
+ * Netlink messages provoke errors.
+ *
+ * If 'wait' is true, dpif_recv_openflow waits for a message to be ready;
+ * otherwise, returns EAGAIN if the 'sock' receive buffer is empty. */
+int
+dpif_recv_openflow(struct dpif *dp, struct buffer **bufferp,
+                   bool wait)
+{
+    struct nlattr *attrs[ARRAY_SIZE(openflow_policy)];
+    struct buffer *buffer;
+    struct ofp_header *oh;
+    size_t ofp_len;
+    int retval;
+
+    *bufferp = NULL;
+
+    /* Receive until we get something other than a dropped-message indication
+     * (ENOBUFS) or a Netlink error message. */
+    buffer = NULL;
+    for (;;) {
+        retval = nl_sock_recv(dp->sock, &buffer, wait);
+        if (retval == ENOBUFS) {
+            continue;
+        }
+        if (retval || !nl_msg_nlmsgerr(buffer, NULL)) {
+            break;
+        }
+        /* A Netlink error message was received and is being skipped.  We own
+         * it, so free it before receiving again instead of leaking it. */
+        buffer_delete(buffer);
+        buffer = NULL;
+    }
+    if (retval) {
+        if (retval != EAGAIN) {
+            VLOG_WARN("dpif_recv_openflow: %s", strerror(retval));
+        }
+        return retval;
+    }
+
+    if (nl_msg_genlmsghdr(buffer) == NULL) {
+        VLOG_DBG("received packet too short for Generic Netlink");
+        goto error;
+    }
+    if (nl_msg_nlmsghdr(buffer)->nlmsg_type != openflow_family) {
+        VLOG_DBG("received type (%"PRIu16") != openflow family (%d)",
+                 nl_msg_nlmsghdr(buffer)->nlmsg_type, openflow_family);
+        goto error;
+    }
+
+    if (!nl_policy_parse(buffer, openflow_policy, attrs,
+                         ARRAY_SIZE(openflow_policy))) {
+        goto error;
+    }
+    if (nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]) != dp->dp_idx) {
+        VLOG_WARN("received dp_idx (%"PRIu32") differs from expected (%d)",
+                  nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]), dp->dp_idx);
+        goto error;
+    }
+
+    /* Narrow 'buffer' to just the embedded OpenFlow message.  openflow_policy
+     * guarantees the attribute holds at least a full ofp_header. */
+    oh = buffer->data = (void *) nl_attr_get(attrs[DP_GENL_A_OPENFLOW]);
+    buffer->size = nl_attr_get_size(attrs[DP_GENL_A_OPENFLOW]);
+    ofp_len = ntohs(oh->length);
+    if (ofp_len != buffer->size) {
+        /* 'ofp_len' is a size_t, so it must be printed with %zu. */
+        VLOG_WARN("ofp_header.length %zu != attribute length %zu\n",
+                  ofp_len, buffer->size);
+        buffer->size = MIN(ofp_len, buffer->size);
+    }
+    *bufferp = buffer;
+    return 0;
+
+error:
+    buffer_delete(buffer);
+
return EPROTO;
+}
+
+/* Encapsulates 'buffer', which must contain an OpenFlow message, in a Netlink
+ * message, and sends it to the OpenFlow kernel module via 'dp''s socket.
+ *
+ * Returns 0 if successful, otherwise a positive errno value.  If
+ * 'wait' is true, then the send will wait until buffer space is ready;
+ * otherwise, returns EAGAIN if the socket's send buffer is full.
+ *
+ * If the send is successful, then the kernel module will receive it, but there
+ * is no guarantee that any reply will not be dropped (see nl_sock_transact()
+ * for details).
+ */
+int
+dpif_send_openflow(struct dpif *dp, struct buffer *buffer, bool wait)
+{
+    struct buffer hdr;
+    struct nlattr *nla;
+    uint32_t fixed_buffer[64 / 4];
+    struct iovec iov[3];
+    int pad_bytes;
+    int n_iov;
+    int retval;
+
+    /* Build the Netlink headers in a small on-stack buffer; the OpenFlow
+     * payload is sent in place from 'buffer' through a separate iovec. */
+    buffer_use(&hdr, fixed_buffer, sizeof fixed_buffer);
+    nl_msg_put_genlmsghdr(&hdr, dp->sock, 32, openflow_family,
+                          NLM_F_REQUEST, DP_GENL_C_OPENFLOW, 1);
+    nl_msg_put_u32(&hdr, DP_GENL_A_DP_IDX, dp->dp_idx);
+
+    /* Reserve exactly one attribute header.  This must be 'sizeof *nla' (the
+     * header itself), not 'sizeof nla' (the size of a pointer), or the
+     * attribute length and framing are wrong wherever pointers are not the
+     * same size as struct nlattr. */
+    nla = buffer_put_uninit(&hdr, sizeof *nla);
+    nla->nla_len = sizeof *nla + buffer->size;
+    nla->nla_type = DP_GENL_A_OPENFLOW;
+    pad_bytes = NLA_ALIGN(nla->nla_len) - nla->nla_len;
+    nl_msg_nlmsghdr(&hdr)->nlmsg_len = hdr.size + buffer->size + pad_bytes;
+    n_iov = 2;
+    iov[0].iov_base = hdr.data;
+    iov[0].iov_len = hdr.size;
+    iov[1].iov_base = buffer->data;
+    iov[1].iov_len = buffer->size;
+    if (pad_bytes) {
+        /* Zero padding that rounds the attribute up to NLA_ALIGNTO. */
+        static char zeros[NLA_ALIGNTO];
+        n_iov++;
+        iov[2].iov_base = zeros;
+        iov[2].iov_len = pad_bytes;
+    }
+
+    /* Honor the caller's 'wait' request, as documented above, rather than
+     * always sending non-blocking.  NOTE(review): assumes the final argument
+     * of nl_sock_sendv() is its wait flag, mirroring nl_sock_recv() -- confirm
+     * against netlink.h. */
+    retval = nl_sock_sendv(dp->sock, iov, n_iov, wait);
+    if (retval && retval != EAGAIN) {
+        VLOG_WARN("dpif_send_openflow: %s", strerror(retval));
+    }
+    return retval;
+}
+
+/* Creates the datapath represented by 'dp'.  Returns 0 if successful,
+ * otherwise a positive errno value. */
+int
+dpif_add_dp(struct dpif *dp)
+{
+    return send_mgmt_command(dp, DP_GENL_C_ADD_DP, NULL);
+}
+
+/* Destroys the datapath represented by 'dp'.
Returns 0 if successful,
+ * otherwise a positive errno value. */
+int
+dpif_del_dp(struct dpif *dp)
+{
+    return send_mgmt_command(dp, DP_GENL_C_DEL_DP, NULL);
+}
+
+/* Adds the Ethernet device named 'netdev' to this datapath.  Returns 0 if
+ * successful, otherwise a positive errno value. */
+int
+dpif_add_port(struct dpif *dp, const char *netdev)
+{
+    return send_mgmt_command(dp, DP_GENL_C_ADD_PORT, netdev);
+}
+
+/* Removes the Ethernet device named 'netdev' from this datapath.  Returns 0
+ * if successful, otherwise a positive errno value. */
+int
+dpif_del_port(struct dpif *dp, const char *netdev)
+{
+    return send_mgmt_command(dp, DP_GENL_C_DEL_PORT, netdev);
+}
+
+/* Prints a description of 'dp' to stdout.  Returns 0 if successful, otherwise
+ * a positive errno value. */
+int
+dpif_show(struct dpif *dp)
+{
+    static const struct nl_policy show_policy[] = {
+        [DP_GENL_A_DP_INFO] = { .type = NL_A_UNSPEC,
+                                .min_len = sizeof(struct ofp_data_hello),
+                                .max_len = SIZE_MAX },
+    };
+
+    struct buffer request, *reply;
+    struct nlattr *attrs[ARRAY_SIZE(show_policy)];
+    struct ofp_data_hello *odh;
+    int retval;
+    size_t len;
+
+    /* Ask the kernel module to describe datapath 'dp->dp_idx'. */
+    buffer_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family,
+                          NLM_F_REQUEST, DP_GENL_C_SHOW_DP, 1);
+    nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx);
+    retval = nl_sock_transact(dp->sock, &request, &reply);
+    buffer_uninit(&request);
+    if (retval) {
+        return retval;
+    }
+    if (!nl_policy_parse(reply, show_policy, attrs,
+                         ARRAY_SIZE(show_policy))) {
+        buffer_delete(reply);
+        return EPROTO;
+    }
+
+    /* 'odh' points into 'reply', so 'reply' must stay alive until printed. */
+    odh = (void *) nl_attr_get(attrs[DP_GENL_A_DP_INFO]);
+    if (odh->header.version != OFP_VERSION
+        || odh->header.type != OFPT_DATA_HELLO) {
+        VLOG_ERR("bad show query response (%"PRIu8",%"PRIu8")",
+                 odh->header.version, odh->header.type);
+        buffer_delete(reply);
+        return EPROTO;
+    }
+
+    len = nl_attr_get_size(attrs[DP_GENL_A_DP_INFO]);
+    ofp_print_data_hello(stdout, odh, len, 1);
+
+    /* Release the reply on the success path too, as dpif_dump_tables() does;
+     * previously it was leaked. */
+    buffer_delete(reply);
+    return 0;
+}
+
+static const
struct nl_policy table_policy[] = { + [DP_GENL_A_NUMTABLES] = { .type = NL_A_U32 }, + [DP_GENL_A_TABLE] = { .type = NL_A_UNSPEC }, +}; + +/* Writes a description of 'dp''s tables to stdout. Returns 0 if successful, + * otherwise a positive errno value. */ +int +dpif_dump_tables(struct dpif *dp) +{ + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(table_policy)]; + const struct ofp_table *tables; + int n_tables; + int i; + int retval; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, + NLM_F_REQUEST, DP_GENL_C_QUERY_TABLE, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + retval = nl_sock_transact(dp->sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + return retval; + } + if (!nl_policy_parse(reply, table_policy, attrs, + ARRAY_SIZE(table_policy))) { + buffer_delete(reply); + return EPROTO; + } + + tables = nl_attr_get(attrs[DP_GENL_A_TABLE]); + n_tables = (nl_attr_get_size(attrs[DP_GENL_A_TABLE]) + / sizeof(struct ofp_table)); + n_tables = MIN(n_tables, nl_attr_get_u32(attrs[DP_GENL_A_NUMTABLES])); + for (i = 0; i < n_tables; i++) { + const struct ofp_table *ot = &tables[i]; + if (ot->header.version != 1 || ot->header.type != OFPT_TABLE) { + VLOG_DBG("bad table query response (%"PRIu8",%"PRIu8")", + ot->header.version, ot->header.type); + retval = EPROTO; + break; + } + + ofp_print_table(stdout, ot); + fprintf(stdout,"\n"); + } + buffer_delete(reply); + + return retval; +} + +static const struct nl_policy flow_policy[] = { + [DP_GENL_A_TABLEIDX] = { .type = NL_A_U16 }, + [DP_GENL_A_NUMFLOWS] = { .type = NL_A_U32 }, + [DP_GENL_A_FLOW] = { .type = NL_A_UNSPEC }, +}; + +struct _dump_ofp_flow_mod +{ + struct ofp_flow_mod ofm; + struct ofp_action oa; +}; + +/* Writes a description of flows in the given 'table' in 'dp' to stdout. If + * 'match' is null, all flows in the table are written; otherwise, only + * matching flows are written. 
Returns 0 if successful, otherwise a positive
+ * errno value. */
+int
+dpif_dump_flows(struct dpif *dp, int table, struct ofp_match *match)
+{
+    struct buffer request, *reply;
+    struct ofp_flow_mod *ofm;
+    int retval;
+
+    buffer_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, NLM_F_REQUEST,
+                          DP_GENL_C_QUERY_FLOW, 1);
+    nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx);
+    nl_msg_put_u16(&request, DP_GENL_A_TABLEIDX, table);
+    ofm = nl_msg_put_unspec_uninit(&request, DP_GENL_A_FLOW, sizeof *ofm);
+    memset(ofm, 0, sizeof *ofm);
+    ofm->header.version = 1;
+    ofm->header.type = OFPT_FLOW_MOD;
+    /* Length of the message itself, not of the pointer to it. */
+    ofm->header.length = htons(sizeof *ofm);
+    if (match) {
+        ofm->match = *match;
+    } else {
+        ofm->match.wildcards = htons(OFPFW_ALL);
+    }
+    retval = nl_sock_transact(dp->sock, &request, &reply);
+    buffer_uninit(&request);
+    if (retval) {
+        return retval;
+    }
+
+    /* Replies arrive as a series of messages; one carrying no flows ends the
+     * dump. */
+    for (;;) {
+        struct nlattr *attrs[ARRAY_SIZE(flow_policy)];
+        const struct _dump_ofp_flow_mod *flows, *ofm;
+        int n_flows;
+
+        if (!nl_policy_parse(reply, flow_policy, attrs,
+                             ARRAY_SIZE(flow_policy))) {
+            buffer_delete(reply);
+            return EPROTO;
+        }
+        /* Count by the element type the loop below strides over, so that it
+         * cannot step past the end of the attribute payload. */
+        n_flows = (nl_attr_get_size(attrs[DP_GENL_A_FLOW])
+                   / sizeof *flows);
+        n_flows = MIN(n_flows, nl_attr_get_u32(attrs[DP_GENL_A_NUMFLOWS]));
+        if (n_flows <= 0) {
+            /* Final message: release it before leaving the loop. */
+            buffer_delete(reply);
+            break;
+        }
+
+        flows = nl_attr_get(attrs[DP_GENL_A_FLOW]);
+        for (ofm = flows; ofm < &flows[n_flows]; ofm++) {
+            if (ofm->ofm.header.version != 1){
+                VLOG_DBG("recv_dp_flow incorrect version");
+                buffer_delete(reply);
+                return EPROTO;
+            } else if (ofm->ofm.header.type != OFPT_FLOW_MOD) {
+                VLOG_DBG("recv_fp_flow bad return message type");
+                buffer_delete(reply);
+                return EPROTO;
+            }
+
+            ofp_print_flow_mod(stdout, &ofm->ofm,
+                               sizeof(struct ofp_flow_mod), 1);
+            putc('\n', stdout);
+        }
+
+        buffer_delete(reply);
+        retval = nl_sock_recv(dp->sock, &reply, true);
+        if (retval) {
+            return retval;
+        }
+    }
+    return 0;
+}
+
+/* Tells dp to send num_packets up through
netlink for benchmarking. */
+int
+dpif_benchmark_nl(struct dpif *dp, uint32_t num_packets, uint32_t packet_size)
+{
+    struct buffer request;
+    int retval;
+
+    buffer_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family,
+                          NLM_F_REQUEST, DP_GENL_C_BENCHMARK_NL, 1);
+    nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx);
+    nl_msg_put_u32(&request, DP_GENL_A_NPACKETS, num_packets);
+    nl_msg_put_u32(&request, DP_GENL_A_PSIZE, packet_size);
+    /* Sent without waiting for a reply; only the send status is returned. */
+    retval = nl_sock_send(dp->sock, &request, true);
+    buffer_uninit(&request);
+
+    return retval;
+}
+
+static const struct nl_policy openflow_multicast_policy[] = {
+    [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 },
+    [DP_GENL_A_MC_GROUP] = { .type = NL_A_U32 },
+};
+
+/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If
+ * successful, stores the multicast group in '*multicast_group' and returns 0.
+ * Otherwise, returns a positive errno value.
+ *
+ * The query runs on a temporary socket that is created and destroyed here. */
+static int
+lookup_openflow_multicast_group(int dp_idx, int *multicast_group)
+{
+    struct nl_sock *sock;
+    struct buffer request, *reply;
+    struct nlattr *attrs[ARRAY_SIZE(openflow_multicast_policy)];
+    int retval;
+
+    retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock);
+    if (retval) {
+        return retval;
+    }
+    buffer_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, sock, 0, openflow_family, NLM_F_REQUEST,
+                          DP_GENL_C_QUERY_DP, 1);
+    nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx);
+    retval = nl_sock_transact(sock, &request, &reply);
+    buffer_uninit(&request);
+    if (retval) {
+        nl_sock_destroy(sock);
+        return retval;
+    }
+    if (!nl_policy_parse(reply, openflow_multicast_policy, attrs,
+                         ARRAY_SIZE(openflow_multicast_policy))) {
+        nl_sock_destroy(sock);
+        buffer_delete(reply);
+        return EPROTO;
+    }
+    *multicast_group = nl_attr_get_u32(attrs[DP_GENL_A_MC_GROUP]);
+    nl_sock_destroy(sock);
+    buffer_delete(reply);
+
+    return 0;
+}
+
+/* Sends the given 'command' to datapath 'dp'.
If 'netdev' is nonnull, adds it
+ * to the command as the port name attribute. Returns 0 if successful,
+ * otherwise a positive errno value. */
+static int
+send_mgmt_command(struct dpif *dp, int command, const char *netdev)
+{
+    struct buffer request, *reply;
+    int retval;
+
+    buffer_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, dp->sock, 32, openflow_family,
+                          NLM_F_REQUEST | NLM_F_ACK, command, 1);
+    nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx);
+    if (netdev) {
+        nl_msg_put_string(&request, DP_GENL_A_PORTNAME, netdev);
+    }
+    retval = nl_sock_transact(dp->sock, &request, &reply);
+    buffer_uninit(&request);
+    /* Only the status matters; the reply body is discarded.
+     * NOTE(review): 'reply' is null when the transaction fails, so this
+     * presumably relies on buffer_delete() accepting NULL -- confirm. */
+    buffer_delete(reply);
+
+    return retval;
+}
diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
new file mode 100644
index 00000000..610bb7c8
--- /dev/null
+++ b/lib/dynamic-string.c
@@ -0,0 +1,98 @@
+/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dynamic-string.h"
+#include
+#include
+#include "util.h"
+
+/* Initializes 'ds' as an empty string with no storage allocated. */
+void
+ds_init(struct ds *ds)
+{
+    ds->string = NULL;
+    ds->length = 0;
+    ds->allocated = 0;
+}
+
+/* Ensures that 'ds' has storage for at least 'min_length' characters plus a
+ * terminator, growing by at least a factor of two (minimum 8) when it has to
+ * reallocate. */
+void
+ds_reserve(struct ds *ds, size_t min_length)
+{
+    if (min_length > ds->allocated || !ds->string) {
+        ds->allocated += MAX(min_length, ds->allocated);
+        ds->allocated = MAX(8, ds->allocated);
+        /* One extra byte beyond 'allocated' holds the null terminator. */
+        ds->string = xrealloc(ds->string, ds->allocated + 1);
+    }
+}
+
+/* Appends printf-style formatted output to 'ds'. */
+void
+ds_put_format(struct ds *ds, const char *format, ...)
+{
+    va_list args;
+
+    va_start(args, format);
+    ds_put_format_valist(ds, format, args);
+    va_end(args);
+}
+
+/* Appends output formatted from 'format' and 'args_' to 'ds'.  Formats into
+ * the space already available first; if that is too small, grows 'ds' and
+ * formats a second time. */
+void
+ds_put_format_valist(struct ds *ds, const char *format, va_list args_)
+{
+    va_list args;
+    size_t available;
+    int needed;
+
+    /* 'args_' may be consumed twice, so each attempt works on a copy. */
+    va_copy(args, args_);
+    available = ds->string ? ds->allocated - ds->length + 1 : 0;
+    needed = vsnprintf(&ds->string[ds->length], available, format, args);
+    va_end(args);
+
+    /* NOTE(review): 'needed' is int and 'available' is size_t, so a negative
+     * vsnprintf() result converts to a huge value and takes the grow path --
+     * confirm whether encoding errors need explicit handling. */
+    if (needed < available) {
+        ds->length += needed;
+    } else {
+        size_t available;
+
+        ds_reserve(ds, ds->length + needed);
+
+        va_copy(args, args_);
+        available = ds->allocated - ds->length + 1;
+        needed = vsnprintf(&ds->string[ds->length], available, format, args);
+        va_end(args);
+
+        assert(needed < available);
+        ds->length += needed;
+    }
+}
+
+/* Returns the contents of 'ds' as a C string, allocating an empty one first
+ * if 'ds' has no storage yet. */
+char *
+ds_cstr(struct ds *ds)
+{
+    if (!ds->string) {
+        ds_reserve(ds, 0);
+        ds->string[0] = '\0';
+    }
+    return ds->string;
+}
+
+/* Frees the storage owned by 'ds'. */
+void
+ds_destroy(struct ds *ds)
+{
+    free(ds->string);
+}
diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c
new file mode 100644
index 00000000..5ab62848
--- /dev/null
+++ b/lib/fatal-signal.c
@@ -0,0 +1,181 @@
+#include "fatal-signal.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "util.h"
+
+/* Signals to catch. */
+static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP };
+
+/* Signals to catch as a sigset_t. */
+static sigset_t fatal_signal_set;
+
+/* Hooks to call upon catching a signal */
+struct hook {
+    void (*func)(void *aux);
+    void *aux;
+};
+#define MAX_HOOKS 32
+static struct hook hooks[MAX_HOOKS];
+static size_t n_hooks;
+
+/* Number of nesting signal blockers. */
+static int block_level = 0;
+
+/* Signal mask saved by outermost signal blocker. */
+static sigset_t saved_signal_mask;
+
+static void call_sigprocmask(int how, sigset_t* new_set, sigset_t* old_set);
+static void signal_handler(int sig_nr);
+
+/* Registers 'func' to be called with argument 'aux' when a process
+ * termination signal is raised.  At most MAX_HOOKS hooks may be registered. */
+void
+fatal_signal_add_hook(void (*func)(void *aux), void *aux)
+{
+    /* Keep the handler from seeing a half-written table entry. */
+    fatal_signal_block();
+    assert(n_hooks < MAX_HOOKS);
+    hooks[n_hooks].func = func;
+    hooks[n_hooks].aux = aux;
+    n_hooks++;
+    fatal_signal_unblock();
+}
+
+/* Blocks program termination signals until fatal_signal_unblock() is called.
+ * May be called multiple times with nesting; if so, fatal_signal_unblock()
+ * must be called the same number of times to unblock signals.
+ *
+ * This is needed while adjusting a data structure that will be accessed by a
+ * fatal signal hook, so that the hook is not invoked while the data structure
+ * is in an inconsistent state. */
+void
+fatal_signal_block()
+{
+    static bool inited = false;
+    if (!inited) {
+        size_t i;
+
+        /* First call: build the signal set and install the handlers. */
+        inited = true;
+        sigemptyset(&fatal_signal_set);
+        for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
+            int sig_nr = fatal_signals[i];
+            sigaddset(&fatal_signal_set, sig_nr);
+            /* A signal that was being ignored stays ignored. */
+            if (signal(sig_nr, signal_handler) == SIG_IGN) {
+                signal(sig_nr, SIG_IGN);
+            }
+        }
+    }
+
+    /* Only the outermost blocker changes the process signal mask. */
+    if (++block_level == 1) {
+        call_sigprocmask(SIG_BLOCK, &fatal_signal_set, &saved_signal_mask);
+    }
+}
+
+/* Unblocks program termination signals that were blocked by
+ * fatal_signal_block(). If multiple calls to fatal_signal_block() are nested,
+ * fatal_signal_unblock() must be called the same number of times to unblock
+ * signals. */
+void
+fatal_signal_unblock()
+{
+    assert(block_level > 0);
+    if (--block_level == 0) {
+        call_sigprocmask(SIG_SETMASK, &saved_signal_mask, NULL);
+    }
+}
+
+/* Names of files to unlink at exit: 'n_files' entries are in use out of
+ * 'max_files' allocated. */
+static char **files;
+static size_t n_files, max_files;
+
+static void unlink_files(void *aux);
+static void do_unlink_files(void);
+
+/* Registers 'file' to be unlinked when the program terminates via exit() or a
+ * fatal signal. */
+void
+fatal_signal_add_file_to_unlink(const char *file)
+{
+    static bool added_hook = false;
+    if (!added_hook) {
+        /* Cover both exit paths: the fatal-signal hook and atexit(). */
+        added_hook = true;
+        fatal_signal_add_hook(unlink_files, NULL);
+        atexit(do_unlink_files);
+    }
+
+    fatal_signal_block();
+    if (n_files >= max_files) {
+        max_files = max_files * 2 + 1;
+        files = xrealloc(files, sizeof *files * max_files);
+    }
+    files[n_files++] = xstrdup(file);
+    fatal_signal_unblock();
+}
+
+/* Unregisters 'file' from being unlinked when the program terminates via
+ * exit() or a fatal signal.
*/ +void +fatal_signal_remove_file_to_unlink(const char *file) +{ + size_t i; + + fatal_signal_block(); + for (i = 0; i < n_files; i++) { + if (!strcmp(files[i], file)) { + free(files[i]); + files[i] = files[--n_files]; + break; + } + } + fatal_signal_unblock(); +} + +static void +unlink_files(void *aux UNUSED) +{ + do_unlink_files(); +} + +static void +do_unlink_files(void) +{ + size_t i; + + for (i = 0; i < n_files; i++) { + unlink(files[i]); + } +} + +static void +call_sigprocmask(int how, sigset_t* new_set, sigset_t* old_set) +{ + int error = sigprocmask(how, new_set, old_set); + if (error) { + fprintf(stderr, "sigprocmask: %s\n", strerror(errno)); + } +} + +static void +signal_handler(int sig_nr) +{ + volatile sig_atomic_t recurse = 0; + if (!recurse) { + size_t i; + + recurse = 1; + + /* Call all the hooks. */ + for (i = 0; i < n_hooks; i++) { + hooks[i].func(hooks[i].aux); + } + } + + /* Re-raise the signal with the default handling so that the program + * termination status reflects that we were killed by this signal */ + signal(sig_nr, SIG_DFL); + raise(sig_nr); +} diff --git a/lib/fault.c b/lib/fault.c new file mode 100644 index 00000000..c1de3d81 --- /dev/null +++ b/lib/fault.c @@ -0,0 +1,77 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "fault.h" +#include +#include +#include +#include +#include +#include +#include "util.h" + +#include "vlog.h" +#define THIS_MODULE VLM_fault + +void +fault_handler(int sig_nr) +{ + VLOG_EMER("Caught signal %d.", sig_nr); + log_backtrace(); + fflush(stdout); + fflush(stderr); + + signal(sig_nr, SIG_DFL); + raise(sig_nr); +} + +void +log_backtrace(void) +{ + /* During the loop: + + frame[0] points to the next frame. + frame[1] points to the return address. */ + void **frame; + for (frame = __builtin_frame_address(0); + frame != NULL && frame[0] != NULL; + frame = frame[0]) { + Dl_info addrinfo; + if (!dladdr(frame[1], &addrinfo) || !addrinfo.dli_sname) { + fprintf(stderr, " 0x%08"PRIxPTR"\n", (uintptr_t) frame[1]); + } else { + fprintf(stderr, " 0x%08"PRIxPTR" (%s+0x%x)\n", + (uintptr_t) frame[1], addrinfo.dli_sname, + (char *) frame[1] - (char *) addrinfo.dli_saddr); + } + } + fflush(stderr); +} + +void +register_fault_handlers(void) +{ + signal(SIGABRT, fault_handler); + signal(SIGBUS, fault_handler); + signal(SIGFPE, fault_handler); + signal(SIGILL, fault_handler); + signal(SIGSEGV, fault_handler); +} diff --git a/lib/flow.c b/lib/flow.c new file mode 100644 index 00000000..bccc0efe --- /dev/null +++ b/lib/flow.c @@ -0,0 +1,122 @@ +#include +#include "flow.h" +#include +#include +#include +#include "buffer.h" +#include "hash.h" +#include "ip.h" +#include "mac.h" +#include "openflow.h" +#include "packets.h" + +#include "vlog.h" +#define THIS_MODULE VLM_flow + +void 
+flow_extract(const struct buffer *packet, uint16_t in_port, struct flow *flow) +{ + struct buffer b = *packet; + struct eth_header *eth; + + if (b.size < ETH_TOTAL_MIN) { + VLOG_WARN("packet length %d less than minimum size %d", + b.size, ETH_TOTAL_MIN); + } + + memset(flow, 0, sizeof *flow); + flow->in_port = htons(in_port); + + eth = buffer_at(&b, 0, sizeof *eth); + if (eth) { + buffer_pull(&b, ETH_HEADER_LEN); + if (ntohs(eth->eth_type) >= OFP_DL_TYPE_ETH2_CUTOFF) { + /* This is an Ethernet II frame */ + flow->dl_type = eth->eth_type; + } else { + /* This is an 802.2 frame */ + struct llc_snap_header *h = buffer_at(&b, 0, sizeof *h); + if (h == NULL) { + return; + } + if (h->llc.llc_dsap == LLC_DSAP_SNAP + && h->llc.llc_ssap == LLC_SSAP_SNAP + && h->llc.llc_cntl == LLC_CNTL_SNAP + && !memcmp(h->snap.snap_org, SNAP_ORG_ETHERNET, + sizeof h->snap.snap_org)) { + flow->dl_type = h->snap.snap_type; + buffer_pull(&b, sizeof *h); + } else { + flow->dl_type = OFP_DL_TYPE_NOT_ETH_TYPE; + buffer_pull(&b, sizeof(struct llc_header)); + } + } + + /* Check for a VLAN tag */ + if (flow->dl_type != htons(ETH_TYPE_VLAN)) { + flow->dl_vlan = htons(OFP_VLAN_NONE); + } else { + struct vlan_header *vh = buffer_at(&b, 0, sizeof *vh); + flow->dl_type = vh->vlan_next_type; + flow->dl_vlan = vh->vlan_tci & htons(VLAN_VID); + buffer_pull(&b, sizeof *vh); + } + memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); + memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); + + if (flow->dl_type == htons(ETH_TYPE_IP)) { + const struct ip_header *nh = buffer_at(&b, 0, sizeof *nh); + if (nh) { + flow->nw_src = nh->ip_src; + flow->nw_dst = nh->ip_dst; + flow->nw_proto = nh->ip_proto; + if (flow->nw_proto == IP_TYPE_TCP + || flow->nw_proto == IP_TYPE_UDP) { + int udp_ofs = IP_IHL(nh->ip_ihl_ver) * 4; + const struct udp_header *th + = buffer_at(&b, udp_ofs, sizeof *th); + if (th) { + flow->tp_src = th->udp_src; + flow->tp_dst = th->udp_dst; + } + } + } + } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { 
+ const struct arp_eth_header *ah = buffer_at(&b, 0, sizeof *ah); + if (ah && ah->ar_hrd == htons(ARP_HRD_ETHERNET) + && ah->ar_pro == htons(ARP_PRO_IP) + && ah->ar_hln == ETH_ADDR_LEN + && ah->ar_pln == sizeof flow->nw_src) + { + /* check if sha/tha match dl_src/dl_dst? */ + flow->nw_src = ah->ar_spa; + flow->nw_dst = ah->ar_tpa; + } + } + } +} + +void +flow_print(FILE *stream, const struct flow *flow) +{ + fprintf(stream, + "port%04x:vlan%04x mac"MAC_FMT"->"MAC_FMT" " + "proto%04x ip"IP_FMT"->"IP_FMT" port%d->%d", + ntohs(flow->in_port), ntohs(flow->dl_vlan), + MAC_ARGS(flow->dl_src), MAC_ARGS(flow->dl_dst), + ntohs(flow->dl_type), + IP_ARGS(&flow->nw_src), IP_ARGS(&flow->nw_dst), + ntohs(flow->tp_src), ntohs(flow->tp_dst)); +} + +int +flow_compare(const struct flow *a, const struct flow *b) +{ + return memcmp(a, b, sizeof *a); +} + +unsigned long int +flow_hash(const struct flow *flow, uint32_t basis) +{ + return hash_fnv(flow, sizeof *flow, basis); +} diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 00000000..784daa7f --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,13 @@ +#include "hash.h" + +uint32_t +hash_fnv(const void *p_, size_t n, uint32_t basis) +{ + const uint8_t *p = p_; + uint32_t hash = basis; + while (n--) { + hash *= HASH_FNV_PRIME; + hash ^= *p++; + } + return hash; +} diff --git a/lib/list.c b/lib/list.c new file mode 100644 index 00000000..379e8f8a --- /dev/null +++ b/lib/list.c @@ -0,0 +1,123 @@ +#include "list.h" +#include + +/* Initializes 'list' as an empty list. */ +void +list_init(struct list *list) +{ + list->next = list->prev = list; +} + +/* Inserts 'elem' just before 'before'. */ +void +list_insert(struct list *before, struct list *elem) +{ + elem->prev = before->prev; + elem->next = before; + before->prev->next = elem; + before->prev = elem; +} + +/* Removes elements 'first' though 'last' (exclusive) from their current list, + then inserts them just before 'before'. 
*/
+void
+list_splice(struct list *before, struct list *first, struct list *last)
+{
+    /* An empty range means there is nothing to move. */
+    if (first == last)
+        return;
+    /* From here on 'last' is the final element moved (inclusive). */
+    last = last->prev;
+
+    /* Cleanly remove 'first'...'last' from its current list. */
+    first->prev->next = last->next;
+    last->next->prev = first->prev;
+
+    /* Splice 'first'...'last' into new list. */
+    first->prev = before->prev;
+    last->next = before;
+    before->prev->next = first;
+    before->prev = last;
+}
+
+/* Inserts 'elem' at the beginning of 'list', so that it becomes the front in
+   'list'. */
+void
+list_push_front(struct list *list, struct list *elem)
+{
+    list_insert(list->next, elem);
+}
+
+/* Inserts 'elem' at the end of 'list', so that it becomes the back in
+ * 'list'. */
+void
+list_push_back(struct list *list, struct list *elem)
+{
+    list_insert(list, elem);
+}
+
+/* Removes 'elem' from its list and returns the element that followed it.
+   Undefined behavior if 'elem' is not in a list. */
+struct list *
+list_remove(struct list *elem)
+{
+    elem->prev->next = elem->next;
+    elem->next->prev = elem->prev;
+    return elem->next;
+}
+
+/* Removes the front element from 'list' and returns it. Undefined behavior if
+   'list' is empty before removal. */
+struct list *
+list_pop_front(struct list *list)
+{
+    struct list *front = list->next;
+    list_remove(front);
+    return front;
+}
+
+/* Removes the back element from 'list' and returns it.
+   Undefined behavior if 'list' is empty before removal. */
+struct list *
+list_pop_back(struct list *list)
+{
+    struct list *back = list->prev;
+    list_remove(back);
+    return back;
+}
+
+/* Returns the front element in 'list'.
+   Undefined behavior if 'list' is empty (asserted when assertions are
+   enabled). */
+struct list *
+list_front(struct list *list)
+{
+    assert(!list_is_empty(list));
+    return list->next;
+}
+
+/* Returns the back element in 'list'.
+   Undefined behavior if 'list' is empty (asserted when assertions are
+   enabled). */
+struct list *
+list_back(struct list *list)
+{
+    assert(!list_is_empty(list));
+    return list->prev;
+}
+
+/* Returns the number of elements in 'list'.
+   Runs in O(n) in the number of elements. */
+size_t
+list_size(const struct list *list)
+{
+    const struct list *e;
+    size_t cnt = 0;
+
+    /* Walk forward until the traversal returns to the head node. */
+    for (e = list->next; e != list; e = e->next)
+        cnt++;
+    return cnt;
+}
+
+/* Returns true if 'list' is empty, false otherwise.  An empty list's head
+   points back at itself, as set up by list_init(). */
+bool
+list_is_empty(const struct list *list)
+{
+    return list->next == list;
+}
diff --git a/lib/netlink.c b/lib/netlink.c
new file mode 100644
index 00000000..f625f94b
--- /dev/null
+++ b/lib/netlink.c
@@ -0,0 +1,908 @@
+/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "netlink.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "buffer.h"
+#include "util.h"
+
+#include "vlog.h"
+#define THIS_MODULE VLM_netlink
+
+/* Linux header file confusion causes this to be undefined.
*/ +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +/* Netlink sockets. */ + +struct nl_sock +{ + int fd; + uint32_t pid; +}; + +/* Next nlmsghdr sequence number. + * + * This implementation uses sequence numbers that are unique process-wide, to + * avoid a hypothetical race: send request, close socket, open new socket that + * reuses the old socket's PID value, send request on new socket, receive reply + * from kernel to old socket but with same PID and sequence number. (This race + * could be avoided other ways, e.g. by preventing PIDs from being quickly + * reused). */ +static uint32_t next_seq; + +static int alloc_pid(uint32_t *); +static void free_pid(uint32_t); + +/* Creates a new netlink socket for the given netlink 'protocol' + * (NETLINK_ROUTE, NETLINK_GENERIC, ...). Returns 0 and sets '*sockp' to the + * new socket if successful, otherwise returns a positive errno value. + * + * If 'multicast_group' is nonzero, the new socket subscribes to the specified + * netlink multicast group. (A netlink socket may listen to an arbitrary + * number of multicast groups, but so far we only need one at a time.) + * + * Nonzero 'so_sndbuf' or 'so_rcvbuf' override the kernel default send or + * receive buffer size, respectively. + */ +int +nl_sock_create(int protocol, int multicast_group, + size_t so_sndbuf, size_t so_rcvbuf, struct nl_sock **sockp) +{ + struct nl_sock *sock; + struct sockaddr_nl local, remote; + int retval = 0; + + if (next_seq == 0) { + /* Pick initial sequence number. 
*/ + next_seq = getpid() ^ time(0); + } + + *sockp = NULL; + sock = malloc(sizeof *sock); + if (sock == NULL) { + return ENOMEM; + } + + sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol); + if (sock->fd < 0) { + VLOG_ERR("fcntl: %s", strerror(errno)); + goto error; + } + + retval = alloc_pid(&sock->pid); + if (retval) { + goto error; + } + + if (so_sndbuf != 0 + && setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, + &so_sndbuf, sizeof so_sndbuf) < 0) { + VLOG_ERR("setsockopt(SO_SNDBUF,%zu): %s", so_sndbuf, strerror(errno)); + goto error_free_pid; + } + + if (so_rcvbuf != 0 + && setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, + &so_rcvbuf, sizeof so_rcvbuf) < 0) { + VLOG_ERR("setsockopt(SO_RCVBUF,%zu): %s", so_rcvbuf, strerror(errno)); + goto error_free_pid; + } + + /* Bind local address as our selected pid. */ + memset(&local, 0, sizeof local); + local.nl_family = AF_NETLINK; + local.nl_pid = sock->pid; + if (multicast_group > 0 && multicast_group <= 32) { + /* This method of joining multicast groups is supported by old kernels, + * but it only allows 32 multicast groups per protocol. */ + local.nl_groups |= 1ul << (multicast_group - 1); + } + if (bind(sock->fd, (struct sockaddr *) &local, sizeof local) < 0) { + VLOG_ERR("bind(%"PRIu32"): %s", sock->pid, strerror(errno)); + goto error_free_pid; + } + + /* Bind remote address as the kernel (pid 0). */ + memset(&remote, 0, sizeof remote); + remote.nl_family = AF_NETLINK; + remote.nl_pid = 0; + if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) { + VLOG_ERR("connect(0): %s", strerror(errno)); + goto error_free_pid; + } + + /* This method of joining multicast groups is only supported by newish + * kernels, but it allows for an arbitrary number of multicast groups. 
*/ + if (multicast_group > 32 + && setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &multicast_group, sizeof multicast_group) < 0) { + VLOG_ERR("setsockopt(NETLINK_ADD_MEMBERSHIP,%d): %s", + multicast_group, strerror(errno)); + goto error_free_pid; + } + + *sockp = sock; + return 0; + +error_free_pid: + free_pid(sock->pid); +error: + if (retval == 0) { + retval = errno; + if (retval == 0) { + retval = EINVAL; + } + } + if (sock->fd >= 0) { + close(sock->fd); + } + free(sock); + return retval; +} + +/* Destroys netlink socket 'sock'. */ +void +nl_sock_destroy(struct nl_sock *sock) +{ + if (sock) { + close(sock->fd); + free_pid(sock->pid); + free(sock); + } +} + +/* Tries to send 'msg', which must contain a Netlink message, to the kernel on + * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size before the + * message is sent. + * + * Returns 0 if successful, otherwise a positive errno value. If + * 'wait' is true, then the send will wait until buffer space is ready; + * otherwise, returns EAGAIN if the 'sock' send buffer is full. */ +int +nl_sock_send(struct nl_sock *sock, const struct buffer *msg, bool wait) +{ + int retval; + + nl_msg_nlmsghdr(msg)->nlmsg_len = msg->size; + do { + retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT); + } while (retval < 0 && errno == EINTR); + return retval < 0 ? errno : 0; +} + +/* Tries to send the 'n_iov' chunks of data in 'iov' to the kernel on 'sock' as + * a single Netlink message. (The message must be fully formed and not require + * finalization of its nlmsg_len field.) + * + * Returns 0 if successful, otherwise a positive errno value. If 'wait' is + * true, then the send will wait until buffer space is ready; otherwise, + * returns EAGAIN if the 'sock' send buffer is full. 
*/
+int
+nl_sock_sendv(struct nl_sock *sock, const struct iovec iov[], size_t n_iov,
+              bool wait)
+{
+    struct msghdr msg;
+    int retval;
+
+    memset(&msg, 0, sizeof msg);
+    msg.msg_iov = (struct iovec *) iov;
+    msg.msg_iovlen = n_iov;
+    /* Honor 'wait' as documented, as nl_sock_send() does; MSG_DONTWAIT used
+     * to be passed unconditionally. */
+    do {
+        retval = sendmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
+    } while (retval < 0 && errno == EINTR);
+    return retval < 0 ? errno : 0;
+}
+
+/* Tries to receive a netlink message from the kernel on 'sock'. If
+ * successful, stores the received message into '*bufp' and returns 0. The
+ * caller is responsible for destroying the message with buffer_delete(). On
+ * failure, returns a positive errno value and stores a null pointer into
+ * '*bufp'.
+ *
+ * If 'wait' is true, nl_sock_recv waits for a message to be ready; otherwise,
+ * returns EAGAIN if the 'sock' receive buffer is empty. */
+int
+nl_sock_recv(struct nl_sock *sock, struct buffer **bufp, bool wait)
+{
+    uint8_t tmp;
+    ssize_t bufsize = 2048;
+    ssize_t nbytes, nbytes2;
+    struct buffer *buf;
+    struct nlmsghdr *nlmsghdr;
+    struct iovec iov;
+    struct msghdr msg = {
+        .msg_name = NULL,
+        .msg_namelen = 0,
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = NULL,
+        .msg_controllen = 0,
+        .msg_flags = 0
+    };
+
+    buf = buffer_new(bufsize);
+    *bufp = NULL;
+
+try_again:
+    /* Attempt to read the message. We don't know the size of the data
+     * yet, so we take a guess at 2048. If we're wrong, we keep trying
+     * and doubling the buffer size each time.
+     */
+    nlmsghdr = buffer_put_uninit(buf, bufsize);
+    iov.iov_base = nlmsghdr;
+    iov.iov_len = bufsize;
+    /* MSG_PEEK leaves the message queued so that a truncated read can be
+     * retried with a larger buffer. */
+    do {
+        nbytes = recvmsg(sock->fd, &msg, (wait ? 0 : MSG_DONTWAIT) | MSG_PEEK);
+    } while (nbytes < 0 && errno == EINTR);
+    if (nbytes < 0) {
+        /* Capture errno before cleanup, which may overwrite it. */
+        int error = errno;
+        buffer_delete(buf);
+        return error;
+    }
+    if (msg.msg_flags & MSG_TRUNC) {
+        bufsize *= 2;
+        buffer_reinit(buf, bufsize);
+        goto try_again;
+    }
+    buf->size = nbytes;
+
+    /* We successfully read the message, so recv again to clear the queue */
+    iov.iov_base = &tmp;
+    iov.iov_len = 1;
+    do {
+        nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT);
+        if (nbytes2 < 0) {
+            VLOG_ERR("failed to remove nlmsg from socket: %d\n", errno);
+        }
+    } while (nbytes2 < 0 && errno == EINTR);
+
+    if (!NLMSG_OK(nlmsghdr, nbytes)) {
+        /* Report the number of bytes received, not the buffer capacity. */
+        VLOG_ERR("received invalid nlmsg (%zd bytes < %d)",
+                 nbytes, NLMSG_HDRLEN);
+        buffer_delete(buf);
+        return EPROTO;
+    }
+    *bufp = buf;
+    return 0;
+}
+
+/* Sends 'request' to the kernel via 'sock' and waits for a response. If
+ * successful, stores the reply into '*replyp' and returns 0. The caller is
+ * responsible for destroying the reply with buffer_delete(). On failure,
+ * returns a positive errno value and stores a null pointer into '*replyp'.
+ *
+ * Bare Netlink is an unreliable transport protocol. This function layers
+ * reliable delivery and reply semantics on top of bare Netlink.
+ *
+ * In Netlink, sending a request to the kernel is reliable enough, because the
+ * kernel will tell us if the message cannot be queued (and we will in that
+ * case put it on the transmit queue and wait until it can be delivered).
+ *
+ * Receiving the reply is the real problem: if the socket buffer is full when
+ * the kernel tries to send the reply, the reply will be dropped. However, the
+ * kernel sets a flag that a reply has been dropped. The next call to recv
+ * then returns ENOBUFS. We can then re-send the request.
+ *
+ * Caveats:
+ *
+ * 1. Netlink depends on sequence numbers to match up requests and
+ * replies. The sender of a request supplies a sequence number, and
+ * the reply echos back that sequence number.
+ * + * This is fine, but (1) some kernel netlink implementations are + * broken, in that they fail to echo sequence numbers and (2) this + * function will drop packets with non-matching sequence numbers, so + * that only a single request can be usefully transacted at a time. + * + * 2. Resending the request causes it to be re-executed, so the request + * needs to be idempotent. + */ +int +nl_sock_transact(struct nl_sock *sock, + const struct buffer *request, struct buffer **replyp) +{ + uint32_t seq = nl_msg_nlmsghdr(request)->nlmsg_seq; + struct nlmsghdr *nlmsghdr; + struct buffer *reply; + int retval; + + *replyp = NULL; + + /* Ensure that we get a reply even if this message doesn't ordinarily call + * for one. */ + nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_ACK; + +send: + retval = nl_sock_send(sock, request, true); + if (retval) { + return retval; + } + +recv: + retval = nl_sock_recv(sock, &reply, true); + if (retval) { + if (retval == ENOBUFS) { + VLOG_DBG("receive buffer overflow, resending request"); + goto send; + } else { + return retval; + } + } + nlmsghdr = nl_msg_nlmsghdr(reply); + if (seq != nlmsghdr->nlmsg_seq) { + VLOG_DBG("ignoring seq %"PRIu32" != expected %"PRIu32, + nl_msg_nlmsghdr(reply)->nlmsg_seq, seq); + buffer_delete(reply); + goto recv; + } + if (nl_msg_nlmsgerr(reply, &retval)) { + if (retval) { + VLOG_DBG("received NAK error=%d (%s)", retval, strerror(retval)); + } + return retval != EAGAIN ? retval : EPROTO; + } + + *replyp = reply; + return 0; +} + +/* Returns 'sock''s underlying file descriptor. */ +int +nl_sock_fd(const struct nl_sock *sock) +{ + return sock->fd; +} + +/* Netlink messages. */ + +/* Returns the nlmsghdr at the head of 'msg'. + * + * 'msg' must be at least as large as a nlmsghdr. */ +struct nlmsghdr * +nl_msg_nlmsghdr(const struct buffer *msg) +{ + return buffer_at_assert(msg, 0, NLMSG_HDRLEN); +} + +/* Returns the genlmsghdr just past 'msg''s nlmsghdr. 
+ * + * Returns a null pointer if 'msg' is not large enough to contain an nlmsghdr + * and a genlmsghdr. */ +struct genlmsghdr * +nl_msg_genlmsghdr(const struct buffer *msg) +{ + return buffer_at(msg, NLMSG_HDRLEN, GENL_HDRLEN); +} + +/* If 'buffer' is a NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK + * message, otherwise a positive errno value, and returns true. If 'buffer' is + * not an NLMSG_ERROR message, returns false. + * + * 'msg' must be at least as large as a nlmsghdr. */ +bool +nl_msg_nlmsgerr(const struct buffer *msg, int *errorp) +{ + if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = buffer_at(msg, NLMSG_HDRLEN, sizeof *err); + int code = EPROTO; + if (!err) { + VLOG_ERR("received invalid nlmsgerr (%zd bytes < %zd)", + msg->size, NLMSG_HDRLEN + sizeof *err); + } else if (err->error <= 0 && err->error > INT_MIN) { + code = -err->error; + } + if (errorp) { + *errorp = code; + } + return true; + } else { + return false; + } +} + +/* Ensures that 'b' has room for at least 'size' bytes plus netlink pading at + * its tail end, reallocating and copying its data if necessary. */ +void +nl_msg_reserve(struct buffer *msg, size_t size) +{ + buffer_reserve_tailroom(msg, NLMSG_ALIGN(size)); +} + +/* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty. + * Uses the given 'type' and 'flags'. 'sock' is used to obtain a PID and + * sequence number for proper routing of replies. 'expected_payload' should be + * an estimate of the number of payload bytes to be supplied; if the size of + * the payload is unknown a value of 0 is acceptable. + * + * 'type' is ordinarily an enumerated value specific to the Netlink protocol + * (e.g. RTM_NEWLINK, for NETLINK_ROUTE protocol). For Generic Netlink, 'type' + * is the family number obtained via nl_lookup_genl_family(). + * + * 'flags' is a bit-mask that indicates what kind of request is being made. 
It + * is often NLM_F_REQUEST indicating that a request is being made, commonly + * or'd with NLM_F_ACK to request an acknowledgement. + * + * nl_msg_put_genlmsghdr is more convenient for composing a Generic Netlink + * message. */ +void +nl_msg_put_nlmsghdr(struct buffer *msg, struct nl_sock *sock, + size_t expected_payload, uint32_t type, uint32_t flags) +{ + struct nlmsghdr *nlmsghdr; + + assert(msg->size == 0); + + nl_msg_reserve(msg, NLMSG_HDRLEN + expected_payload); + nlmsghdr = nl_msg_put_uninit(msg, NLMSG_HDRLEN); + nlmsghdr->nlmsg_len = 0; + nlmsghdr->nlmsg_type = type; + nlmsghdr->nlmsg_flags = flags; + nlmsghdr->nlmsg_seq = ++next_seq; + nlmsghdr->nlmsg_pid = sock->pid; +} + +/* Puts a nlmsghdr and genlmsghdr at the beginning of 'msg', which must be + * initially empty. 'sock' is used to obtain a PID and sequence number for + * proper routing of replies. 'expected_payload' should be an estimate of the + * number of payload bytes to be supplied; if the size of the payload is + * unknown a value of 0 is acceptable. + * + * 'family' is the family number obtained via nl_lookup_genl_family(). + * + * 'flags' is a bit-mask that indicates what kind of request is being made. It + * is often NLM_F_REQUEST indicating that a request is being made, commonly + * or'd with NLM_F_ACK to request an acknowledgement. + * + * 'cmd' is an enumerated value specific to the Generic Netlink family + * (e.g. CTRL_CMD_NEWFAMILY for the GENL_ID_CTRL family). + * + * 'version' is a version number specific to the family and command (often 1). + * + * nl_msg_put_nlmsghdr should be used to compose Netlink messages that are not + * Generic Netlink messages. 
/* Grows 'msg' at its tail by 'size' bytes rounded up to the Netlink alignment
 * boundary, reallocating and copying its data if necessary.  The first 'size'
 * bytes are left uninitialized for the caller to fill in; any padding bytes
 * after them are zeroed.  Returns a pointer to the first new byte. */
void *
nl_msg_put_uninit(struct buffer *msg, size_t size)
{
    size_t padded_size = NLMSG_ALIGN(size);
    char *start = buffer_put_uninit(msg, padded_size);

    if (padded_size != size) {
        memset(start + size, 0, padded_size - size);
    }
    return start;
}
/* Appends a Netlink attribute of the given 'type' with the 'size' bytes of
 * 'data' as its payload, plus Netlink padding if needed, to the tail end of
 * 'msg', reallocating and copying its data if necessary.
 *
 * 'data' may be a null pointer when 'size' is 0, which is how
 * nl_msg_put_flag() appends a payload-free attribute. */
void
nl_msg_put_unspec(struct buffer *msg, uint16_t type,
                  const void *data, size_t size)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, size);

    /* memcpy() requires valid pointers even for a zero-length copy, so skip
     * it entirely when there is nothing to copy. */
    if (size) {
        memcpy(payload, data, size);
    }
}
/* Expands to the value of type TYPE stored at the start of attribute NLA's
 * payload.  nl_attr_get_unspec() asserts that the payload is at least
 * sizeof(TYPE) bytes long.
 *
 * The expansion uses its NLA argument rather than whatever variable named
 * 'nla' happens to be in scope at the call site, and it keeps the payload
 * const-qualified. */
#define NL_ATTR_GET_AS(NLA, TYPE) \
        (*(const TYPE *) nl_attr_get_unspec((NLA), sizeof(TYPE)))
/* Returns the 64-bit value in 'nla''s payload.
 *
 * Asserts that 'nla''s payload is at least 8 bytes long. */
uint64_t
nl_attr_get_u64(const struct nlattr *nla)
{
    uint64_t value;

    /* Attribute payloads are only guaranteed the Netlink attribute alignment
     * (4 bytes), which is weaker than uint64_t may require, so copy the bytes
     * out instead of dereferencing a uint64_t pointer into the message. */
    memcpy(&value, nl_attr_get_unspec(nla, sizeof value), sizeof value);
    return value;
}
*/ +bool +nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[], + struct nlattr *attrs[], size_t n_attrs) +{ + void *p, *tail; + size_t n_required; + size_t i; + + n_required = 0; + for (i = 0; i < n_attrs; i++) { + attrs[i] = NULL; + + assert(policy[i].type < N_NL_ATTR_TYPES); + if (policy[i].type != NL_A_NO_ATTR + && policy[i].type != NL_A_FLAG + && !policy[i].optional) { + n_required++; + } + } + + p = buffer_at(msg, NLMSG_HDRLEN + GENL_HDRLEN, 0); + if (p == NULL) { + VLOG_DBG("missing headers in nl_policy_parse"); + return false; + } + tail = buffer_tail(msg); + + while (p < tail) { + size_t offset = p - msg->data; + struct nlattr *nla = p; + size_t len, aligned_len; + uint16_t type; + + /* Make sure its claimed length is plausible. */ + if (nla->nla_len < NLA_HDRLEN) { + VLOG_DBG("%zu: attr shorter than NLA_HDRLEN (%"PRIu16")", + offset, nla->nla_len); + return false; + } + len = nla->nla_len - NLA_HDRLEN; + aligned_len = NLA_ALIGN(len); + if (aligned_len > tail - p) { + VLOG_DBG("%zu: attr %"PRIu16" aligned data len (%zu) " + "> bytes left (%tu)", + offset, nla->nla_type, aligned_len, tail - p); + return false; + } + + type = nla->nla_type; + if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) { + const struct nl_policy *p = &policy[type]; + size_t min_len, max_len; + + /* Validate length and content. */ + min_len = p->min_len ? p->min_len : attr_len_range[p->type][0]; + max_len = p->max_len ? 
p->max_len : attr_len_range[p->type][1]; + if (len < min_len || len > max_len) { + VLOG_DBG("%zu: attr %"PRIu16" length %zu not in allowed range " + "%zu...%zu", offset, type, len, min_len, max_len); + return false; + } + if (p->type == NL_A_STRING) { + if (((char *) nla)[nla->nla_len - 1]) { + VLOG_DBG("%zu: attr %"PRIu16" lacks null terminator", + offset, type); + return false; + } + if (memchr(nla + 1, '\0', len - 1) != NULL) { + VLOG_DBG("%zu: attr %"PRIu16" lies about string length", + offset, type); + return false; + } + } + if (!p->optional && attrs[type] == NULL) { + assert(n_required > 0); + --n_required; + } + attrs[type] = nla; + } else { + /* Skip attribute type that we don't care about. */ + } + p += NLA_ALIGN(nla->nla_len); + } + if (n_required) { + VLOG_DBG("%zu required attrs missing", n_required); + return false; + } + return true; +} + +/* Miscellaneous. */ + +static const struct nl_policy family_policy[CTRL_ATTR_MAX + 1] = { + [CTRL_ATTR_FAMILY_ID] = {.type = NL_A_U16}, +}; + +static int do_lookup_genl_family(const char *name) +{ + struct nl_sock *sock; + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(family_policy)]; + int retval; + + retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + if (retval) { + return -retval; + } + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, sock, 0, GENL_ID_CTRL, NLM_F_REQUEST, + CTRL_CMD_GETFAMILY, 1); + nl_msg_put_string(&request, CTRL_ATTR_FAMILY_NAME, name); + retval = nl_sock_transact(sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + nl_sock_destroy(sock); + return -retval; + } + + if (!nl_policy_parse(reply, family_policy, attrs, + ARRAY_SIZE(family_policy))) { + nl_sock_destroy(sock); + buffer_delete(reply); + return -EPROTO; + } + + retval = nl_attr_get_u16(attrs[CTRL_ATTR_FAMILY_ID]); + if (retval == 0) { + retval = -EPROTO; + } + nl_sock_destroy(sock); + buffer_delete(reply); + return retval; +} + +/* If '*number' is 0, translates the given 
Generic Netlink family 'name' to a + * number and stores it in '*number'. If successful, returns 0 and the caller + * may use '*number' as the family number. On failure, returns a positive + * errno value and '*number' caches the errno value. */ +int +nl_lookup_genl_family(const char *name, int *number) +{ + if (*number == 0) { + *number = do_lookup_genl_family(name); + assert(*number != 0); + } + return *number > 0 ? 0 : -*number; +} + +/* Netlink PID. + * + * Every Netlink socket must be bound to a unique 32-bit PID. By convention, + * programs that have a single Netlink socket use their Unix process ID as PID, + * and programs with multiple Netlink sockets add a unique per-socket + * identifier in the bits above the Unix process ID. + * + * The kernel has Netlink PID 0. + */ + +/* Parameters for how many bits in the PID should come from the Unix process ID + * and how many unique per-socket. */ +#define SOCKET_BITS 10 +#define MAX_SOCKETS (1u << SOCKET_BITS) + +#define PROCESS_BITS (32 - SOCKET_BITS) +#define MAX_PROCESSES (1u << PROCESS_BITS) +#define PROCESS_MASK ((uint32_t) (MAX_PROCESSES - 1)) + +/* Bit vector of unused socket identifiers. */ +static uint32_t avail_sockets[ROUND_UP(MAX_SOCKETS, 32)]; + +/* Allocates and returns a new Netlink PID. */ +static int +alloc_pid(uint32_t *pid) +{ + int i; + + for (i = 0; i < MAX_SOCKETS; i++) { + if ((avail_sockets[i / 32] & (1u << (i % 32))) == 0) { + avail_sockets[i / 32] |= 1u << (i % 32); + *pid = (getpid() & PROCESS_MASK) | (i << PROCESS_BITS); + return 0; + } + } + VLOG_ERR("netlink pid space exhausted"); + return ENOBUFS; +} + +/* Makes the specified 'pid' available for reuse. 
*/ +static void +free_pid(uint32_t pid) +{ + int sock = pid >> PROCESS_BITS; + assert(avail_sockets[sock / 32] & (1u << (sock % 32))); + avail_sockets[sock / 32] &= ~(1u << (sock % 32)); +} diff --git a/lib/ofp-print.c b/lib/ofp-print.c new file mode 100644 index 00000000..f8bbc5fb --- /dev/null +++ b/lib/ofp-print.c @@ -0,0 +1,471 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ofp-print.h" +#include "xtoxll.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "ip.h" +#include "mac.h" +#include "compiler.h" +#include "util.h" +#include "openflow.h" + +/* Dumps the contents of the Ethernet frame in the 'len' bytes starting at + * 'data' to 'stream' using tcpdump. 'total_len' specifies the full length of + * the Ethernet frame (of which 'len' bytes were captured). 
+ * + * This starts and kills a tcpdump subprocess so it's quite expensive. */ +void ofp_print_packet(FILE *stream, const void *data, size_t len, + size_t total_len) +{ + struct pcap_hdr { + uint32_t magic_number; /* magic number */ + uint16_t version_major; /* major version number */ + uint16_t version_minor; /* minor version number */ + int32_t thiszone; /* GMT to local correction */ + uint32_t sigfigs; /* accuracy of timestamps */ + uint32_t snaplen; /* max length of captured packets */ + uint32_t network; /* data link type */ + } PACKED; + + struct pcaprec_hdr { + uint32_t ts_sec; /* timestamp seconds */ + uint32_t ts_usec; /* timestamp microseconds */ + uint32_t incl_len; /* number of octets of packet saved in file */ + uint32_t orig_len; /* actual length of packet */ + } PACKED; + + struct pcap_hdr ph; + struct pcaprec_hdr prh; + + char command[128]; + FILE *tcpdump; + int status; + + fflush(stream); + snprintf(command, sizeof command, "tcpdump -n -r - %d>&1 2>/dev/null", + fileno(stream)); + tcpdump = popen(command, "w"); + if (!tcpdump) { + error(errno, "exec(\"%s\")", command); + return; + } + + /* The pcap reader is responsible for figuring out endianness based on the + * magic number, so the lack of htonX calls here is intentional. 
*/ + ph.magic_number = 0xa1b2c3d4; + ph.version_major = 2; + ph.version_minor = 4; + ph.thiszone = 0; + ph.sigfigs = 0; + ph.snaplen = 1518; + ph.network = 1; /* Ethernet */ + + prh.ts_sec = 0; + prh.ts_usec = 0; + prh.incl_len = len; + prh.orig_len = total_len; + + fwrite(&ph, 1, sizeof ph, tcpdump); + fwrite(&prh, 1, sizeof prh, tcpdump); + fwrite(data, 1, len, tcpdump); + + fflush(tcpdump); + if (ferror(tcpdump)) + error(errno, "error writing \"%s\" subprocess", command); + + status = pclose(tcpdump); + if (WIFEXITED(status)) { + if (WEXITSTATUS(status)) + error(0, "tcpdump exited with status %d", WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + error(0, "tcpdump exited with signal %d", WTERMSIG(status)); + } +} + +/* Pretty-print the OFPT_PACKET_IN packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +static void ofp_packet_in(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_packet_in *op = oh; + size_t data_len; + + fprintf(stream, " total_len=%"PRIu16" in_port=%"PRIu8, + ntohs(op->total_len), ntohs(op->in_port)); + + if (op->reason == OFPR_ACTION) + fputs(" (via action)", stream); + else if (op->reason != OFPR_NO_MATCH) + fprintf(stream, " (***reason %"PRIu8"***)", op->reason); + + data_len = len - offsetof(struct ofp_packet_in, data); + fprintf(stream, " data_len=%zu", data_len); + if (htonl(op->buffer_id) == UINT32_MAX) { + fprintf(stream, " (unbuffered)"); + if (ntohs(op->total_len) != data_len) + fprintf(stream, " (***total_len != data_len***)"); + } else { + fprintf(stream, " buffer=%08"PRIx32, ntohl(op->buffer_id)); + if (ntohs(op->total_len) < data_len) + fprintf(stream, " (***total_len < data_len***)"); + } + putc('\n', stream); + + if (verbosity > 0) + ofp_print_packet(stream, op->data, data_len, ntohs(op->total_len)); +} + +static void ofp_print_port_name(FILE *stream, uint16_t port) +{ + if (port == UINT16_MAX) { + fputs("none", stream); + } else if (port == OFPP_FLOOD) { + 
fputs("flood", stream); + } else if (port == OFPP_CONTROLLER) { + fputs("controller", stream); + } else { + fprintf(stream, "%"PRIu16, port); + } +} + +static void ofp_print_action(FILE *stream, const struct ofp_action *a) +{ + switch (ntohs(a->type)) { + case OFPAT_OUTPUT: + fputs("output(", stream); + ofp_print_port_name(stream, ntohs(a->arg.output.port)); + if (a->arg.output.port == htons(OFPP_CONTROLLER)) { + fprintf(stream, ", max %"PRIu16" bytes", ntohs(a->arg.output.max_len)); + } + fputs(")", stream); + break; + + default: + fprintf(stream, "(decoder %"PRIu16" not implemented)", ntohs(a->type)); + break; + } +} + +static void ofp_print_actions(FILE *stream, + const struct ofp_action actions[], + size_t n_bytes) +{ + size_t i; + + fputs(" actions[", stream); + for (i = 0; i < n_bytes / sizeof *actions; i++) { + if (i) { + fputs("; ", stream); + } + ofp_print_action(stream, &actions[i]); + } + if (n_bytes % sizeof *actions) { + if (i) { + fputs("; ", stream); + } + fputs("; ***trailing garbage***", stream); + } + fputs("]", stream); +} + +/* Pretty-print the OFPT_PACKET_OUT packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +static void ofp_packet_out(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_packet_out *opo = oh; + + fputs(" in_port=", stream); + ofp_print_port_name(stream, ntohs(opo->in_port)); + + if (ntohl(opo->buffer_id) == UINT32_MAX) { + fputs(" out_port=", stream); + ofp_print_port_name(stream, ntohs(opo->out_port)); + if (verbosity > 0 && len > sizeof *opo) { + ofp_print_packet(stream, opo->u.data, len - sizeof *opo, + len - sizeof *opo); + } + } else { + fprintf(stream, " buffer=%08"PRIx32, ntohl(opo->buffer_id)); + ofp_print_actions(stream, opo->u.actions, len - sizeof *opo); + } + putc('\n', stream); +} + +/* qsort comparison function. 
*/ +static int +compare_ports(const void *a_, const void *b_) +{ + const struct ofp_phy_port *a = a_; + const struct ofp_phy_port *b = b_; + uint16_t ap = ntohs(a->port_no); + uint16_t bp = ntohs(b->port_no); + + return ap < bp ? -1 : ap > bp; +} + +static +void ofp_print_phy_port(FILE *stream, const struct ofp_phy_port *port) +{ + uint8_t name[OFP_MAX_PORT_NAME_LEN]; + int j; + + memcpy(name, port->name, sizeof name); + for (j = 0; j < sizeof name - 1; j++) { + if (!isprint(name[j])) { + break; + } + } + name[j] = '\0'; + + fprintf(stream, " %2d(%s): addr:"MAC_FMT", speed:%d, flags:%#x, " + "feat:%#x\n", ntohs(port->port_no), name, + MAC_ARGS(port->hw_addr), ntohl(port->speed), ntohl(port->flags), + ntohl(port->features)); +} + +/* Pretty-print the OFPT_DATA_HELLO packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +void ofp_print_data_hello(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_data_hello *odh = oh; + struct ofp_phy_port port_list[OFPP_MAX]; + int n_ports; + int i; + + + fprintf(stream, "dp id:%"PRIx64"\n", ntohll(odh->datapath_id)); + fprintf(stream, "tables: exact:%d, mac:%d, compressed:%d, general:%d\n", + ntohl(odh->n_exact), ntohl(odh->n_mac_only), + ntohl(odh->n_compression), ntohl(odh->n_general)); + fprintf(stream, "buffers: size:%d, number:%d, miss_len:%d\n", + ntohl(odh->buffer_mb), ntohl(odh->n_buffers), + ntohs(odh->miss_send_len)); + fprintf(stream, "features: capabilities:%#x, actions:%#x\n", + ntohl(odh->capabilities), ntohl(odh->actions)); + + if (ntohs(odh->header.length) >= sizeof *odh) { + len = MIN(len, ntohs(odh->header.length)); + } + n_ports = (len - sizeof *odh) / sizeof *odh->ports; + + memcpy(port_list, odh->ports, (len - sizeof *odh)); + qsort(port_list, n_ports, sizeof port_list[0], compare_ports); + for (i = 0; i < n_ports; i++) { + ofp_print_phy_port(stream, &port_list[i]); + } +} + +static void print_wild(FILE *stream, const char *leader, int is_wild, + const 
/* Writes 'leader' to 'stream', followed by the field value.
 *
 * If 'is_wild' is nonzero the field is wildcarded and a single '?' is written
 * in place of the value.  Otherwise the value is produced by formatting the
 * variable arguments according to the printf-style 'format'. */
static void print_wild(FILE *stream, const char *leader, int is_wild,
                       const char *format, ...)
{
    va_list ap;

    fputs(leader, stream);

    if (is_wild) {
        putc('?', stream);
        return;
    }

    va_start(ap, format);
    vfprintf(stream, format, ap);
    va_end(ap);
}
*/ +void ofp_print_flow_expired(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_flow_expired *ofe = oh; + + ofp_print_match(stream, &ofe->match); + fprintf(stream, + " secs%d pkts%lld bytes%lld\n", ntohl(ofe->duration), + ntohll(ofe->packet_count), ntohll(ofe->byte_count)); +} + +/* Pretty-print the OFPT_PORT_STATUS packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +void ofp_print_port_status(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_port_status *ops = oh; + + if (ops->reason == OFPPR_ADD) { + fprintf(stream, "add:"); + } else if (ops->reason == OFPPR_DELETE) { + fprintf(stream, "del:"); + } else if (ops->reason == OFPPR_MOD) { + fprintf(stream, "mod:"); + } else { + fprintf(stream, "err:"); + } + + ofp_print_phy_port(stream, &ops->desc); +} + +struct openflow_packet { + const char *name; + size_t min_size; + void (*printer)(FILE *, const void *, size_t len, int verbosity); +}; + +static const struct openflow_packet packets[] = { + [OFPT_CONTROL_HELLO] = { + "ofp_control_hello", + sizeof (struct ofp_control_hello), + NULL, + }, + [OFPT_DATA_HELLO] = { + "ofp_data_hello", + sizeof (struct ofp_data_hello), + ofp_print_data_hello, + }, + [OFPT_PACKET_IN] = { + "ofp_packet_in", + offsetof(struct ofp_packet_in, data), + ofp_packet_in, + }, + [OFPT_PACKET_OUT] = { + "ofp_packet_out", + sizeof (struct ofp_packet_out), + ofp_packet_out, + }, + [OFPT_FLOW_MOD] = { + "ofp_flow_mod", + sizeof (struct ofp_flow_mod), + ofp_print_flow_mod, + }, + [OFPT_FLOW_EXPIRED] = { + "ofp_flow_expired", + sizeof (struct ofp_flow_expired), + ofp_print_flow_expired, + }, + [OFPT_PORT_MOD] = { + "ofp_port_mod", + sizeof (struct ofp_port_mod), + NULL, + }, + [OFPT_PORT_STATUS] = { + "ofp_port_status", + sizeof (struct ofp_port_status), + ofp_print_port_status + }, +}; + +/* Pretty-print the OpenFlow packet of 'len' bytes at 'oh' to 'stream' at the + * given 'verbosity' level. 
0 is a minimal amount of verbosity and higher + * numbers increase verbosity. */ +void ofp_print(FILE *stream, const void *oh_, size_t len, int verbosity) +{ + const struct ofp_header *oh = oh_; + const struct openflow_packet *pkt; + + if (len < sizeof(struct ofp_header)) { + fprintf(stream, "OpenFlow packet too short:\n"); + hex_dump(stream, oh, len, 0, true); + return; + } else if (oh->version != 1) { + fprintf(stream, "Bad OpenFlow version %"PRIu8":\n", oh->version); + hex_dump(stream, oh, len, 0, true); + return; + } else if (oh->type >= ARRAY_SIZE(packets) || !packets[oh->type].name) { + fprintf(stream, "Unknown OpenFlow packet type %"PRIu8":\n", + oh->type); + hex_dump(stream, oh, len, 0, true); + return; + } + + pkt = &packets[oh->type]; + fprintf(stream, "%s (xid=%"PRIx32"):", pkt->name, oh->xid); + + if (ntohs(oh->length) > len) + fprintf(stream, " (***truncated to %zu bytes from %"PRIu16"***)", + len, ntohs(oh->length)); + else if (ntohs(oh->length) < len) { + fprintf(stream, " (***only uses %"PRIu16" bytes out of %zu***)\n", + ntohs(oh->length), len); + len = ntohs(oh->length); + } + + if (len < pkt->min_size) { + fprintf(stream, " (***length=%zu < min_size=%zu***)\n", + len, pkt->min_size); + } else if (!pkt->printer) { + fprintf(stream, " length=%zu (decoder not implemented)\n", + ntohs(oh->length)); + } else { + pkt->printer(stream, oh, len, verbosity); + } + if (verbosity >= 3) + hex_dump(stream, oh, len, 0, true); +} + +/* Pretty print a openflow table */ +void ofp_print_table(FILE *stream, const struct ofp_table* ot) +{ + fprintf(stream, "id: %d name: %-8s n_flows: %6d max_flows: %6d", + ntohs(ot->table_id), ot->name, ntohl(ot->n_flows), + ntohl(ot->max_flows)); +} diff --git a/lib/socket-util.c b/lib/socket-util.c new file mode 100644 index 00000000..3397fdb3 --- /dev/null +++ b/lib/socket-util.c @@ -0,0 +1,65 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "socket-util.h" +#include +#include +#include +#include +#include + +#include "vlog.h" +#define THIS_MODULE VLM_socket_util + +/* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a + * positive errno value. */ +int +set_nonblocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, 0); + if (flags != -1) { + return fcntl(fd, F_SETFL, flags | O_NONBLOCK) != -1 ? 0 : errno; + } else { + return errno; + } +} + +/* Translates 'host_name', which may be a DNS name or an IP address, into a + * numeric IP address in '*addr'. Returns 0 if successful, otherwise a + * positive errno value. */ +int +lookup_ip(const char *host_name, struct in_addr *addr) +{ + if (!inet_aton(host_name, addr)) { + struct hostent *he = gethostbyname(host_name); + if (he == NULL) { + VLOG_ERR("gethostbyname(%s): %s", host_name, + (h_errno == HOST_NOT_FOUND ? 
"host not found" + : h_errno == TRY_AGAIN ? "try again" + : h_errno == NO_RECOVERY ? "non-recoverable error" + : h_errno == NO_ADDRESS ? "no address" + : "unknown error")); + return ENOENT; + } + addr->s_addr = *(uint32_t *) he->h_addr; + } + return 0; +} diff --git a/lib/util.c b/lib/util.c new file mode 100644 index 00000000..8f038e5f --- /dev/null +++ b/lib/util.c @@ -0,0 +1,195 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util.h" +#include +#include +#include +#include + +const char *program_name; + +static void +out_of_memory(void) +{ + fatal(0, "virtual memory exhausted"); +} + +void * +xcalloc(size_t count, size_t size) +{ + void *p = count && size ? calloc(count, size) : malloc(1); + if (p == NULL) { + out_of_memory(); + } + return p; +} + +void * +xmalloc(size_t size) +{ + void *p = malloc(size ? 
/* Formats 'format' and its arguments, printf-style, into a newly allocated
 * string and returns it.  The caller owns the result and must free() it.
 *
 * Never returns a null pointer: allocation failure is handled by xmalloc(),
 * and a formatting failure reported by vsnprintf() terminates the program
 * through fatal(), in keeping with the other x*() helpers. */
char *
xasprintf(const char *format, ...)
{
    va_list args;
    int needed;
    char *s;

    /* First pass: measure.  vsnprintf() returns a negative value on error,
     * which must be caught before it is used as a size. */
    va_start(args, format);
    needed = vsnprintf(NULL, 0, format, args);
    va_end(args);
    if (needed < 0) {
        fatal(0, "xasprintf: cannot format \"%s\"", format);
    }

    s = xmalloc((size_t) needed + 1);

    /* Second pass: format into the exactly-sized buffer. */
    va_start(args, format);
    vsnprintf(s, (size_t) needed + 1, format, args);
    va_end(args);

    return s;
}
/* Writes the 'size' bytes in 'buf_' to 'stream' as hex bytes arranged 16 per
 * line.  Numeric offsets are also included, starting at 'ofs' for the first
 * byte in 'buf_'.  If 'ascii' is true then the corresponding ASCII characters
 * are also rendered alongside, with non-printable bytes shown as '.'.
 *
 * Each line is labelled with the offset of its first column, i.e. 'ofs'
 * rounded down to a multiple of 16.  When 'ofs' is not a multiple of 16 the
 * first line starts part-way along; the skipped positions are blank-padded so
 * that every byte appears in the column matching its offset. */
void
hex_dump(FILE *stream, const void *buf_, size_t size,
         uintptr_t ofs, bool ascii)
{
    const uint8_t *buf = buf_;
    const size_t per_line = 16; /* Maximum bytes per line. */

    while (size > 0) {
        size_t start, end, n;
        size_t i;

        /* Columns [start, end) of this line hold data. */
        start = ofs % per_line;
        end = per_line;
        if (end - start > size) {
            end = start + size;
        }
        n = end - start;

        /* Offset label: 'ofs' rounded down to the start of the line. */
        fprintf(stream, "%08jx ", (uintmax_t) (ofs - start));

        /* Hex column.  Every byte occupies three characters (two digits plus
         * a separator), so each absent position is padded with three
         * spaces to keep the columns aligned. */
        for (i = 0; i < start; i++) {
            fputs("   ", stream);
        }
        for (; i < end; i++) {
            fprintf(stream, "%02hhx%c",
                    buf[i - start], i == per_line / 2 - 1 ? '-' : ' ');
        }

        if (ascii) {
            for (; i < per_line; i++) {
                fputs("   ", stream);
            }
            /* ASCII column: one character per position. */
            fputs("|", stream);
            for (i = 0; i < start; i++) {
                fputs(" ", stream);
            }
            for (; i < end; i++) {
                int c = buf[i - start];
                putc(c >= 32 && c < 127 ? c : '.', stream);
            }
            for (; i < per_line; i++) {
                fputs(" ", stream);
            }
            fputs("|", stream);
        }
        fputs("\n", stream);

        ofs += n;
        buf += n;
        size -= n;
    }
}
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "openflow-netlink.h" +#include "buffer.h" +#include "dpif.h" +#include "netlink.h" +#include "socket-util.h" +#include "util.h" +#include "openflow.h" + +#include "vlog.h" +#define THIS_MODULE VLM_VCONN_NETLINK + +struct netlink_vconn +{ + struct vconn vconn; + struct dpif dp; +}; + +static struct netlink_vconn * +netlink_vconn_cast(struct vconn *vconn) +{ + assert(vconn->class == &netlink_vconn_class); + return CONTAINER_OF(vconn, struct netlink_vconn, vconn); +} + +static int +netlink_open(const char *name, char *suffix, struct vconn **vconnp) +{ + struct netlink_vconn *netlink; + int dp_idx; + int retval; + + if (sscanf(suffix, "%d", &dp_idx) != 1) { + fatal(0, "%s: bad peer name format", name); + } + + netlink = xmalloc(sizeof *netlink); + netlink->vconn.class = &netlink_vconn_class; + retval = dpif_open(dp_idx, true, &netlink->dp); + if (retval) { + free(netlink); + *vconnp = NULL; + return retval; + } + *vconnp = &netlink->vconn; + return 0; +} + +static void +netlink_close(struct vconn *vconn) +{ + struct netlink_vconn *netlink = netlink_vconn_cast(vconn); + dpif_close(&netlink->dp); + free(netlink); +} + +static void +netlink_prepoll(struct vconn *vconn, int want, struct pollfd *pfd) +{ + struct netlink_vconn *netlink = netlink_vconn_cast(vconn); + pfd->fd = nl_sock_fd(netlink->dp.sock); + if (want & WANT_RECV) { + pfd->events |= POLLIN; + 
} + if (want & WANT_SEND) { + pfd->events |= POLLOUT; + } +} + +static int +netlink_recv(struct vconn *vconn, struct buffer **bufferp) +{ + struct netlink_vconn *netlink = netlink_vconn_cast(vconn); + return dpif_recv_openflow(&netlink->dp, bufferp, false); +} + +static int +netlink_send(struct vconn *vconn, struct buffer *buffer) +{ + struct netlink_vconn *netlink = netlink_vconn_cast(vconn); + int retval = dpif_send_openflow(&netlink->dp, buffer, false); + if (!retval) { + buffer_delete(buffer); + } + return retval; +} + +struct vconn_class netlink_vconn_class = { + .name = "nl", + .open = netlink_open, + .close = netlink_close, + .prepoll = netlink_prepoll, + .recv = netlink_recv, + .send = netlink_send, +}; diff --git a/lib/vconn-tcp.c b/lib/vconn-tcp.c new file mode 100644 index 00000000..1878d2db --- /dev/null +++ b/lib/vconn-tcp.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "buffer.h" +#include "socket-util.h" +#include "util.h" +#include "openflow.h" +#include "ofp-print.h" + +#include "vlog.h" +#define THIS_MODULE VLM_vconn_tcp + +/* Active TCP. */ + +struct tcp_vconn +{ + struct vconn vconn; + int fd; + struct buffer *rxbuf; + struct buffer *txbuf; +}; + +static int +new_tcp_vconn(const char *name, int fd, struct vconn **vconnp) +{ + struct tcp_vconn *tcp; + int on = 1; + int retval; + + retval = set_nonblocking(fd); + if (retval) { + VLOG_ERR("%s: set_nonblocking: %s", name, strerror(retval)); + close(fd); + return retval; + } + + retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); + if (retval) { + VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, strerror(errno)); + close(fd); + return errno; + } + + tcp = xmalloc(sizeof *tcp); + tcp->vconn.class = &tcp_vconn_class; + tcp->fd = fd; + tcp->txbuf = NULL; + tcp->rxbuf = NULL; + *vconnp = &tcp->vconn; + return 0; +} + +static struct tcp_vconn * +tcp_vconn_cast(struct vconn *vconn) +{ + assert(vconn->class == &tcp_vconn_class); + return CONTAINER_OF(vconn, struct tcp_vconn, vconn); +} + + +static int +tcp_open(const char *name, char *suffix, struct vconn **vconnp) +{ + char *save_ptr; + const char *host_name; + const char *port_string; + struct sockaddr_in sin; + int retval; + int fd; + + /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that + * can cause segfaults here: + * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. + * Using "::" instead of the obvious ":" works around it. 
*/ + host_name = strtok_r(suffix, "::", &save_ptr); + port_string = strtok_r(NULL, "::", &save_ptr); + if (!host_name) { + fatal(0, "%s: bad peer name format", name); + } + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + if (lookup_ip(host_name, &sin.sin_addr)) { + return ENOENT; + } + sin.sin_port = htons(port_string ? atoi(port_string) : OFP_TCP_PORT); + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + VLOG_ERR("%s: socket: %s", name, strerror(errno)); + return errno; + } + + retval = connect(fd, (struct sockaddr *) &sin, sizeof sin); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: connect: %s", name, strerror(error)); + close(fd); + return error; + } + + return new_tcp_vconn(name, fd, vconnp); +} + +static void +tcp_close(struct vconn *vconn) +{ + struct tcp_vconn *tcp = tcp_vconn_cast(vconn); + close(tcp->fd); + free(tcp); +} + +static void +tcp_prepoll(struct vconn *vconn, int want, struct pollfd *pfd) +{ + struct tcp_vconn *tcp = tcp_vconn_cast(vconn); + pfd->fd = tcp->fd; + if (want & WANT_RECV) { + pfd->events |= POLLIN; + } + if (want & WANT_SEND || tcp->txbuf) { + pfd->events |= POLLOUT; + } +} + +static void +tcp_postpoll(struct vconn *vconn, short int *revents) +{ + struct tcp_vconn *tcp = tcp_vconn_cast(vconn); + if (*revents & POLLOUT && tcp->txbuf) { + ssize_t n = write(tcp->fd, tcp->txbuf->data, tcp->txbuf->size); + if (n < 0) { + if (errno != EAGAIN) { + VLOG_ERR("send: %s", strerror(errno)); + *revents |= POLLERR; + } + } else if (n > 0) { + buffer_pull(tcp->txbuf, n); + if (tcp->txbuf->size == 0) { + buffer_delete(tcp->txbuf); + tcp->txbuf = NULL; + } + } + if (tcp->txbuf) { + *revents &= ~POLLOUT; + } + } +} + +static int +tcp_recv(struct vconn *vconn, struct buffer **bufferp) +{ + struct tcp_vconn *tcp = tcp_vconn_cast(vconn); + struct buffer *rx; + size_t want_bytes; + ssize_t retval; + + if (tcp->rxbuf == NULL) { + tcp->rxbuf = buffer_new(1564); + } + rx = tcp->rxbuf; + +again: + if (sizeof(struct ofp_header) > 
rx->size) { + want_bytes = sizeof(struct ofp_header) - rx->size; + } else { + struct ofp_header *oh = rx->data; + size_t length = ntohs(oh->length); + if (length < sizeof(struct ofp_header)) { + VLOG_ERR("received too-short ofp_header (%zu bytes)", length); + return EPROTO; + } + want_bytes = length - rx->size; + } + buffer_reserve_tailroom(rx, want_bytes); + + retval = read(tcp->fd, buffer_tail(rx), want_bytes); + if (retval > 0) { + rx->size += retval; + if (retval == want_bytes) { + if (rx->size > sizeof(struct ofp_header)) { + *bufferp = rx; + tcp->rxbuf = NULL; + return 0; + } else { + goto again; + } + } + return EAGAIN; + } else if (retval == 0) { + return rx->size ? EPROTO : EOF; + } else { + return retval ? errno : EAGAIN; + } +} + +static int +tcp_send(struct vconn *vconn, struct buffer *buffer) +{ + struct tcp_vconn *tcp = tcp_vconn_cast(vconn); + ssize_t retval; + + if (tcp->txbuf) { + return EAGAIN; + } + + retval = write(tcp->fd, buffer->data, buffer->size); + if (retval == buffer->size) { + buffer_delete(buffer); + return 0; + } else if (retval >= 0 || errno == EAGAIN) { + tcp->txbuf = buffer; + if (retval > 0) { + buffer_pull(buffer, retval); + } + return 0; + } else { + return errno; + } +} + +struct vconn_class tcp_vconn_class = { + .name = "tcp", + .open = tcp_open, + .close = tcp_close, + .prepoll = tcp_prepoll, + .postpoll = tcp_postpoll, + .recv = tcp_recv, + .send = tcp_send, +}; + +/* Passive TCP. 
*/ + +struct ptcp_vconn +{ + struct vconn vconn; + int fd; +}; + +static struct ptcp_vconn * +ptcp_vconn_cast(struct vconn *vconn) +{ + assert(vconn->class == &ptcp_vconn_class); + return CONTAINER_OF(vconn, struct ptcp_vconn, vconn); +} + +static int +ptcp_open(const char *name, char *suffix, struct vconn **vconnp) +{ + struct sockaddr_in sin; + struct ptcp_vconn *ptcp; + int retval; + int fd; + unsigned int yes = 1; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + VLOG_ERR("%s: socket: %s", name, strerror(errno)); + return errno; + } + + if ( setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,&yes,sizeof(yes)) < 0) { + VLOG_ERR("%s: setsockopt::SO_REUSEADDR: %s", name, strerror(errno)); + return errno; + } + + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(atoi(suffix) ? atoi(suffix) : OFP_TCP_PORT); + retval = bind(fd, (struct sockaddr *) &sin, sizeof sin); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: bind: %s", name, strerror(error)); + close(fd); + return error; + } + + retval = listen(fd, 10); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: listen: %s", name, strerror(error)); + close(fd); + return error; + } + + retval = set_nonblocking(fd); + if (retval) { + VLOG_ERR("%s: set_nonblocking: %s", name, strerror(retval)); + close(fd); + return retval; + } + + ptcp = xmalloc(sizeof *ptcp); + ptcp->vconn.class = &ptcp_vconn_class; + ptcp->fd = fd; + *vconnp = &ptcp->vconn; + return 0; +} + +static void +ptcp_close(struct vconn *vconn) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + close(ptcp->fd); + free(ptcp); +} + +static void +ptcp_prepoll(struct vconn *vconn, int want, struct pollfd *pfd) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + pfd->fd = ptcp->fd; + if (want & WANT_ACCEPT) { + pfd->events |= POLLIN; + } +} + +static int +ptcp_accept(struct vconn *vconn, struct vconn **new_vconnp) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + 
/* The passive TCP vconn class, selected by the connection-name prefix "ptcp".
 * It provides open, close, prepoll and accept; the remaining members are left
 * unset (zero-initialized), so this class neither receives nor sends
 * messages itself. */
struct vconn_class ptcp_vconn_class = {
    .name = "ptcp",
    .open = ptcp_open,
    .close = ptcp_close,
    .prepoll = ptcp_prepoll,
    .accept = ptcp_accept,
};
+ */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include "buffer.h" +#include "flow.h" +#include "openflow.h" +#include "util.h" + +static struct vconn_class *vconn_classes[] = { + &tcp_vconn_class, + &ptcp_vconn_class, +#ifdef HAVE_NETLINK + &netlink_vconn_class, +#endif +}; + +/* Check the validity of the vconn class structures. */ +static void +check_vconn_classes(void) +{ +#ifndef NDEBUG + size_t i; + + for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) { + struct vconn_class *class = vconn_classes[i]; + assert(class->name != NULL); + assert(class->open != NULL); + assert(class->close != NULL); + assert(class->prepoll != NULL); + assert(class->accept + ? !class->recv && !class->send + : class->recv && class->send); + } +#endif +} + +/* Attempts to connect to an OpenFlow device. 'name' is a connection name in + * the form "TYPE:ARGS", where TYPE is the vconn class's name and ARGS are + * vconn class-specific. + * + * Returns 0 if successful, otherwise a positive errno value. If successful, + * stores a pointer to the new connection in '*vconnp', otherwise a null + * pointer. */ +int +vconn_open(const char *name, struct vconn **vconnp) +{ + size_t prefix_len; + size_t i; + + check_vconn_classes(); + + prefix_len = strcspn(name, ":"); + if (prefix_len == strlen(name)) { + fatal(0, "`%s' not correct format for peer name", name); + } + for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) { + struct vconn_class *class = vconn_classes[i]; + if (strlen(class->name) == prefix_len + && !memcmp(class->name, name, prefix_len)) { + char *suffix_copy = xstrdup(name + prefix_len + 1); + int retval = class->open(name, suffix_copy, vconnp); + free(suffix_copy); + if (retval) { + *vconnp = NULL; + } + return retval; + } + } + fatal(0, "unknown peer type `%.*s'", (int) prefix_len, name); + abort(); +} + +/* Closes 'vconn'. 
*/ +void +vconn_close(struct vconn *vconn) +{ + if (vconn != NULL) { + (vconn->class->close)(vconn); + } +} + +/* Returns true if 'vconn' is a passive vconn, that is, its purpose is to + * wait for connections to arrive, not to transfer data. Returns false if + * 'vconn' is an active vconn, that is, its purpose is to transfer data, not + * to wait for new connections to arrive. */ +bool +vconn_is_passive(const struct vconn *vconn) +{ + return vconn->class->accept != NULL; +} + +/* Initializes 'pfd->fd' and 'pfd->events' appropriately so that poll() will + * wake up when the connection becomes available for the operations specified + * in 'want', or for performing the vconn's needed internal processing. */ +void +vconn_prepoll(struct vconn *vconn, int want, struct pollfd *pollfd) +{ + (vconn->class->prepoll)(vconn, want, pollfd); +} + +/* Perform any internal processing needed by the connections. The vconn file + * descriptor's status, as reported by poll(), must be provided in '*revents'. + * + * The postpoll function adjusts '*revents' to reflect the status of the + * connection from the caller's point of view. That is, upon return '*revents + * & POLLIN' indicates that a packet is (potentially) ready to be read (for an + * active vconn) or a new connection is ready to be accepted (for a passive + * vconn) and '*revents & POLLOUT' indicates that a packet is (potentially) + * ready to be written. */ +void +vconn_postpoll(struct vconn *vconn, short int *revents) +{ + if (vconn->class->postpoll) { + (vconn->class->postpoll)(vconn, revents); + } +} + +/* Tries to accept a new connection on 'vconn', which must be a passive vconn. + * If successful, stores the new connection in '*new_vconn' and returns 0. + * Otherwise, returns a positive errno value. + * + * vconn_accept will not block waiting for a connection. If no connection is + * ready to be accepted, it returns EAGAIN immediately. 
*/ +int +vconn_accept(struct vconn *vconn, struct vconn **new_vconn) +{ + int retval = (vconn->class->accept)(vconn, new_vconn); + if (retval) { + *new_vconn = NULL; + } + return retval; +} + +/* Tries to receive an OpenFlow message from 'vconn', which must be an active + * vconn. If successful, stores the received message into '*msgp' and returns + * 0. The caller is responsible for destroying the message with + * buffer_delete(). On failure, returns a positive errno value and stores a + * null pointer into '*msgp'. On normal connection close, returns EOF. + * + * vconn_recv will not block waiting for a packet to arrive. If no packets + * have been received, it returns EAGAIN immediately. */ +int +vconn_recv(struct vconn *vconn, struct buffer **msgp) +{ + int retval = (vconn->class->recv)(vconn, msgp); + if (retval) { + *msgp = NULL; + } + return retval; +} + +/* Tries to queue 'msg' for transmission on 'vconn', which must be an active + * vconn. If successful, returns 0, in which case ownership of 'msg' is + * transferred to the vconn. Success does not guarantee that 'msg' has been or + * ever will be delivered to the peer, only that it has been queued for + * transmission. + * + * Returns a positive errno value on failure, in which case the caller + * retains ownership of 'msg'. + * + * vconn_send will not block. If 'msg' cannot be immediately accepted for + * transmission, it returns EAGAIN immediately. */ +int +vconn_send(struct vconn *vconn, struct buffer *msg) +{ + return (vconn->class->send)(vconn, msg); +} + +/* Same as vconn_send, except that it waits until 'msg' can be transmitted. 
*/ +int +vconn_send_wait(struct vconn *vconn, struct buffer *msg) +{ + int retval; + while ((retval = vconn_send(vconn, msg)) == EAGAIN) { + struct pollfd pfd; + + pfd.fd = -1; + pfd.events = 0; + vconn_prepoll(vconn, WANT_SEND, &pfd); + do { + retval = poll(&pfd, 1, -1); + } while (retval < 0 && errno == EINTR); + if (retval < 0) { + return errno; + } + assert(retval == 1); + vconn_postpoll(vconn, &pfd.revents); + } + return retval; +} + +struct buffer * +make_add_simple_flow(const struct flow *flow, + uint32_t buffer_id, uint16_t out_port) +{ + struct ofp_flow_mod *ofm; + size_t size = sizeof *ofm + sizeof ofm->actions[0]; + struct buffer *out = buffer_new(size); + ofm = buffer_put_uninit(out, size); + memset(ofm, 0, size); + ofm->header.version = OFP_VERSION; + ofm->header.type = OFPT_FLOW_MOD; + ofm->header.length = htons(size); + ofm->match.wildcards = htons(0); + ofm->match.in_port = flow->in_port; + memcpy(ofm->match.dl_src, flow->dl_src, sizeof ofm->match.dl_src); + memcpy(ofm->match.dl_dst, flow->dl_dst, sizeof ofm->match.dl_dst); + ofm->match.dl_vlan = flow->dl_vlan; + ofm->match.dl_type = flow->dl_type; + ofm->match.nw_src = flow->nw_src; + ofm->match.nw_dst = flow->nw_dst; + ofm->match.nw_proto = flow->nw_proto; + ofm->match.tp_src = flow->tp_src; + ofm->match.tp_dst = flow->tp_dst; + ofm->command = htons(OFPFC_ADD); + ofm->max_idle = htons(60); + ofm->buffer_id = htonl(buffer_id); + ofm->group_id = htonl(0); + ofm->actions[0].type = htons(OFPAT_OUTPUT); + ofm->actions[0].arg.output.max_len = htons(0); + ofm->actions[0].arg.output.port = htons(out_port); + return out; +} + +struct buffer * +make_unbuffered_packet_out(const struct buffer *packet, + uint16_t in_port, uint16_t out_port) +{ + struct ofp_packet_out *opo; + size_t size = sizeof *opo + packet->size; + struct buffer *out = buffer_new(size); + opo = buffer_put_uninit(out, size); + memset(opo, 0, sizeof *opo); + opo->header.version = OFP_VERSION; + opo->header.type = OFPT_PACKET_OUT; + 
opo->header.length = htons(size); + opo->buffer_id = htonl(UINT32_MAX); + opo->in_port = htons(in_port); + opo->out_port = htons(out_port); + memcpy(opo->u.data, packet->data, packet->size); + return out; +} + +struct buffer * +make_buffered_packet_out(uint32_t buffer_id, + uint16_t in_port, uint16_t out_port) +{ + struct ofp_packet_out *opo; + size_t size = sizeof *opo + sizeof opo->u.actions[0]; + struct buffer *out = buffer_new(size); + opo = buffer_put_uninit(out, size); + memset(opo, 0, size); + opo->header.version = OFP_VERSION; + opo->header.type = OFPT_PACKET_OUT; + opo->header.length = htons(size); + opo->buffer_id = htonl(buffer_id); + opo->in_port = htons(in_port); + opo->out_port = htons(out_port); + opo->u.actions[0].type = htons(OFPAT_OUTPUT); + opo->u.actions[0].arg.output.max_len = htons(0); + opo->u.actions[0].arg.output.port = htons(out_port); + return out; +} + diff --git a/lib/vlog-socket.c b/lib/vlog-socket.c new file mode 100644 index 00000000..b3c2a285 --- /dev/null +++ b/lib/vlog-socket.c @@ -0,0 +1,504 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vlog-socket.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fatal-signal.h" +#include "util.h" +#include "vlog.h" + +#ifndef SCM_CREDENTIALS +#include +#endif + +
static int make_unix_socket(bool nonblock, bool passcred,
                            const char *bind_path, const char *connect_path);

/* Server for Vlog control connection. */
struct vlog_server {
    char *path;                 /* Name of the bound Unix domain socket. */
    int fd;                     /* Socket on which requests are received. */
};

/* Start listening for connections from clients and processing their
 * requests.  'path' may be:
 *
 *      - NULL, in which case the default socket path is used.  (Only one
 *        vlog_server per process can use the default path.)
 *
 *      - A name that does not start with '/', in which case it is appended to
 *        the default socket path.
 *
 *      - An absolute path (starting with '/') that gives the exact name of
 *        the Unix domain socket to listen on.
 *
 * Returns 0 if successful, otherwise a positive errno value.  If successful,
 * sets '*serverp' to the new vlog_server, otherwise to NULL. */
int
vlog_server_listen(const char *path, struct vlog_server **serverp)
{
    struct vlog_server *server = xmalloc(sizeof *server);

    if (path && path[0] == '/') {
        server->path = xstrdup(path);
    } else {
        server->path = xasprintf("/tmp/vlogs.%ld%s",
                                 (long int) getpid(), path ? path : "");
    }

    server->fd = make_unix_socket(true, true, server->path, NULL);
    if (server->fd < 0) {
        /* make_unix_socket() reports failure as a negative errno value.
         * Convert it to the positive value this function promises, and do so
         * before 'server' is freed so that freed memory is never read. */
        int error = -server->fd;

        fprintf(stderr, "Could not initialize vlog configuration socket: %s\n",
                strerror(error));
        free(server->path);
        free(server);
        *serverp = NULL;
        return error;
    }
    *serverp = server;
    return 0;
}

/* Destroys 'server' and stops listening for connections.  Closes the socket,
 * removes its file from the filesystem and from the fatal-signal unlink list,
 * and frees 'server'.  Does nothing if 'server' is null. */
void
vlog_server_close(struct vlog_server *server)
{
    if (server) {
        close(server->fd);
        unlink(server->path);
        fatal_signal_remove_file_to_unlink(server->path);
        free(server->path);
        free(server);
    }
}

/* Returns the fd used by 'server'.  The caller can poll this fd (POLLIN) to
 * determine when to call vlog_server_poll(). */
int
vlog_server_get_fd(const struct vlog_server *server)
{
    return server->fd;
}

/* Receives one request datagram from 'server' into 'cmd_buf', which has room
 * for 'cmd_buf_size' bytes, and null-terminates it.  Stores the sender's
 * address in '*un' and its length in '*un_len' so that a reply can be sent.
 *
 * Returns 0 if a message was received from a sender whose uid is 0 or the same
 * as this process's uid, a positive errno value if receiving failed (EAGAIN if
 * the socket is nonblocking and nothing is waiting), or -1 if a message was
 * received but must be ignored because its sender could not be verified. */
static int
recv_with_creds(const struct vlog_server *server,
                char *cmd_buf, size_t cmd_buf_size,
                struct sockaddr_un *un, socklen_t *un_len)
{
#ifdef SCM_CREDENTIALS
    /* Read a message and control messages from the server socket. */
    char cred_buf[CMSG_SPACE(sizeof(struct ucred))];
    ssize_t n;
    struct iovec iov;
    struct msghdr msg;
    struct ucred *cred;
    struct cmsghdr *cmsg;

    /* Leave room for the null terminator added below. */
    iov.iov_base = cmd_buf;
    iov.iov_len = cmd_buf_size - 1;

    memset(&msg, 0, sizeof msg);
    msg.msg_name = un;
    msg.msg_namelen = sizeof *un;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cred_buf;
    msg.msg_controllen = sizeof cred_buf;

    n = recvmsg(server->fd, &msg, 0);
    *un_len = msg.msg_namelen;
    if (n < 0) {
        return errno;
    }
    cmd_buf[n] = '\0';

    /* Ensure that the message carries credentials showing that it was sent
     * by the same user who started us, or by root. */
    cred = NULL;
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET
            && cmsg->cmsg_type == SCM_CREDENTIALS) {
            cred = (struct ucred *) CMSG_DATA(cmsg);
        } else if (cmsg->cmsg_level == SOL_SOCKET
                   && cmsg->cmsg_type == SCM_RIGHTS) {
            /* Anyone can send us fds.  If we don't close them, then that's
             * a DoS: the sender can overflow our fd table. */
            int *fds = (int *) CMSG_DATA(cmsg);
            size_t n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof *fds;
            size_t i;
            for (i = 0; i < n_fds; i++) {
                close(fds[i]);
            }
        }
    }
    if (!cred) {
        fprintf(stderr, "vlog: config message lacks credentials\n");
        return -1;
    } else if (cred->uid && cred->uid != getuid()) {
        fprintf(stderr, "vlog: config message uid=%ld is not 0 or %ld\n",
                (long int) cred->uid, (long int) getuid());
        return -1;
    }

    return 0;
#else /* !SCM_CREDENTIALS */
    socklen_t len;
    ssize_t n;
    struct stat s;
    time_t recent;

    /* Receive a message. */
    len = sizeof *un;
    n = recvfrom(server->fd, cmd_buf, cmd_buf_size - 1, 0,
                 (struct sockaddr *) un, &len);
    *un_len = len;
    if (n < 0) {
        return errno;
    }
    cmd_buf[n] = '\0';

    /* Without kernel-supplied credentials, the sender is judged by the socket
     * file it is bound to.  An unbound sender has no path at all; reject it
     * rather than let the subtraction below wrap around and index far outside
     * 'sun_path'. */
    if (len <= offsetof(struct sockaddr_un, sun_path)) {
        fprintf(stderr, "vlog: config message from unbound socket\n");
        return -1;
    }
    len -= offsetof(struct sockaddr_un, sun_path);
    if (len >= sizeof un->sun_path) {
        /* A path that fills 'sun_path' leaves no room for a terminator. */
        len = sizeof un->sun_path - 1;
    }
    un->sun_path[len] = '\0';
    if (stat(un->sun_path, &s) < 0) {
        fprintf(stderr, "vlog: config message from inaccessible socket: %s\n",
                strerror(errno));
        return -1;
    }
    if (!S_ISSOCK(s.st_mode)) {
        fprintf(stderr, "vlog: config message not from a socket\n");
        return -1;
    }

    /* Only accept sockets whose timestamps all fall within the last 30
     * seconds. */
    recent = time(0) - 30;
    if (s.st_atime < recent || s.st_ctime < recent || s.st_mtime < recent) {
        fprintf(stderr, "vlog: config socket too old\n");
        return -1;
    }
    if (s.st_uid && s.st_uid != getuid()) {
        fprintf(stderr, "vlog: config message uid=%ld is not 0 or %ld\n",
                (long int) s.st_uid, (long int) getuid());
        return -1;
    }
    return 0;
#endif /* !SCM_CREDENTIALS */
}

/* Processes incoming requests for 'server'. 
*/ +void +vlog_server_poll(struct vlog_server *server) +{ + for (;;) { + char cmd_buf[512]; + struct sockaddr_un un; + socklen_t un_len; + char *reply; + int error; + + error = recv_with_creds(server, cmd_buf, sizeof cmd_buf, &un, &un_len); + if (error > 0) { + if (error != EAGAIN && error != EWOULDBLOCK) { + fprintf(stderr, "vlog: reading configuration socket: %s", + strerror(errno)); + } + return; + } else if (error < 0) { + continue; + } + + /* Process message and send reply. */ + if (!strncmp(cmd_buf, "set ", 4)) { + char *msg = vlog_set_levels_from_string(cmd_buf + 4); + reply = msg ? msg : xstrdup("ack"); + } else if (!strcmp(cmd_buf, "list")) { + reply = vlog_get_levels(); + } else { + reply = xstrdup("nak"); + } + sendto(server->fd, reply, strlen(reply), 0, + (struct sockaddr*) &un, un_len); + free(reply); + } +} + +/* Client for Vlog control connection. */ + +struct vlog_client { + char *connect_path; + char *bind_path; + int fd; +}; + +/* Connects to a Vlog server socket. If 'path' does not start with '/', then + * it start with a PID as a string. If a non-null, non-absolute name was + * passed to Vlog_server_socket::listen(), then it must follow the PID in + * 'path'. If 'path' starts with '/', then it must be an absolute path that + * gives the exact name of the Unix domain socket to connect to. + * + * Returns 0 if successful, otherwise a positive errno value. If successful, + * sets '*clientp' to the new vlog_client, otherwise to NULL. */ +int +vlog_client_connect(const char *path, struct vlog_client **clientp) +{ + struct vlog_client *client; + int fd; + + client = xmalloc(sizeof *client); + client->connect_path = (path[0] == '/' + ? 
xstrdup(path) + : xasprintf("/tmp/vlogs.%s", path)); + + client->bind_path = xasprintf("/tmp/vlog.%ld", (long int) getpid()); + fd = make_unix_socket(false, false, + client->bind_path, client->connect_path); + + if (fd >= 0) { + client->fd = fd; + *clientp = client; + return 0; + } else { + free(client->connect_path); + free(client->bind_path); + free(client); + *clientp = NULL; + return errno; + } +} + +/* Destroys 'client'. */ +void +vlog_client_close(struct vlog_client *client) +{ + if (client) { + unlink(client->bind_path); + fatal_signal_remove_file_to_unlink(client->bind_path); + free(client->bind_path); + free(client->connect_path); + close(client->fd); + free(client); + } +} + +/* Sends 'request' to the server socket that 'client' is connected to. Returns + * 0 if successful, otherwise a positive errno value. */ +int +vlog_client_send(struct vlog_client *client, const char *request) +{ +#ifdef SCM_CREDENTIALS + struct ucred cred; + struct iovec iov; + char buf[CMSG_SPACE(sizeof cred)]; + struct msghdr msg; + struct cmsghdr* cmsg; + ssize_t nbytes; + + cred.pid = getpid(); + cred.uid = getuid(); + cred.gid = getgid(); + + iov.iov_base = (void*) request; + iov.iov_len = strlen(request); + + memset(&msg, 0, sizeof msg); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof buf; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof cred); + memcpy(CMSG_DATA(cmsg), &cred, sizeof cred); + msg.msg_controllen = cmsg->cmsg_len; + + nbytes = sendmsg(client->fd, &msg, 0); +#else /* !SCM_CREDENTIALS */ + ssize_t nbytes = send(client->fd, request, strlen(request), 0); +#endif /* !SCM_CREDENTIALS */ + if (nbytes > 0) { + return nbytes == strlen(request) ? 0 : ENOBUFS; + } else { + return errno; + } +} + +/* Attempts to receive a response from the server socket that 'client' is + * connected to. 
Returns 0 if successful, otherwise a positive errno value. + * If successful, sets '*reply' to the reply, which the caller must free, + * otherwise to NULL. */ +int +vlog_client_recv(struct vlog_client *client, char **reply) +{ + struct pollfd pfd; + int nfds; + char buffer[65536]; + ssize_t nbytes; + + *reply = NULL; + + pfd.fd = client->fd; + pfd.events = POLLIN; + nfds = poll(&pfd, 1, 1000); + if (nfds == 0) { + return ETIMEDOUT; + } else if (nfds < 0) { + return errno; + } + + nbytes = read(client->fd, buffer, sizeof buffer - 1); + if (nbytes < 0) { + return errno; + } else { + buffer[nbytes] = '\0'; + *reply = xstrdup(buffer); + return 0; + } +} + +/* Sends 'request' to the server socket and waits for a reply. Returns 0 if + * successful, otherwise to a positive errno value. If successful, sets + * '*reply' to the reply, which the caller must free, otherwise to NULL. */ +int +vlog_client_transact(struct vlog_client *client, + const char *request, char **reply) +{ + int i; + + /* Retry up to 3 times. */ + for (i = 0; i < 3; ++i) { + int error = vlog_client_send(client, request); + if (error) { + *reply = NULL; + return error; + } + error = vlog_client_recv(client, reply); + if (error != ETIMEDOUT) { + return error; + } + } + *reply = NULL; + return ETIMEDOUT; +} + +/* Returns the path of the server socket to which 'client' is connected. The + * caller must not modify or free the returned string. */ +const char * +vlog_client_target(const struct vlog_client *client) +{ + return client->connect_path; +} + +/* Helper functions. */ + +/* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in + * '*un_len' the size of the sockaddr_un. 
*/ +static void +make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len) +{ + un->sun_family = AF_UNIX; + strncpy(un->sun_path, name, sizeof un->sun_path); + un->sun_path[sizeof un->sun_path - 1] = '\0'; + *un_len = (offsetof(struct sockaddr_un, sun_path) + + strlen (un->sun_path) + 1); +} + +/* Creates a Unix domain datagram socket that is bound to '*bind_path' (if + * 'bind_path' is non-null) and connected to '*connect_path' (if 'connect_path' + * is non-null). If 'nonblock' is true, the socket is made non-blocking. If + * 'passcred' is true, the socket is configured to receive SCM_CREDENTIALS + * control messages. + * + * Returns the socket's fd if successful, otherwise a negative errno value. */ +static int +make_unix_socket(bool nonblock, bool passcred UNUSED, + const char *bind_path, const char *connect_path) +{ + int error; + int fd; + + fd = socket(PF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + return -errno; + } + + if (nonblock) { + int flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + goto error; + } + if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) { + goto error; + } + } + + if (bind_path) { + struct sockaddr_un un; + socklen_t un_len; + make_sockaddr_un(bind_path, &un, &un_len); + if (unlink(un.sun_path) && errno != ENOENT) { + fprintf(stderr, "unlinking \"%s\": %s\n", + un.sun_path, strerror(errno)); + } + fatal_signal_add_file_to_unlink(bind_path); + if (bind(fd, (struct sockaddr*) &un, un_len) + || fchmod(fd, S_IRWXU)) { + goto error; + } + } + + if (connect_path) { + struct sockaddr_un un; + socklen_t un_len; + make_sockaddr_un(connect_path, &un, &un_len); + if (connect(fd, (struct sockaddr*) &un, un_len)) { + goto error; + } + } + +#ifdef SCM_CREDENTIALS + if (passcred) { + int enable = 1; + if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) { + goto error; + } + } +#endif + + return fd; + +error: + if (bind_path) { + fatal_signal_remove_file_to_unlink(bind_path); + } + error = errno; + close(fd); + 
return -error; +} diff --git a/lib/vlog.c b/lib/vlog.c new file mode 100644 index 00000000..66d57101 --- /dev/null +++ b/lib/vlog.c @@ -0,0 +1,309 @@ +/* Copyright (C) 2007, 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vlog.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "util.h" + +/* Name for each logging level. */ +static const char *level_names[VLL_N_LEVELS] = { + [VLL_EMER] = "EMER", + [VLL_ERR] = "ERR", + [VLL_WARN] = "WARN", + [VLL_DBG] = "DBG", +}; + +/* Name for each logging facility. 
*/ +static const char *facility_names[VLF_N_FACILITIES] = { + [VLF_CONSOLE] = "console", + [VLF_SYSLOG] = "syslog", +}; + +/* Name for each logging module */ +static const char *module_names[VLM_N_MODULES] = { +#define VLOG_MODULE(NAME) #NAME, + VLOG_MODULES +#undef VLOG_MODULES +}; + +static int levels[VLM_N_MODULES][VLF_N_FACILITIES]; + +/* Searches the 'n_names' in 'names'. Returns the index of a match for + * 'target', or 'n_names' if no name matches. */ +static size_t +search_name_array(const char *target, const char **names, size_t n_names) +{ + size_t i; + + for (i = 0; i < n_names; i++) { + assert(names[i]); + if (!strcasecmp(names[i], target)) { + break; + } + } + return i; +} + +/* Returns the name for logging level 'level'. */ +const char * +vlog_get_level_name(enum vlog_level level) +{ + assert(level < VLL_N_LEVELS); + return level_names[level]; +} + +/* Returns the logging level with the given 'name', or VLL_N_LEVELS if 'name' + * is not the name of a logging level. */ +enum vlog_level +vlog_get_level_val(const char *name) +{ + return search_name_array(name, level_names, ARRAY_SIZE(level_names)); +} + +/* Returns the name for logging facility 'facility'. */ +const char * +vlog_get_facility_name(enum vlog_facility facility) +{ + assert(facility < VLF_N_FACILITIES); + return facility_names[facility]; +} + +/* Returns the logging facility named 'name', or VLF_N_FACILITIES if 'name' is + * not the name of a logging facility. */ +enum vlog_facility +vlog_get_facility_val(const char *name) +{ + return search_name_array(name, facility_names, ARRAY_SIZE(facility_names)); +} + +/* Returns the name for logging module 'module'. */ +const char *vlog_get_module_name(enum vlog_module module) +{ + assert(module < VLM_N_MODULES); + return module_names[module]; +} + +/* Returns the logging module named 'name', or VLM_N_MODULES if 'name' is not + * the name of a logging module. 
*/ +enum vlog_module +vlog_get_module_val(const char *name) +{ + return search_name_array(name, module_names, ARRAY_SIZE(module_names)); +} + +/* Returns the current logging level for the given 'module' and 'facility'. */ +enum vlog_level +vlog_get_level(enum vlog_module module, enum vlog_facility facility) +{ + assert(module < VLM_N_MODULES); + assert(facility < VLF_N_FACILITIES); + return levels[module][facility]; +} + +static void +set_facility_level(enum vlog_facility facility, enum vlog_module module, + enum vlog_level level) +{ + assert(facility >= 0 && facility < VLF_N_FACILITIES); + assert(level < VLL_N_LEVELS); + + if (module == VLM_ANY_MODULE) { + for (module = 0; module < VLM_N_MODULES; module++) { + levels[module][facility] = level; + } + } else { + levels[module][facility] = level; + } +} + +/* Sets the logging level for the given 'module' and 'facility' to 'level'. */ +void +vlog_set_levels(enum vlog_module module, enum vlog_facility facility, + enum vlog_level level) +{ + assert(facility < VLF_N_FACILITIES || facility == VLF_ANY_FACILITY); + if (facility == VLF_ANY_FACILITY) { + for (facility = 0; facility < VLF_N_FACILITIES; facility++) { + set_facility_level(facility, module, level); + } + } else { + set_facility_level(facility, module, level); + } +} + +/* Set debugging levels: + * + * mod:facility:level mod2:facility:level ... + * + * Return null if successful, otherwise an error message that the caller must + * free(). 
+ */ +char * +vlog_set_levels_from_string(const char *s_) +{ + char *save_ptr; + char *s = xstrdup(s_); + char *module, *level, *facility; + + for (module = strtok_r(s, ": \t", &save_ptr); module != NULL; + module = strtok_r(NULL, ": \t", &save_ptr)) { + enum vlog_module e_module; + enum vlog_level e_level; + enum vlog_facility e_facility; + + facility = strtok_r(NULL, ":", &save_ptr); + level = strtok_r(NULL, ":", &save_ptr); + if (level == NULL || facility == NULL) { + free(s); + return xstrdup("syntax error in level string"); + } + + if (!strcmp(module, "ANY")) { + e_module = VLM_ANY_MODULE; + } else { + e_module = vlog_get_module_val(module); + if (e_module >= VLM_N_MODULES) { + char *msg = xasprintf("unknown module \"%s\"", module); + free(s); + return msg; + } + } + + if (!strcmp(facility, "ANY")) { + e_facility = VLF_ANY_FACILITY; + } else { + e_facility = vlog_get_facility_val(facility); + if (e_facility >= VLF_N_FACILITIES) { + char *msg = xasprintf("unknown facility \"%s\"", facility); + free(s); + return msg; + } + } + + e_level = vlog_get_level_val(level); + if (e_level >= VLL_N_LEVELS) { + char *msg = xasprintf("unknown level \"%s\"", level); + free(s); + return msg; + } + + vlog_set_levels(e_module, e_facility, e_level); + } + free(s); + return NULL; +} + +/* If 'arg' is null, configure maximum verbosity. Otherwise, sets + * configuration according to 'arg' (see vlog_set_levels_from_string()). If + * parsing fails, default to maximum verbosity. */ +void +vlog_set_verbosity(const char *arg) +{ + if (arg == NULL || !vlog_set_levels_from_string(arg)) { + vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_DBG); + } +} + +/* Initializes the logging subsystem. */ +void +vlog_init(void) +{ + openlog(program_name, LOG_NDELAY, LOG_DAEMON); + vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_WARN); +} + +/* Closes the logging subsystem. */ +void +vlog_exit(void) +{ + closelog(); +} + +/* Print the current logging level for each module. 
*/ +char * +vlog_get_levels(void) +{ + struct ds s = DS_EMPTY_INITIALIZER; + enum vlog_module module; + + ds_put_format(&s, " console syslog\n"); + ds_put_format(&s, " ------- ------\n"); + + for (module = 0; module < VLM_N_MODULES; module++) { + ds_put_format(&s, "%-16s %4s %4s\n", + vlog_get_module_name(module), + vlog_get_level_name(vlog_get_level(module, VLF_CONSOLE)), + vlog_get_level_name(vlog_get_level(module, VLF_SYSLOG))); + } + + return ds_cstr(&s); +} + +/* Writes 'message' to the log at the given 'level' and as coming from the + * given 'module'. */ +void +vlog(enum vlog_module module, enum vlog_level level, const char *message, ...) +{ + bool log_console = levels[module][VLF_CONSOLE] >= level; + bool log_syslog = levels[module][VLF_SYSLOG] >= level; + if (log_console || log_syslog) { + static int msg_num; + const char *module_name = vlog_get_module_name(module); + const char *level_name = vlog_get_level_name(level); + va_list args; + char s[1024]; + size_t len; + + len = sprintf(s, "%05d|%s|%s:", ++msg_num, module_name, level_name); + va_start(args, message); + len += vsnprintf(s + len, sizeof s - len, message, args); + va_end(args); + if (len >= sizeof s) { + len = sizeof s; + } + if (s[len - 1] == '\n') { + s[len - 1] = '\0'; + } + + if (log_console) { + fprintf(stderr, "%s\n", s); + } + + if (log_syslog) { + static const int syslog_levels[VLL_N_LEVELS] = { + [VLL_EMER] = LOG_EMERG, + [VLL_ERR] = LOG_ERR, + [VLL_WARN] = LOG_WARNING, + [VLL_DBG] = LOG_DEBUG, + }; + + syslog(syslog_levels[level], "%s", s); + } + } +} diff --git a/man/.gitignore b/man/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/man/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/man/Makefile.am b/man/Makefile.am new file mode 100644 index 00000000..9bc48012 --- /dev/null +++ b/man/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = man8 diff --git a/man/man1/.gitignore b/man/man1/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null 
+++ b/man/man1/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/man/man8/.gitignore b/man/man8/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/man/man8/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am new file mode 100644 index 00000000..0f28df8c --- /dev/null +++ b/man/man8/Makefile.am @@ -0,0 +1 @@ +dist_man_MANS = controller.8 dpctl.8 secchan.8 vlogconf.8 diff --git a/man/man8/controller.8 b/man/man8/controller.8 new file mode 100644 index 00000000..4af54807 --- /dev/null +++ b/man/man8/controller.8 @@ -0,0 +1,69 @@ +.TH controller 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +controller \- OpenFlow controller reference implementation + +.SH SYNOPSIS +.B controller +[OPTIONS] ptcp:[\fIPORT\fR] | nl:\fIDP_IDX\fR + +.SH DESCRIPTION +A sample OpenFlow controller which functions as an L2 MAC-learning +switch or hub. \fBcontroller\fR can manage a remote datapath through +a secure channel (see \fBsecchan(8)\fR). It can also connect directly +to a local datapath via Netlink. + +To connect to local datapath number \fIDP_IDX\fR (Linux only), specify +nl:\fIDP_IDX\fR on the command line. To listen for TCP connections +from remote datapaths on port \fIPORT\fR, specify ptcp:[\fIPORT\fR]. +(\fIPORT\fR defaults to 975 if omitted.) + +\fBcontroller\fR can control multiple datapaths. Multiple ptcp: or +nl: arguments may be given. Multiple TCP clients may connect to a +single TCP server port. + +.SH OPTIONS +.TP +.BR \-H ", " \-\^\-hub +By default, the controller acts as an L2 MAC-learning switch. This +option changes its behavior to that of a hub that floods packets on +all ports. + +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-n ", " \-\^\-noflow +This is similar to the \fB\-\^\-hub\fR option, but does not add a +flow entry in the switch. 
This causes all traffic seen by the switch +to be passed to the controller before being sent out all ports. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH EXAMPLES + +.TP +To connect directly to local datapath 0 over Netlink (Linux only): + +.B % controller nl:0 + +.TP +To bind locally to port 975 (the default) and wait for incoming connections from secure channels: + +.B % controller ptcp: + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR secchan (8) +.BR vlogconf (8) + +.SH BUGS +Currently \fBcontroller\fR does not support SSL. diff --git a/man/man8/dpctl.8 b/man/man8/dpctl.8 new file mode 100644 index 00000000..e030c6d4 --- /dev/null +++ b/man/man8/dpctl.8 @@ -0,0 +1,141 @@ +.TH dpctl 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +dpctl \- command line tool to administer OpenFlow datapaths + +.SH SYNOPSIS +.B dpctl +[OPTIONS] COMMAND [ARGS...] + +.SH DESCRIPTION +The +.B dpctl +program is a command line tool through which OpenFlow datapaths on the +local host can be created, deleted, modified, and monitored. A single +machine may host up to 32 datapaths (numbered 0 to 31). In most +situations, a machine hosts only one datapath. + +A newly created datapath is not associated with any of the +host's network interfaces and thus does not process any incoming +traffic. To intercept and process traffic on a given interface, the +interface must be explicitly added to a datapath through the +\fBaddif\fR command. + +.SH OPTIONS +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH COMMANDS +.TP +.BI adddp " DP_IDX" +Creates datapath numbered \fIDP_IDX\fR on the local host. 
Will fail +if \fIDP_IDX\fR is not in the range 0 to 31, or if the datapath with +that number already exists on the host. + +.TP +.BI deldp " DP_IDX" +Deletes datapath \fIDP_IDX\fR on the local host. \fIDP_IDX\fR must be +an existing datapath. All of a datapath's interfaces must be +explicitly removed before the datapath can be deleted (see \fBdelif\fR +command). + +.TP +.BI show " DP_IDX" +Prints to the console information on datapath \fIDP_IDX\fR including +information on its flow tables and ports. + +.TP +.BI addif " DP_IDX INTERFACE" +Adds \fIINTERFACE\fR to the list of network interfaces datapath +\fIDP_IDX\fR monitors, where \fIDP_IDX\fR is the ID of an existing +datapath, and \fIINTERFACE\fR is the name of one of the host's +interfaces, e.g. \fBeth0\fR. Once an interface has been added +to a datapath, the datapath has complete ownership of the interface's +traffic and the interface appears silent to the rest of the system. + +.TP +.BI delif " DP_IDX INTERFACE" +Removes \fIINTERFACE\fR from the list of network interfaces datapath +\fIDP_IDX\fR monitors. + +.TP +.BI monitor " DP_IDX" +Prints to the console all OpenFlow packets sent by datapath +\fIDP_IDX\fR to its controller, where \fIDP_IDX\fR is the ID of an +existing datapath. + +.TP +.BI dump-tables " DP_IDX" +Prints to the console statistics for each of the flow tables used by +datapath \fIDP_IDX\fR, where \fIDP_IDX\fR is the ID of an existing +datapath. + +.TP +.BI dump-flows " DP_IDX TABLE_ID" +Prints to the console all flow entries in datapath \fIDP_IDX\fR's table +\fITABLE_ID\fR, where \fIDP_IDX\fR is the ID of an existing datapath, +and \fITABLE_ID\fR is the integer ID of one of the datapath's tables +as displayed in the output produced by \fBdump-tables\fR. 
+ +.SH EXAMPLES + +A typical dpctl command sequence: +.nf +.TP +Create datapath numbered 0: + +.B % dpctl adddp 0 + +.TP +Add two interfaces to the new datapath: + +.B % dpctl addif 0 eth0 +.B % dpctl addif 0 eth1 + +.TP +Monitor traffic received by the datapath (exit with control-C): + +.B % dpctl monitor 0 + + +.TP +View the datapath's table stats after some traffic has passed through: + +.B % dpctl dump-tables 0 + +.TP +View the flow entries in one of the datapath's tables (shown is the command for table 1). (This assumes that there is a running controller adding flows to the flow tables.) + +.B % dpctl dump-flows 0 1 + +.TP +Remove interfaces from the datapath when finished: + +.B % dpctl delif 0 eth0 +.B % dpctl delif 0 eth1 + +.TP +Delete the datapath: + +.B % dpctl deldp 0 +.fi +.SH "SEE ALSO" + +.BR secchan (8), +.BR controller (8), +.BR vlogconf (8) + +.SH BUGS + +dump-flows currently only prints the first action of each flow. This is +a shortcoming in the module's netlink flow query functionality and will +be addressed in future releases. diff --git a/man/man8/secchan.8 b/man/man8/secchan.8 new file mode 100644 index 00000000..3f783319 --- /dev/null +++ b/man/man8/secchan.8 @@ -0,0 +1,47 @@ +.TH secchan 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +secchan \- secure channel connecting an OpenFlow datapath to a controller + +.SH SYNOPSIS +.B secchan +[OPTIONS] nl:\fIDP_IDX\fR tcp:\fICONTROLLER_IP\fR[:\fICONTROLLER_TCP_PORT\fR] + +.SH DESCRIPTION +The \fBsecchan\fR program sets up a secure channel between a local +OpenFlow datapath and a remote controller. \fBsecchan\fR connects to +the datapath over netlink and to the controller over TCP, and then +proceeds to forward packets from one endpoint to the other. 
+ +\fIDP_IDX\fR \- the ID of the local datapath to connect to + +\fICONTROLLER_IP\fR \- the controller's IP address + +\fICONTROLLER_TCP_PORT\fR \- the controller's TCP port to connect to \- defaults to 975 + +.SH OPTIONS +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-u ", " \-\^\-unreliable +Do not attempt to reconnect the channel if a connection drops. By +default, \fBsecchan\fR attempts to reconnect. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR controller (8), +.BR vlogconf (8) + +.SH BUGS +Currently \fBsecchan\fR does not support SSL. diff --git a/man/man8/vlogconf.8 b/man/man8/vlogconf.8 new file mode 100644 index 00000000..720a68cd --- /dev/null +++ b/man/man8/vlogconf.8 @@ -0,0 +1,45 @@ +.TH vlogconf 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +vlogconf \- configuration utility for OpenFlow logging in userspace + +.SH SYNOPSIS +\fBvlogconf\fR [OPTIONS] +.br +\fBvlogconf\fR \fIMODULE\fR \fIFACILITY\fR \fILEVEL\fR + +.SH DESCRIPTION +The \fBvlogconf\fR program configures the logging system used by the +OpenFlow userspace programs. The logging configuration may be modified +while OpenFlow programs are running. + +\fIMODULE\fR \- The module for which the logging level is being +modified. To see a list of supported modules, rerun \fBvlogconf\fR with +the \fB\-\^\-print\fR option. + +\fIFACILITY\fR \- The method of logging. Valid values are \fBSYSLOG\fR and +\fBCONSOLE\fR. + +\fILEVEL\fR \- The level with which the module should be logged. Valid +values are \fBDBG\fR (debug), \fBWARN\fR (warning), \fBERR\fR (error), +and \fBEMER\fR (emergency). + +.SH OPTIONS +.TP +\fB\-p\fR, \fB\-\^\-print\fR +Prints the current logging configuration. + +.TP +\fB\-a\fR, \fB\-\^\-all\fR \fIlevel\fR +Sets all modules and facilities to the specified level. 
+ +.TP +\fB\-h\fR, \fB\-\^\-help\fR +Prints a brief help message to the console. + + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR secchan (8), +.BR controller (8) diff --git a/secchan/.gitignore b/secchan/.gitignore new file mode 100644 index 00000000..b3cdd994 --- /dev/null +++ b/secchan/.gitignore @@ -0,0 +1,6 @@ +/Makefile +/Makefile.in +/controller-lite +/ctlpath-lite +/dpctl-lite +/secchan diff --git a/secchan/Makefile.am b/secchan/Makefile.am new file mode 100644 index 00000000..bfb0d262 --- /dev/null +++ b/secchan/Makefile.am @@ -0,0 +1,6 @@ +include ../Make.vars + +bin_PROGRAMS = secchan + +secchan_SOURCES = secchan.c +secchan_LDADD = ../lib/libopenflow.la diff --git a/secchan/secchan.c b/secchan/secchan.c new file mode 100644 index 00000000..b8a4c4c3 --- /dev/null +++ b/secchan/secchan.c @@ -0,0 +1,256 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "buffer.h" +#include "command-line.h" +#include "compiler.h" +#include "fault.h" +#include "util.h" +#include "vconn.h" +#include "vlog-socket.h" +#include "openflow.h" + +#include "vlog.h" +#define THIS_MODULE VLM_secchan + +static void parse_options(int argc, char *argv[]); +static void usage(void) NO_RETURN; + +static bool reliable = true; + +struct half { + const char *name; + struct vconn *vconn; + struct pollfd *pollfd; + struct buffer *rxbuf; +}; + +static void reconnect(struct half *); + +int +main(int argc, char *argv[]) +{ + struct half halves[2]; + struct pollfd pollfds[2 + 1]; + struct vlog_server *vlog_server; + int retval; + int i; + + set_program_name(argv[0]); + register_fault_handlers(); + vlog_init(); + parse_options(argc, argv); + + if (argc - optind != 2) { + fatal(0, "exactly two peer arguments required; use --help for usage"); + } + + retval = vlog_server_listen(NULL, &vlog_server); + if (retval) { + fatal(retval, "Could not listen for vlog connections"); + } + + for (i = 0; i < 2; i++) { + halves[i].name = argv[optind + i]; + halves[i].vconn = NULL; + halves[i].pollfd = &pollfds[i]; + halves[i].rxbuf = NULL; + reconnect(&halves[i]); + } + for (;;) { + /* Wait until there's something to do. */ + for (i = 0; i < 2; i++) { + struct half *this = &halves[i]; + struct half *peer = &halves[!i]; + int want = 0; + if (peer->rxbuf) { + want |= WANT_SEND; + } + if (!this->rxbuf) { + want |= WANT_RECV; + } + this->pollfd->fd = -1; + this->pollfd->events = 0; + vconn_prepoll(this->vconn, want, this->pollfd); + } + if (vlog_server) { + pollfds[2].fd = vlog_server_get_fd(vlog_server); + pollfds[2].events = POLLIN; + } + do { + retval = poll(pollfds, 2 + (vlog_server != NULL), -1); + } while (retval < 0 && errno == EINTR); + if (retval <= 0) { + fatal(retval < 0 ? errno : 0, "poll"); + } + + /* Let each connection deal with any pending operations. 
*/ + for (i = 0; i < 2; i++) { + struct half *this = &halves[i]; + vconn_postpoll(this->vconn, &this->pollfd->revents); + if (this->pollfd->revents & POLLERR) { + this->pollfd->revents |= POLLIN | POLLOUT; + } + } + if (vlog_server && pollfds[2].revents) { + vlog_server_poll(vlog_server); + } + + /* Do as much work as we can without waiting. */ + for (i = 0; i < 2; i++) { + struct half *this = &halves[i]; + struct half *peer = &halves[!i]; + + if (this->pollfd->revents & POLLIN && !this->rxbuf) { + retval = vconn_recv(this->vconn, &this->rxbuf); + if (retval && retval != EAGAIN) { + VLOG_DBG("%s: recv: closing connection: %s", + this->name, strerror(retval)); + reconnect(this); + break; + } + } + + if (peer->pollfd->revents & POLLOUT && this->rxbuf) { + retval = vconn_send(peer->vconn, this->rxbuf); + if (!retval) { + this->rxbuf = NULL; + } else if (retval != EAGAIN) { + VLOG_DBG("%s: send: closing connection: %s", + peer->name, strerror(retval)); + reconnect(peer); + break; + } + } + } + } + + return 0; +} + +/* Drops THIS's current connection (and any buffered message), if there is + * one, then retries vconn_open() on THIS->name with a doubling delay + * (1 up to 60 seconds) until it succeeds. When 'reliable' is false, a + * dropped connection or a failed open is passed to fatal() instead. */ +static void +reconnect(struct half *this) +{ + int backoff; + + if (this->vconn != NULL) { + if (!reliable) { + fatal(0, "%s: connection dropped", this->name); + } + + VLOG_WARN("%s: connection dropped, reconnecting", this->name); + vconn_close(this->vconn); + this->vconn = NULL; + buffer_delete(this->rxbuf); + this->rxbuf = NULL; + } + /* Mark both directions ready so the main loop tries I/O right away. */ + this->pollfd->revents = POLLIN | POLLOUT; + + for (backoff = 1; ; backoff = MIN(backoff * 2, 60)) { + int retval = vconn_open(this->name, &this->vconn); + if (!retval) { + VLOG_WARN("%s: connected", this->name); + if (vconn_is_passive(this->vconn)) { + fatal(0, "%s: passive vconn not supported in control path", + this->name); + } + return; + } + + /* Report the code vconn_open() returned; nothing here shows that it + * also sets errno. */ + if (!reliable) { + fatal(retval, "%s: connection failed", this->name); + } + VLOG_WARN("%s: connection failed (%s), reconnecting", + this->name, strerror(retval)); + sleep(backoff); + } +} + +static void +parse_options(int argc, char *argv[]) +{ + static struct option long_options[] = { +
{"unreliable", no_argument, 0, 'u'}, + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + break; + } + + switch (c) { + case 'u': + reliable = false; + break; + + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: Secure Channel\n" + "usage: %s [OPTIONS] nl:DP_ID tcp:HOST:[PORT]\n" + "\nConnects to local datapath DP_ID via Netlink and \n" + "controller on HOST via TCP to PORT (default: %d).\n" + "\nNetworking options:\n" + " -u, --unreliable do not reconnect after connections drop\n" + "\nOther options:\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " -V, --version display version information\n", + program_name, program_name, OFP_TCP_PORT); + exit(EXIT_SUCCESS); +} diff --git a/third-party/.gitignore b/third-party/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/third-party/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/third-party/Makefile.am b/third-party/Makefile.am new file mode 100644 index 00000000..587a7e8c --- /dev/null +++ b/third-party/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST = README ofp-tcpdump.patch diff --git a/third-party/README b/third-party/README new file mode 100644 index 00000000..2621cdc3 --- /dev/null +++ b/third-party/README @@ -0,0 +1,35 @@ +This directory contains third-party software that may be useful for +debugging. 
+ +tcpdump +------- +The "ofp-tcpdump.patch" patch adds the ability to parse OpenFlow +messages to tcpdump. These instructions assume that tcpdump 3.9.8 +is going to be used, but it should work with other versions that are not +substantially different. To begin, download tcpdump and apply the +patch: + + wget http://www.tcpdump.org/release/tcpdump-3.9.8.tar.gz + tar xzf tcpdump-3.9.8.tar.gz + ln -s tcpdump-3.9.8 tcpdump + patch -p0 < ofp-tcpdump.patch + +Then build the new version of tcpdump: + + cd tcpdump + ./configure + make + +Clearly, tcpdump can only parse unencrypted packets, so you will need to +connect the controller and datapath using plain TCP. To look at the +traffic, tcpdump will be started in a manner similar to the following: + + sudo ./tcpdump -s0 -i eth0 port 975 + +The "-s0" flag indicates that tcpdump should capture the entire packet. +If the OpenFlow message is not received in its entirety, "[|openflow]" will +be printed instead of the OpenFlow message contents. + +The verbosity of the output may be increased by adding additional "-v" +flags. If "-vvv" is used, the raw OpenFlow data is also printed in +hex and ASCII. diff --git a/third-party/ofp-tcpdump.patch b/third-party/ofp-tcpdump.patch new file mode 100644 index 00000000..9ee241cf --- /dev/null +++ b/third-party/ofp-tcpdump.patch @@ -0,0 +1,119 @@ +diff -rNu tcpdump/interface.h tcpdump/interface.h +--- tcpdump/interface.h 2007-06-13 18:03:20.000000000 -0700 ++++ tcpdump/interface.h 2008-02-06 15:06:30.000000000 -0800 +@@ -148,7 +148,8 @@ + + extern const char *dnaddr_string(u_short); + +-extern void error(const char *, ...) ++#define error(fmt, args...) tcpdump_error(fmt, ## args) ++extern void tcpdump_error(const char *, ...) + __attribute__((noreturn, format (printf, 1, 2))); + extern void warning(const char *, ...) 
__attribute__ ((format (printf, 1, 2))); + +@@ -176,6 +177,7 @@ + extern void hex_print_with_offset(const char *, const u_char *, u_int, u_int); + extern void hex_print(const char *, const u_char *, u_int); + extern void telnet_print(const u_char *, u_int); ++extern void openflow_print(const u_char *, u_int); + extern int ether_encap_print(u_short, const u_char *, u_int, u_int, u_short *); + extern int llc_print(const u_char *, u_int, u_int, const u_char *, + const u_char *, u_short *); +diff -rNu tcpdump/Makefile.in tcpdump/Makefile.in +--- tcpdump/Makefile.in 2007-09-25 18:59:52.000000000 -0700 ++++ tcpdump/Makefile.in 2008-02-07 11:46:03.000000000 -0800 +@@ -49,10 +49,10 @@ + CFLAGS = $(CCOPT) $(DEFS) $(INCLS) + + # Standard LDFLAGS +-LDFLAGS = @LDFLAGS@ ++LDFLAGS = @LDFLAGS@ -L../../lib/.libs + + # Standard LIBS +-LIBS = @LIBS@ ++LIBS = @LIBS@ -lopenflow + + INSTALL = @INSTALL@ + INSTALL_PROGRAM = @INSTALL_PROGRAM@ +@@ -87,7 +87,8 @@ + print-slow.c print-snmp.c print-stp.c print-sunatm.c print-sunrpc.c \ + print-symantec.c print-syslog.c print-tcp.c print-telnet.c print-tftp.c \ + print-timed.c print-token.c print-udp.c print-vjc.c print-vrrp.c \ +- print-wb.c print-zephyr.c setsignal.c tcpdump.c util.c ++ print-wb.c print-zephyr.c setsignal.c tcpdump.c util.c \ ++ print-openflow.c + + LOCALSRC = @LOCALSRC@ + GENSRC = version.c +diff -rNu tcpdump/print-openflow.c tcpdump/print-openflow.c +--- tcpdump/print-openflow.c 1969-12-31 16:00:00.000000000 -0800 ++++ tcpdump/print-openflow.c 2008-02-07 11:29:01.000000000 -0800 +@@ -0,0 +1,46 @@ ++/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include ++ ++#include "interface.h" ++#include "../../include/openflow.h" ++#include "../../include/ofp-print.h" ++ ++void ++openflow_print(const u_char *sp, u_int length) ++{ ++ const struct ofp_header *ofp = (struct ofp_header *)sp; ++ ++ if (!TTEST2(*sp, ntohs(ofp->length))) ++ goto trunc; ++ ++ ofp_print(stdout, sp, length, vflag); ++ return; ++ ++trunc: ++ printf("[|openflow]"); ++} +diff -rNu tcpdump/print-tcp.c tcpdump/print-tcp.c +--- tcpdump/print-tcp.c 2006-09-19 12:07:57.000000000 -0700 ++++ tcpdump/print-tcp.c 2008-02-07 13:07:58.000000000 -0800 +@@ -52,6 +52,8 @@ + + #include "nameser.h" + ++#include "../../include/openflow.h" ++ + #ifdef HAVE_LIBCRYPTO + #include + +@@ -680,7 +682,8 @@ + } + else if (length > 0 && (sport == LDP_PORT || dport == LDP_PORT)) { + ldp_print(bp, length); +- } ++ } else if (sport == OFP_TCP_PORT || dport == OFP_TCP_PORT) ++ openflow_print(bp, length); + } + return; + bad: diff --git a/utilities/.gitignore b/utilities/.gitignore new file mode 100644 index 00000000..b2c322f2 --- /dev/null +++ b/utilities/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/dpctl +/vlogconf diff --git a/utilities/Makefile.am b/utilities/Makefile.am new file mode 100644 index 00000000..8367bac9 --- /dev/null +++ b/utilities/Makefile.am @@ -0,0 +1,12 @@ +include ../Make.vars + +bin_PROGRAMS = vlogconf +if HAVE_NETLINK +bin_PROGRAMS += dpctl +endif + +dpctl_SOURCES = dpctl.c +dpctl_LDADD = ../lib/libopenflow.la + +vlogconf_SOURCES = vlogconf.c +vlogconf_LDADD = ../lib/libopenflow.la diff --git a/utilities/dpctl.c b/utilities/dpctl.c new file mode 100644 index 00000000..1bf36996 --- /dev/null +++ b/utilities/dpctl.c @@ -0,0 +1,519 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "command-line.h" +#include "compiler.h" +#include "buffer.h" +#include "dpif.h" +#ifdef HAVE_NETLINK +#include "netlink.h" +#include "openflow-netlink.h" +#endif +#include "util.h" +#include "socket-util.h" +#include "openflow.h" +#include "ofp-print.h" +#include "vconn.h" + +#include "vlog.h" +#define THIS_MODULE VLM_DPCTL + +static const char* ifconfigbin = "/sbin/ifconfig"; + +struct command { + const char *name; + int min_args; + int max_args; + void (*handler)(int argc, char *argv[]); +}; + +static struct command all_commands[]; + +static void usage(void) NO_RETURN; +static void parse_options(int argc, char *argv[]); + +int main(int argc, char *argv[]) +{ + struct command *p; + + set_program_name(argv[0]); + vlog_init(); + parse_options(argc, argv); + + argc -= optind; + argv += optind; + if (argc < 1) + fatal(0, "missing command name; use --help for help"); + + for (p = all_commands; p->name != NULL; p++) { + if (!strcmp(p->name, argv[0])) { + int n_arg = argc - 1; + if (n_arg < p->min_args) + fatal(0, "'%s' command requires at least %d arguments", + p->name, p->min_args); + else if (n_arg > p->max_args) + fatal(0, "'%s' command takes at most %d arguments", + p->name, p->max_args); + else { + p->handler(argc, argv); + exit(0); + } + } + } + fatal(0, "unknown command '%s'; use --help for help", argv[0]); + + return 0; +} + +static void +parse_options(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + break; + } + + switch (c) { + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" 
"__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: Datapath Utility\n" + "usage: %s [OPTIONS] COMMAND [ARG...]\n" + "\nAvailable commands:\n" + " adddp DP_ID add a new datapath with ID DP_ID\n" + " deldp DP_ID delete datapath DP_ID\n" + " show DP show information about DP\n" + " addif DP_ID IFACE add IFACE as a port on DP_ID\n" + " delif DP_ID IFACE delete IFACE as a port on DP_ID\n" + " monitor DP_ID print packets received on DP_ID\n" + " dump-tables DP_ID print stats for all tables in DP_ID\n" + " dump-flows DP_ID T_ID print all flow entries in table T_ID of DP_ID\n" + " dump-flows DP_ID T_ID FLOW print matching FLOWs in table T_ID of DP_ID\n" + " add-flows DP FILE add flows from FILE to DP\n" + " benchmark-nl DP_ID N SIZE send N packets of SIZE bytes up netlink\n" + "\nOptions:\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " -V, --version display version information\n", + program_name, program_name); + exit(EXIT_SUCCESS); +} + +static void run(int retval, const char *name) +{ + if (retval) { + fatal(retval, "%s", name); + } +} + +static int if_up(const char* intf) +{ + char command[256]; + snprintf(command, sizeof command, "%s %s up &> /dev/null", + ifconfigbin, intf); + return system(command); +} + +static void do_add_dp(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_add_dp(&dp), "add_dp"); + dpif_close(&dp); +} + +static void do_del_dp(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_del_dp(&dp), "del_dp"); + dpif_close(&dp); +} + +static void do_show(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_show(&dp), 
"show"); + dpif_close(&dp); +} + +static void do_add_port(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + if_up(argv[2]); + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_add_port(&dp, argv[2]), "add_port"); + dpif_close(&dp); +} + +static void do_del_port(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_del_port(&dp, argv[2]), "del_port"); + dpif_close(&dp); +} + +#define BENCHMARK_INCR 100 + +static void do_benchmark_nl(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + uint32_t num_packets, i, milestone; + struct timeval start, end; + + run(dpif_open(atoi(argv[1]), true, &dp), "dpif_open"); + num_packets = atoi(argv[2]); + milestone = BENCHMARK_INCR; + run(dpif_benchmark_nl(&dp, num_packets, atoi(argv[3])), "benchmark_nl"); + if (gettimeofday(&start, NULL) == -1) { + run(errno, "gettimeofday"); + } + for (i = 0; i < num_packets;i++) { + struct buffer *b; + run(dpif_recv_openflow(&dp, &b, true), "dpif_recv_openflow"); + if (i == milestone) { + gettimeofday(&end, NULL); + printf("%u packets received in %f ms\n", + BENCHMARK_INCR, + (1000*(double)(end.tv_sec - start.tv_sec)) + + (.001*(end.tv_usec - start.tv_usec))); + milestone += BENCHMARK_INCR; + start = end; + } + buffer_delete(b); + } + gettimeofday(&end, NULL); + printf("%u packets received in %f ms\n", + i - (milestone - BENCHMARK_INCR), + (1000*(double)(end.tv_sec - start.tv_sec)) + + (.001*(end.tv_usec - start.tv_usec))); + + dpif_close(&dp); +} + +static void do_monitor(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), true, &dp), "dpif_open"); + for (;;) { + struct buffer *b; + run(dpif_recv_openflow(&dp, &b, true), "dpif_recv_openflow"); + ofp_print(stderr, b->data, b->size, 2); + buffer_delete(b); + } +} + +static void do_dump_tables(int argc, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_dump_tables(&dp), 
"dump_tables"); + dpif_close(&dp); +} + + +static uint32_t +str_to_int(const char *str) +{ + uint32_t value; + if (sscanf(str, "%"SCNu32, &value) != 1) { + fatal(0, "invalid numeric format %s", str); + } + return value; +} + +static void +str_to_mac(const char *str, uint8_t mac[6]) +{ + if (sscanf(str, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8, + &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) { + fatal(0, "invalid mac address %s", str); + } +} + +static void +str_to_ip(const char *str, uint32_t *ip) +{ + struct in_addr in_addr; + int retval; + + retval = lookup_ip(str, &in_addr); + if (retval) { + fatal(0, "%s: could not convert to IP address", str); + } + *ip = in_addr.s_addr; +} + +static void +str_to_action(const char *str, struct ofp_action *action) +{ + uint16_t port; + + if (!strcasecmp(str, "flood")) { + port = OFPP_FLOOD; + } else if (!strcasecmp(str, "controller")) { + port = OFPP_CONTROLLER; + } else { + port = str_to_int(str); + } + + memset(action, 0, sizeof *action); + action->type = OFPAT_OUTPUT; + action->arg.output.port = htons(port); +} + +static void +str_to_flow(char *string, struct ofp_match *match, struct ofp_action *action) +{ + struct field { + const char *name; + uint32_t wildcard; + enum { F_U8, F_U16, F_MAC, F_IP } type; + size_t offset; + }; + +#define F_OFS(MEMBER) offsetof(struct ofp_match, MEMBER) + static const struct field fields[] = { + { "in_port", OFPFW_IN_PORT, F_U16, F_OFS(in_port) }, + { "dl_vlan", OFPFW_DL_VLAN, F_U16, F_OFS(dl_vlan) }, + { "dl_src", OFPFW_DL_SRC, F_MAC, F_OFS(dl_src) }, + { "dl_dst", OFPFW_DL_DST, F_MAC, F_OFS(dl_dst) }, + { "dl_type", OFPFW_DL_TYPE, F_U16, F_OFS(dl_type) }, + { "nw_src", OFPFW_NW_SRC, F_IP, F_OFS(nw_src) }, + { "nw_dst", OFPFW_NW_DST, F_IP, F_OFS(nw_dst) }, + { "nw_proto", OFPFW_NW_PROTO, F_U8, F_OFS(nw_proto) }, + { "tp_src", OFPFW_TP_SRC, F_U16, F_OFS(tp_src) }, + { "tp_dst", OFPFW_TP_DST, F_U16, F_OFS(tp_dst) }, + }; + + char *name, *value; + uint32_t wildcards; + 
bool got_action = false; + + memset(match, 0, sizeof *match); + wildcards = OFPFW_ALL; + for (name = strtok(string, "="), value = strtok(NULL, " \t\n"); + name && value; + name = strtok(NULL, "="), value = strtok(NULL, " \t\n")) + { + const struct field *f; + void *data; + + if (action && !strcmp(name, "action")) { + got_action = true; + str_to_action(value, action); + continue; + } + + for (f = fields; f < &fields[ARRAY_SIZE(fields)]; f++) { + if (!strcmp(f->name, name)) { + goto found; + } + } + fprintf(stderr, "%s: unknown field %s (fields are", + program_name, name); + for (f = fields; f < &fields[ARRAY_SIZE(fields)]; f++) { + if (f != fields) { + putc(',', stderr); + } + fprintf(stderr, " %s", f->name); + } + fprintf(stderr, ")\n"); + exit(1); + + found: + data = (char *) match + f->offset; + if (!strcmp(value, "*")) { + wildcards |= f->wildcard; + } else { + wildcards &= ~f->wildcard; + if (f->type == F_U8) { + *(uint8_t *) data = str_to_int(value); + } else if (f->type == F_U16) { + *(uint16_t *) data = htons(str_to_int(value)); + } else if (f->type == F_MAC) { + str_to_mac(value, data); + } else if (f->type == F_IP) { + str_to_ip(value, data); + } else { + NOT_REACHED(); + } + } + } + if (name && !value) { + fatal(0, "field %s missing value", name); + } + if (action && !got_action) { + fatal(0, "must specify an action"); + } + match->wildcards = htons(wildcards); +} + +static void do_dump_flows(int argc, char *argv[]) +{ + struct dpif dp; + struct ofp_match match, *matchp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + if (argc == 4) { + str_to_flow(argv[3], &match, NULL); + matchp = &match; + } else { + matchp = NULL; + } + run(dpif_dump_flows(&dp, atoi(argv[2]), matchp), "dump_flows"); + dpif_close(&dp); +} + +static void do_add_flows(int argc, char *argv[]) +{ + struct vconn *vconn; + char vconn_name[16]; + + FILE *file; + char line[1024]; + + int retval; + + file = fopen(argv[2], "r"); + if (file == NULL) { + fatal(errno, "%s: open", 
argv[2]); + } + + sprintf(vconn_name, "nl:%d", atoi(argv[1])); + retval = vconn_open(vconn_name, &vconn); + if (retval) { + fatal(retval, "opening datapath"); + } + + while (fgets(line, sizeof line, file)) { + struct buffer *buffer; + struct ofp_flow_mod *ofm; + size_t size; + + char *comment; + + /* Delete comments. */ + comment = strchr(line, '#'); + if (comment) { + *comment = '\0'; + } + + /* Drop empty lines. */ + if (line[strspn(line, " \t\n")] == '\0') { + continue; + } + + size = sizeof *ofm + sizeof ofm->actions[0]; + buffer = buffer_new(size); + ofm = buffer_put_uninit(buffer, size); + + /* Parse. */ + memset(ofm, 0, size); + ofm->header.type = OFPT_FLOW_MOD; + ofm->header.version = OFP_VERSION; + ofm->header.length = htons(size); + ofm->command = htons(OFPFC_ADD); + ofm->max_idle = htons(50); + ofm->buffer_id = htonl(UINT32_MAX); + ofm->group_id = htonl(0); + str_to_flow(line, &ofm->match, &ofm->actions[0]); + + retval = vconn_send_wait(vconn, buffer); + if (retval) { + fatal(retval, "sending to datapath"); + } + } + vconn_close(vconn); + fclose(file); +} + +static void do_help(int argc UNUSED, char *argv[] UNUSED) +{ + usage(); +} + +/* Command table: name, minimum and maximum argument counts (not counting + * the command name itself), and handler. Several commands have two + * spellings that share one handler. */ +static struct command all_commands[] = { + { "add-dp", 1, 1, do_add_dp }, + { "adddp", 1, 1, do_add_dp }, + + { "del-dp", 1, 1, do_del_dp }, + { "deldp", 1, 1, do_del_dp }, + + { "show", 1, 1, do_show }, + + { "add-port", 2, 2, do_add_port }, + { "addif", 2, 2, do_add_port }, + + { "del-port", 2, 2, do_del_port }, + { "delif", 2, 2, do_del_port }, + + { "help", 0, INT_MAX, do_help }, + { "monitor", 1, 1, do_monitor }, + { "dump-tables", 1, 1, do_dump_tables }, + { "dump-flows", 2, 3, do_dump_flows }, + { "add-flows", 2, 2, do_add_flows }, + + { "benchmark-nl", 3, 3, do_benchmark_nl }, + + /* Terminator: main() scans this table until it finds a NULL name. */ + { NULL, 0, 0, NULL }, +}; diff --git a/utilities/vlogconf.c b/utilities/vlogconf.c new file mode 100644 index 00000000..6ffe99de --- /dev/null +++ b/utilities/vlogconf.c @@ -0,0 +1,185 @@ +#include "vlog.h" + +#include +#include +#include +#include +#include
+#include +#include + +#include "command-line.h" +#include "compiler.h" +#include "util.h" +#include "vlog-socket.h" + +void +usage(char *prog_name, int exit_code) +{ + printf("Usage: %s [TARGET] [ACTION...]\n" + "Targets:\n" + " -a, --all Apply to all targets (default)\n" + " -t, --target=TARGET Specify target program, as a pid or an\n" + " absolute path to a Unix domain socket\n" + "Actions:\n" + " -l, --list List current settings\n" + " -s, --set=MODULE:FACILITY:LEVEL\n" + " Set MODULE and FACILITY log level to LEVEL\n" + " MODULE may be any valid module name or 'ANY'\n" + " FACILITY may be 'syslog' or 'console' or 'ANY'\n" + " LEVEL may be 'emer', 'err', 'warn', or 'dbg'\n" + " -h, --help Print this helpful information\n", + prog_name); + exit(exit_code); +} + +static char * +transact(struct vlog_client *client, const char *request, bool *ok) +{ + char *reply; + int error = vlog_client_transact(client, request, &reply); + if (error) { + fprintf(stderr, "%s: transaction error: %s\n", + vlog_client_target(client), strerror(error)); + *ok = false; + } + return reply ? 
reply : xstrdup(""); +} + +static void +transact_ack(struct vlog_client *client, const char* request, bool *ok) +{ + char *reply; + int error = vlog_client_transact(client, request, &reply); + if (error) { + fprintf(stderr, "%s: transaction error: %s\n", + vlog_client_target(client), strerror(error)); + *ok = false; + } else if (strcmp(reply, "ack")) { + fprintf(stderr, "Received unexpected reply from %s: %s\n", + vlog_client_target(client), reply); + *ok = false; + } + free(reply); +} + +static void +add_target(struct vlog_client ***clients, size_t *n_clients, + const char *path, bool *ok) +{ + struct vlog_client *client; + int error = vlog_client_connect(path, &client); + if (error) { + fprintf(stderr, "Error connecting to \"%s\": %s\n", + path, strerror(error)); + *ok = false; + } else { + *clients = xrealloc(*clients, sizeof *clients * (*n_clients + 1)); + (*clients)[*n_clients] = client; + ++*n_clients; + } +} + +static void +add_all_targets(struct vlog_client ***clients, size_t *n_clients, bool *ok) +{ + DIR *directory; + struct dirent* de; + + directory = opendir("/tmp"); + if (!directory) { + fprintf(stderr, "/tmp: opendir: %s\n", strerror(errno)); + } + + while ((de = readdir(directory)) != NULL) { + if (!strncmp(de->d_name, "vlogs.", 5)) { + char *path = xasprintf("/tmp/%s", de->d_name); + add_target(clients, n_clients, path, ok); + free(path); + } + } + + closedir(directory); +} + +int main(int argc, char *argv[]) +{ + static const struct option long_options[] = { + /* Target options must come first. */ + {"all", no_argument, NULL, 'a'}, + {"target", required_argument, NULL, 't'}, + {"help", no_argument, NULL, 'h'}, + + /* Action options come afterward. */ + {"list", no_argument, NULL, 'l'}, + {"set", required_argument, NULL, 's'}, + {0, 0, 0, 0}, + }; + char *short_options; + + /* Determine targets. 
*/ + bool ok = true; + int n_actions = 0; + struct vlog_client **clients = NULL; + size_t n_clients = 0; + + set_program_name(argv[0]); + + short_options = long_options_to_short_options(long_options); + for (;;) { + int option; + size_t i; + + option = getopt_long(argc, argv, short_options, long_options, NULL); + if (option == -1) { + break; + } + if (!strchr("ath", option) && n_clients == 0) { + fatal(0, "no targets specified (use --help for help)"); + } else { + ++n_actions; + } + switch (option) { + case 'a': + add_all_targets(&clients, &n_clients, &ok); + break; + + case 't': + add_target(&clients, &n_clients, optarg, &ok); + break; + + case 'l': + for (i = 0; i < n_clients; i++) { + struct vlog_client *client = clients[i]; + char *reply; + + printf("%s:\n", vlog_client_target(client)); + reply = transact(client, "list", &ok); + fputs(reply, stdout); + free(reply); + } + break; + + case 's': + for (i = 0; i < n_clients; i++) { + struct vlog_client *client = clients[i]; + char *request = xasprintf("set %s", optarg); + transact_ack(client, request, &ok); + free(request); + } + break; + + case 'h': + usage(argv[0], EXIT_SUCCESS); + break; + + default: + NOT_REACHED(); + } + } + if (!n_actions) { + fprintf(stderr, + "warning: no actions specified (use --help for help)\n"); + } + exit(ok ? 0 : 1); +} -- 2.30.2