From bc45868c8d8f59392999b2f0aec6f9f54d5cfadc Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 2 Mar 2009 13:42:44 -0800 Subject: [PATCH] Refactor the OpenFlow implementation. This new implementation has an architecture that is much more suited to eventually getting pushed upstream into the Linux kernel, because it does not do any OpenFlow processing in the kernel. Rather, we define a new "datapath protocol" that secchan uses, via ioctl calls, to set up the flow table in the kernel. This implementation also should have much better performance with flows that contain wildcards, since it uses a flow classifier that should be much better than linear search in the cases that we suspect are important. This release does contain some feature regressions; see the new file MISSING at the root of the tree for more information. We will be fixing these regressions over the next weeks and months. This has not been tested much. It needs plenty of testing and QA before it will be suitable for any kind of production environment. The vswitchd changes, in particular, have not been tested at all and thus vswitchd is likely to be broken. 
--- MISSING | 21 + Makefile.am | 1 - datapath/Modules.mk | 22 +- datapath/actions.c | 443 ++++ datapath/actions.h | 17 + datapath/brcompat.c | 16 +- datapath/chain.c | 212 -- datapath/chain.h | 39 - datapath/crc32.c | 40 - datapath/crc32.h | 22 - datapath/datapath.c | 2612 +++++++++------------- datapath/datapath.h | 122 +- datapath/dp_act.c | 515 ----- datapath/dp_act.h | 15 - datapath/dp_dev.c | 10 +- datapath/dp_notify.c | 23 +- datapath/flow.c | 325 +-- datapath/flow.h | 185 +- datapath/forward.c | 571 ----- datapath/forward.h | 35 - datapath/hwtable_dummy/hwtable_dummy.c | 10 +- datapath/nx_act.c | 46 - datapath/nx_act.h | 14 - datapath/nx_act_snat.h | 44 - datapath/nx_msg.c | 48 - datapath/nx_msg.h | 7 - datapath/{nx_act_snat.c => snat.c} | 171 +- datapath/snat.h | 23 + datapath/table-hash.c | 450 ---- datapath/table-linear.c | 217 -- datapath/table.c | 231 ++ datapath/table.h | 113 - include/openflow/datapath-protocol.h | 320 +++ include/openflow/nicira-ext.h | 45 +- include/openflow/openflow-netlink.h | 5 +- lib/automake.mk | 3 +- lib/classifier.c | 34 +- lib/classifier.h | 13 +- lib/dhcp-client.c | 2 +- lib/dpif.c | 1075 ++++++--- lib/dpif.h | 83 +- lib/flow.c | 14 +- lib/flow.h | 45 +- lib/learning-switch.c | 6 +- lib/mac-learning.c | 13 +- lib/mac-learning.h | 12 +- lib/ofp-print.c | 89 +- lib/vconn-netlink.c | 157 -- lib/vconn-provider.h | 5 +- lib/vconn.c | 16 +- lib/vconn.h | 11 +- lib/vlog-modules.def | 6 +- m4/libopenflow.m4 | 3 +- secchan/automake.mk | 31 +- secchan/discovery.c | 112 +- secchan/discovery.h | 10 +- secchan/executer.c | 112 +- secchan/executer.h | 14 +- secchan/fail-open.c | 160 +- secchan/fail-open.h | 21 +- secchan/flow-end.c | 379 ---- secchan/flow-end.h | 45 - secchan/in-band.c | 264 +-- secchan/in-band.h | 17 +- secchan/netflow.c | 313 +++ secchan/netflow.h | 63 +- secchan/ofproto.c | 2418 ++++++++++++++++++++ secchan/{ratelimit.h => ofproto.h} | 28 +- secchan/{ratelimit.c => pinsched.c} | 220 +- udatapath/dp_act.h => 
secchan/pinsched.h | 33 +- secchan/pktbuf.c | 157 ++ secchan/{stp-secchan.h => pktbuf.h} | 25 +- secchan/port-watcher.c | 621 ----- secchan/port-watcher.h | 77 - secchan/secchan.8.in | 24 - secchan/secchan.c | 579 +---- secchan/secchan.h | 63 +- secchan/snat.c | 294 --- secchan/snat.h | 44 - secchan/status.c | 50 +- secchan/status.h | 15 +- secchan/stp-secchan.c | 294 --- tests/test-classifier.c | 8 +- tests/test-flows.c | 2 +- udatapath/.gitignore | 4 - udatapath/automake.mk | 27 - udatapath/chain.c | 192 -- udatapath/chain.h | 70 - udatapath/crc32.c | 68 - udatapath/crc32.h | 50 - udatapath/datapath.c | 1668 -------------- udatapath/datapath.h | 109 - udatapath/dp_act.c | 478 ---- udatapath/nx_act.c | 55 - udatapath/nx_act.h | 48 - udatapath/nx_msg.c | 58 - udatapath/nx_msg.h | 43 - udatapath/switch-flow.c | 287 --- udatapath/switch-flow.h | 97 - udatapath/table-hash.c | 430 ---- udatapath/table-linear.c | 245 -- udatapath/table.h | 144 -- udatapath/udatapath.8.in | 140 -- udatapath/udatapath.c | 308 --- utilities/automake.mk | 4 + utilities/dpctl.c | 250 ++- utilities/nlmon.c | 90 + vswitchd/brcompat.c | 7 - vswitchd/bridge.c | 257 +-- vswitchd/flowtrack.c | 4 +- vswitchd/flowtrack.h | 6 +- 111 files changed, 7173 insertions(+), 13036 deletions(-) create mode 100644 MISSING create mode 100644 datapath/actions.c create mode 100644 datapath/actions.h delete mode 100644 datapath/chain.c delete mode 100644 datapath/chain.h delete mode 100644 datapath/crc32.c delete mode 100644 datapath/crc32.h delete mode 100644 datapath/dp_act.c delete mode 100644 datapath/dp_act.h delete mode 100644 datapath/forward.c delete mode 100644 datapath/forward.h delete mode 100644 datapath/nx_act.c delete mode 100644 datapath/nx_act.h delete mode 100644 datapath/nx_act_snat.h delete mode 100644 datapath/nx_msg.c delete mode 100644 datapath/nx_msg.h rename datapath/{nx_act_snat.c => snat.c} (84%) create mode 100644 datapath/snat.h delete mode 100644 datapath/table-hash.c delete mode 
100644 datapath/table-linear.c create mode 100644 datapath/table.c delete mode 100644 datapath/table.h create mode 100644 include/openflow/datapath-protocol.h delete mode 100644 lib/vconn-netlink.c delete mode 100644 secchan/flow-end.c delete mode 100644 secchan/flow-end.h create mode 100644 secchan/netflow.c create mode 100644 secchan/ofproto.c rename secchan/{ratelimit.h => ofproto.h} (68%) rename secchan/{ratelimit.c => pinsched.c} (52%) rename udatapath/dp_act.h => secchan/pinsched.h (75%) create mode 100644 secchan/pktbuf.c rename secchan/{stp-secchan.h => pktbuf.h} (79%) delete mode 100644 secchan/port-watcher.c delete mode 100644 secchan/port-watcher.h delete mode 100644 secchan/snat.c delete mode 100644 secchan/snat.h delete mode 100644 secchan/stp-secchan.c delete mode 100644 udatapath/.gitignore delete mode 100644 udatapath/automake.mk delete mode 100644 udatapath/chain.c delete mode 100644 udatapath/chain.h delete mode 100644 udatapath/crc32.c delete mode 100644 udatapath/crc32.h delete mode 100644 udatapath/datapath.c delete mode 100644 udatapath/datapath.h delete mode 100644 udatapath/dp_act.c delete mode 100644 udatapath/nx_act.c delete mode 100644 udatapath/nx_act.h delete mode 100644 udatapath/nx_msg.c delete mode 100644 udatapath/nx_msg.h delete mode 100644 udatapath/switch-flow.c delete mode 100644 udatapath/switch-flow.h delete mode 100644 udatapath/table-hash.c delete mode 100644 udatapath/table-linear.c delete mode 100644 udatapath/table.h delete mode 100644 udatapath/udatapath.8.in delete mode 100644 udatapath/udatapath.c create mode 100644 utilities/nlmon.c diff --git a/MISSING b/MISSING new file mode 100644 index 00000000..ffe5160a --- /dev/null +++ b/MISSING @@ -0,0 +1,21 @@ +The following features are temporarily missing, pending time to +reimplement them with the new architecture: + +- Hardware table support in the kernel datapath. + +- STP support in secchan (note that this is distinct from STP support + in vswitchd). 
+ +- The OFPPC_NO_RECV, OFPPC_NO_RECV_STP, and OFPPC_NO_FWD bits in port + configurations. + +- The OFPP_TABLE action. + +- SNAT support in secchan (but SNAT is still supported in the kernel + datapath). + +- udatapath. + +- vswitchd (this is our top priority). + +- A lot of the manpages and documentation need to be updated. diff --git a/Makefile.am b/Makefile.am index c62980f1..d276604d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -62,7 +62,6 @@ include lib/automake.mk include secchan/automake.mk include controller/automake.mk include utilities/automake.mk -include udatapath/automake.mk include tests/automake.mk include include/automake.mk include third-party/automake.mk diff --git a/datapath/Modules.mk b/datapath/Modules.mk index ab1769c3..2e320c11 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -10,33 +10,21 @@ build_modules = $(both_modules) # Modules to build dist_modules = $(both_modules) # Modules to distribute openflow_sources = \ - chain.c \ - crc32.c \ + actions.c \ datapath.c \ - dp_act.c \ dp_dev.c \ dp_notify.c \ flow.c \ - forward.c \ - nx_act.c \ - nx_act_snat.c \ - nx_msg.c \ - table-hash.c \ - table-linear.c + snat.c \ + table.c openflow_headers = \ - chain.h \ + actions.h \ compat.h \ - crc32.h \ datapath.h \ dp_dev.h \ flow.h \ - forward.h \ - dp_act.h \ - nx_act.h \ - nx_act_snat.h \ - nx_msg.h \ - table.h + snat.h dist_sources = $(foreach module,$(dist_modules),$($(module)_sources)) dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) diff --git a/datapath/actions.c b/datapath/actions.c new file mode 100644 index 00000000..717c0582 --- /dev/null +++ b/datapath/actions.c @@ -0,0 +1,443 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland + * Stanford Junior University + */ + +/* Functions for executing OpenFlow actions. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "datapath.h" +#include "dp_dev.h" +#include "actions.h" +#include "openflow/datapath-protocol.h" +#include "snat.h" + +struct sk_buff * +make_writable(struct sk_buff *skb, gfp_t gfp) +{ + if (skb_shared(skb) || skb_cloned(skb)) { + struct sk_buff *nskb = skb_copy(skb, gfp); + if (nskb) { + kfree_skb(skb); + return nskb; + } + } else { + unsigned int hdr_len = (skb_transport_offset(skb) + + sizeof(struct tcphdr)); + if (pskb_may_pull(skb, min(hdr_len, skb->len))) + return skb; + } + kfree_skb(skb); + return NULL; +} + + +static struct sk_buff * +vlan_pull_tag(struct sk_buff *skb) +{ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + struct ethhdr *eh; + + + /* Verify we were given a vlan packet */ + if (vh->h_vlan_proto != htons(ETH_P_8021Q)) + return skb; + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); + + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + + skb->protocol = eh->h_proto; + skb->mac_header += VLAN_HLEN; + + return skb; +} + + +static struct sk_buff * +modify_vlan_tci(struct sk_buff *skb, struct odp_flow_key *key, + u16 tci, u16 mask) +{ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + + if (key->dl_vlan != htons(ODP_VLAN_NONE)) { + /* Modify vlan id, but maintain other TCI values */ + vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci); + } else { + /* Add vlan header */ + + /* xxx The vlan_put_tag function, doesn't seem to work + * xxx reliably when it attempts to use the hardware-accelerated + * xxx version. We'll directly use the software version + * xxx until the problem can be diagnosed. 
+ */ + skb = __vlan_put_tag(skb, tci); + if (!skb) + return NULL; + vh = vlan_eth_hdr(skb); + } + key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK); + + return skb; +} + +static struct sk_buff *set_vlan_vid(struct sk_buff *skb, + struct odp_flow_key *key, + const struct odp_action_vlan_vid *a, + gfp_t gfp) +{ + u16 tci = ntohs(a->vlan_vid); + skb = make_writable(skb, gfp); + if (skb) + skb = modify_vlan_tci(skb, key, tci, VLAN_VID_MASK); + return skb; +} + +/* Mask for the priority bits in a vlan header. The kernel doesn't + * define this like it does for VID. */ +#define VLAN_PCP_MASK 0xe000 + +static struct sk_buff *set_vlan_pcp(struct sk_buff *skb, + struct odp_flow_key *key, + const struct odp_action_vlan_pcp *a, + gfp_t gfp) +{ + u16 tci = a->vlan_pcp << 13; + skb = make_writable(skb, gfp); + if (skb) + skb = modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK); + return skb; +} + +static struct sk_buff *strip_vlan(struct sk_buff *skb, + struct odp_flow_key *key, gfp_t gfp) +{ + skb = make_writable(skb, gfp); + if (skb) { + vlan_pull_tag(skb); + key->dl_vlan = htons(ODP_VLAN_NONE); + } + return skb; +} + +static struct sk_buff *set_dl_addr(struct sk_buff *skb, + const struct odp_action_dl_addr *a, + gfp_t gfp) +{ + skb = make_writable(skb, gfp); + if (skb) { + struct ethhdr *eh = eth_hdr(skb); + memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest, + a->dl_addr, ETH_ALEN); + } + return skb; +} + +/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field + * covered by the sum has been changed from 'from' to 'to'. If set, + * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. + * Based on nf_proto_csum_replace4. 
*/ +static void update_csum(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) +{ + __be32 diff[] = { ~from, to }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + csum_unfold(*sum))); +} + +static struct sk_buff *set_nw_addr(struct sk_buff *skb, + struct odp_flow_key *key, + const struct odp_action_nw_addr *a, + gfp_t gfp) +{ + if (key->dl_type != htons(ETH_P_IP)) + return skb; + + skb = make_writable(skb, gfp); + if (skb) { + struct iphdr *nh = ip_hdr(skb); + u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr; + u32 old = *f; + u32 new = a->nw_addr; + + if (key->nw_proto == IPPROTO_TCP) { + struct tcphdr *th = tcp_hdr(skb); + update_csum(&th->check, skb, old, new, 1); + } else if (key->nw_proto == IPPROTO_UDP) { + struct udphdr *th = udp_hdr(skb); + update_csum(&th->check, skb, old, new, 1); + } + update_csum(&nh->check, skb, old, new, 0); + *f = new; + } + return skb; +} + +static struct sk_buff * +set_tp_port(struct sk_buff *skb, struct odp_flow_key *key, + const struct odp_action_tp_port *a, + gfp_t gfp) +{ + int check_ofs; + + if (key->dl_type != htons(ETH_P_IP)) + return skb; + + if (key->nw_proto == IPPROTO_TCP) + check_ofs = offsetof(struct tcphdr, check); + else if (key->nw_proto == IPPROTO_UDP) + check_ofs = offsetof(struct udphdr, check); + else + return skb; + + skb = make_writable(skb, gfp); + if (skb) { + struct udphdr *th = udp_hdr(skb); + u16 *f = a->type == ODPAT_SET_TP_SRC ? 
&th->source : &th->dest; + u16 old = *f; + u16 new = a->tp_port; + update_csum((u16*)((u8*)skb->data + check_ofs), + skb, old, new, 1); + *f = new; + } + return skb; +} + +static inline unsigned packet_length(const struct sk_buff *skb) +{ + unsigned length = skb->len - ETH_HLEN; + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + return length; +} + +#ifdef SUPPORT_SNAT +static int +dp_xmit_skb_finish(struct sk_buff *skb) +{ + /* Copy back the Ethernet header that was stowed earlier. */ + if (skb->protocol == htons(ETH_P_IP) && snat_copy_header(skb)) { + kfree_skb(skb); + return -EINVAL; + } + skb_reset_mac_header(skb); + + if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { + printk("dropped over-mtu packet: %d > %d\n", + packet_length(skb), skb->dev->mtu); + kfree_skb(skb); + return -E2BIG; + } + + skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); + + return 0; +} + +int dp_xmit_skb(struct sk_buff *skb) +{ + int len = skb->len; + int err; + + skb_pull(skb, ETH_HLEN); + + /* The ip_fragment function does not copy the Ethernet header into + * the newly generated frames, so stow the original. 
*/ + if (skb->protocol == htons(ETH_P_IP)) + snat_save_header(skb); + + if (skb->protocol == htons(ETH_P_IP) && + skb->len > skb->dev->mtu && + !skb_is_gso(skb)) { + err = ip_fragment(skb, dp_xmit_skb_finish); + } else { + err = dp_xmit_skb_finish(skb); + } + if (err) + return err; + + return len; +} +#else +int dp_xmit_skb(struct sk_buff *skb) +{ + struct datapath *dp = skb->dev->br_port->dp; + int len = skb->len; + + if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { + printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", + dp->netdev->name, packet_length(skb), skb->dev->mtu); + kfree_skb(skb); + return -E2BIG; + } + + dev_queue_xmit(skb); + + return len; +} +#endif + +static void +do_output(struct datapath *dp, struct sk_buff *skb, int out_port) +{ + struct net_bridge_port *p; + + if (!skb) + goto error; + + if (out_port == ODPP_LOCAL) { + struct net_device *dev = dp->netdev; + if (!dev) + goto error; +#ifdef SUPPORT_SNAT + snat_local_in(skb); +#endif + dp_dev_recv(dev, skb); + return; + } + + p = dp->ports[out_port]; + if (!p) + goto error; + + skb->dev = p->dev; + dp_xmit_skb(skb); + return; + +error: + kfree_skb(skb); +} + +static int output_group(struct datapath *dp, __u16 group, + struct sk_buff *skb, gfp_t gfp) +{ + struct odp_port_group *g = rcu_dereference(dp->groups[group]); + int prev_port = -1; + int i; + + if (!g) + return -EINVAL; + for (i = 0; i < g->n_ports; i++) { + struct net_bridge_port *p = dp->ports[g->ports[i]]; + if (!p || skb->dev == p->dev) + continue; + if (prev_port != -1) { + struct sk_buff *clone = skb_clone(skb, gfp); + if (!clone) { + kfree_skb(skb); + return -1; + } + do_output(dp, clone, prev_port); + } + prev_port = p->port_no; + } + return prev_port; +} + +static int +output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp) +{ + skb = skb_clone(skb, gfp); + if (!skb) + return -ENOMEM; + return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg); +} + +/* Execute a list of actions against 
'skb'. */ +int execute_actions(struct datapath *dp, struct sk_buff *skb, + struct odp_flow_key *key, + const struct sw_flow_actions *actions, + gfp_t gfp) +{ + /* Every output action needs a separate clone of 'skb', but the common + * case is just a single output action, so that doing a clone and + * then freeing the original skbuff is wasteful. So the following code + * is slightly obscure just to avoid that. */ + int prev_port = -1; + unsigned int i; + int err = 0; + for (i = 0; i < actions->n_actions; i++) { + const union odp_action *a = &actions->actions[i]; + WARN_ON_ONCE(skb_shared(skb)); + if (prev_port != -1) { + do_output(dp, skb_clone(skb, gfp), prev_port); + prev_port = -1; + } + + switch (a->type) { + case ODPAT_OUTPUT: + prev_port = a->output.port; + break; + + case ODPAT_OUTPUT_GROUP: + prev_port = output_group(dp, a->output_group.group, + skb, gfp); + break; + + case ODPAT_CONTROLLER: + err = output_control(dp, skb, a->controller.arg, gfp); + if (err) { + kfree_skb(skb); + return err; + } + break; + + case ODPAT_SET_VLAN_VID: + skb = set_vlan_vid(skb, key, &a->vlan_vid, gfp); + break; + + case ODPAT_SET_VLAN_PCP: + skb = set_vlan_pcp(skb, key, &a->vlan_pcp, gfp); + break; + + case ODPAT_STRIP_VLAN: + skb = strip_vlan(skb, key, gfp); + break; + + case ODPAT_SET_DL_SRC: + case ODPAT_SET_DL_DST: + skb = set_dl_addr(skb, &a->dl_addr, gfp); + break; + + case ODPAT_SET_NW_SRC: + case ODPAT_SET_NW_DST: + skb = set_nw_addr(skb, key, &a->nw_addr, gfp); + break; + + case ODPAT_SET_TP_SRC: + case ODPAT_SET_TP_DST: + skb = set_tp_port(skb, key, &a->tp_port, gfp); + break; + +#ifdef SUPPORT_SNAT + case ODPAT_SNAT: + snat_skb(dp, skb, a->snat.port, gfp); + break; +#endif + } + if (!skb) + return -ENOMEM; + } + if (prev_port != -1) + do_output(dp, skb, prev_port); + else + kfree_skb(skb); + return err; +} diff --git a/datapath/actions.h b/datapath/actions.h new file mode 100644 index 00000000..981e0eac --- /dev/null +++ b/datapath/actions.h @@ -0,0 +1,17 @@ 
+#ifndef ACTIONS_H +#define ACTIONS_H 1 + +#include + +struct datapath; +struct sk_buff; +struct odp_flow_key; +struct sw_flow_actions; + +struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp); +int dp_xmit_skb(struct sk_buff *); +int execute_actions(struct datapath *dp, struct sk_buff *skb, + struct odp_flow_key *key, + const struct sw_flow_actions *, gfp_t gfp); + +#endif /* actions.h */ diff --git a/datapath/brcompat.c b/datapath/brcompat.c index 5b86a70b..b64a2b8b 100644 --- a/datapath/brcompat.c +++ b/datapath/brcompat.c @@ -39,8 +39,8 @@ get_dp_ifindices(int *indices, int num) int i, index = 0; rcu_read_lock(); - for (i=0; i < DP_MAX && index < num; i++) { - struct datapath *dp = dp_get_by_idx(i); + for (i=0; i < ODP_MAX && index < num; i++) { + struct datapath *dp = get_dp(i); if (!dp) continue; indices[index++] = dp->netdev->ifindex; @@ -167,13 +167,13 @@ old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_GET_BRIDGE_INFO: { struct __bridge_info b; - uint64_t id = 0; + u64 id = 0; int i; memset(&b, 0, sizeof(struct __bridge_info)); for (i=0; idev_addr[i] << (8*(ETH_ALEN-1 - i)); + id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i)); b.bridge_id = cpu_to_be64(id); b.stp_enabled = 0; @@ -191,8 +191,8 @@ old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EINVAL; if (num == 0) num = 256; - if (num > OFPP_MAX) - num = OFPP_MAX; + if (num > DP_MAX_PORTS) + num = DP_MAX_PORTS; indices = kcalloc(num, sizeof(int), GFP_KERNEL); if (indices == NULL) @@ -425,8 +425,8 @@ __init brc_init(void) printk("OpenFlow Bridge Compatibility, built "__DATE__" "__TIME__"\n"); rcu_read_lock(); - for (i=0; i -#include -#include -#include - -static struct sw_table *(*create_hw_table_hook)(void); -static struct module *hw_table_owner; -static DEFINE_SPINLOCK(hook_lock); - -/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or - * negative error. 
If 'table' is null it is assumed that table creation failed - * due to out-of-memory. */ -static int add_table(struct sw_chain *chain, struct sw_table *table) -{ - if (table == NULL) - return -ENOMEM; - if (chain->n_tables >= CHAIN_MAX_TABLES) { - printk(KERN_EMERG "%s: too many tables in chain\n", - chain->dp->netdev->name); - table->destroy(table); - return -ENOBUFS; - } - chain->tables[chain->n_tables++] = table; - return 0; -} - -/* Creates and returns a new chain associated with 'dp'. Returns NULL if the - * chain cannot be created. */ -struct sw_chain *chain_create(struct datapath *dp) -{ - struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL); - if (chain == NULL) - goto error; - chain->dp = dp; - chain->owner = try_module_get(hw_table_owner) ? hw_table_owner : NULL; - if (chain->owner && create_hw_table_hook) { - struct sw_table *hwtable = create_hw_table_hook(); - if (!hwtable || add_table(chain, hwtable)) - goto error; - } - - if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, - 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) - || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) - goto error; - return chain; - -error: - if (chain) - chain_destroy(chain); - return NULL; -} - -/* Searches 'chain' for a flow matching 'key', which must not have any wildcard - * fields. Returns the flow if successful, otherwise a null pointer. - * - * Caller must hold rcu_read_lock or dp_mutex. */ -struct sw_flow *chain_lookup(struct sw_chain *chain, - const struct sw_flow_key *key) -{ - int i; - - BUG_ON(key->wildcards); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - struct sw_flow *flow = t->lookup(t, key); - t->n_lookup++; - if (flow) { - t->n_matched++; - return flow; - } - } - return NULL; -} - -/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if - * successful or a negative error. - * - * If successful, 'flow' becomes owned by the chain, otherwise it is retained - * by the caller. 
- * - * Caller must hold dp_mutex. */ -int chain_insert(struct sw_chain *chain, struct sw_flow *flow) -{ - int i; - - might_sleep(); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - if (t->insert(t, flow)) - return 0; - } - - return -ENOBUFS; -} - -/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards - * and priority must match. Returns the number of flows that were modified. - * - * Expensive in the general case as currently implemented, since it requires - * iterating through the entire contents of each table for keys that contain - * wildcards. Relatively cheap for fully specified keys. */ -int -chain_modify(struct sw_chain *chain, const struct sw_flow_key *key, - uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - int count = 0; - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - count += t->modify(t, key, priority, strict, actions, actions_len); - } - - return count; -} - -/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port' - * is not OFPP_NONE, then matching entries must have that port as an - * argument for an output action. If 'strict" is set, then wildcards and - * priority must match. Returns the number of flows that were deleted. - * - * Expensive in the general case as currently implemented, since it requires - * iterating through the entire contents of each table for keys that contain - * wildcards. Relatively cheap for fully specified keys. - * - * Caller must hold dp_mutex. 
*/ -int chain_delete(struct sw_chain *chain, const struct sw_flow_key *key, - uint16_t out_port, uint16_t priority, int strict) -{ - int count = 0; - int i; - - might_sleep(); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - count += t->delete(chain->dp, t, key, out_port, priority, strict); - } - - return count; -} - -/* Performs timeout processing on all the tables in 'chain'. Returns the - * number of flow entries deleted through expiration. - * - * Expensive as currently implemented, since it iterates through the entire - * contents of each table. - * - * Caller must not hold dp_mutex, because individual tables take and release it - * as necessary. */ -int chain_timeout(struct sw_chain *chain) -{ - int count = 0; - int i; - - might_sleep(); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - count += t->timeout(chain->dp, t); - } - return count; -} - -/* Destroys 'chain', which must not have any users. */ -void chain_destroy(struct sw_chain *chain) -{ - int i; - - synchronize_rcu(); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - if (t->destroy) - t->destroy(t); - } - module_put(chain->owner); - kfree(chain); -} - -int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void), - struct module *owner) -{ - int retval = -EBUSY; - - spin_lock(&hook_lock); - if (!create_hw_table_hook) { - create_hw_table_hook = create_hw_table; - hw_table_owner = owner; - retval = 0; - } - spin_unlock(&hook_lock); - - return retval; -} -EXPORT_SYMBOL(chain_set_hw_hook); - -void chain_clear_hw_hook(void) -{ - create_hw_table_hook = NULL; - hw_table_owner = NULL; -} -EXPORT_SYMBOL(chain_clear_hw_hook); diff --git a/datapath/chain.h b/datapath/chain.h deleted file mode 100644 index a78a9385..00000000 --- a/datapath/chain.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef CHAIN_H -#define CHAIN_H 1 - -#include - -struct sw_flow; -struct sw_flow_key; -struct ofp_action_header; -struct 
datapath; - - -#define TABLE_LINEAR_MAX_FLOWS 100 -#define TABLE_HASH_MAX_FLOWS 65536 - -/* Set of tables chained together in sequence from cheap to expensive. */ -#define CHAIN_MAX_TABLES 4 -struct sw_chain { - int n_tables; - struct sw_table *tables[CHAIN_MAX_TABLES]; - - struct datapath *dp; - struct module *owner; -}; - -struct sw_chain *chain_create(struct datapath *); -struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *); -int chain_insert(struct sw_chain *, struct sw_flow *); -int chain_modify(struct sw_chain *, const struct sw_flow_key *, - uint16_t, int, const struct ofp_action_header *, size_t); -int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t, - uint16_t, int); -int chain_timeout(struct sw_chain *); -void chain_destroy(struct sw_chain *); - -int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void), - struct module *owner); -void chain_clear_hw_hook(void); - -#endif /* chain.h */ diff --git a/datapath/crc32.c b/datapath/crc32.c deleted file mode 100644 index 4027ee26..00000000 --- a/datapath/crc32.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008 The Board of Trustees of The Leland - * Stanford Junior University - */ - -#include "crc32.h" - -void crc32_init(struct crc32 *crc, unsigned int polynomial) -{ - int i; - - for (i = 0; i < CRC32_TABLE_SIZE; ++i) { - unsigned int reg = i << 24; - int j; - for (j = 0; j < CRC32_TABLE_BITS; j++) { - int topBit = (reg & 0x80000000) != 0; - reg <<= 1; - if (topBit) - reg ^= polynomial; - } - crc->table[i] = reg; - } -} - -unsigned int crc32_calculate(const struct crc32 *crc, - const void *data_, size_t n_bytes) -{ - // FIXME: this can be optimized by unrolling, see linux-2.6/lib/crc32.c. 
- const uint8_t *data = data_; - unsigned int result = 0; - size_t i; - - for (i = 0; i < n_bytes; i++) { - unsigned int top = result >> 24; - top ^= data[i]; - result = (result << 8) ^ crc->table[top]; - } - return result; -} diff --git a/datapath/crc32.h b/datapath/crc32.h deleted file mode 100644 index 21a350a9..00000000 --- a/datapath/crc32.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CRC32_H -#define CRC32_H 1 - -#include -#ifndef __KERNEL__ -#include -#endif -#include - -#define CRC32_TABLE_BITS 8 -#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS) - -struct crc32 { - unsigned int table[CRC32_TABLE_SIZE]; -}; - -void crc32_init(struct crc32 *, unsigned int polynomial); -unsigned int crc32_calculate(const struct crc32 *, - const void *data_, size_t n_bytes); - - -#endif /* crc32.h */ diff --git a/datapath/datapath.c b/datapath/datapath.c index 2e1ba6ab..b034e17c 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -8,11 +8,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -20,11 +20,12 @@ #include #include #include -#include +#include #include #include #include #include +#include #include #include #include @@ -35,39 +36,16 @@ #include #include -#include "openflow/nicira-ext.h" -#include "openflow/openflow-netlink.h" +#include "openflow/datapath-protocol.h" #include "datapath.h" -#include "nx_act_snat.h" -#include "table.h" -#include "chain.h" +#include "snat.h" +#include "actions.h" #include "dp_dev.h" -#include "forward.h" #include "flow.h" #include "compat.h" -/* Strings to describe the manufacturer, hardware, and software. This data - * is queriable through the switch description stats message. 
*/ -static char mfr_desc[DESC_STR_LEN] = "Nicira Networks, Inc."; -static char hw_desc[DESC_STR_LEN] = "Reference Linux Kernel Module"; -static char sw_desc[DESC_STR_LEN] = VERSION BUILDNR; -static char serial_num[SERIAL_NUM_LEN] = "None"; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -module_param_string(mfr_desc, mfr_desc, sizeof mfr_desc, 0444); -module_param_string(hw_desc, hw_desc, sizeof hw_desc, 0444); -module_param_string(sw_desc, sw_desc, sizeof sw_desc, 0444); -module_param_string(serial_num, serial_num, sizeof serial_num, 0444); -#else -MODULE_PARM(mfr_desc, "s"); -MODULE_PARM(hw_desc, "s"); -MODULE_PARM(sw_desc, "s"); -MODULE_PARM(serial_num, "s"); -#endif - - int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); EXPORT_SYMBOL(dp_ioctl_hook); @@ -83,33 +61,6 @@ EXPORT_SYMBOL(dp_add_if_hook); int (*dp_del_if_hook)(struct net_bridge_port *p); EXPORT_SYMBOL(dp_del_if_hook); -/* Number of milliseconds between runs of the maintenance thread. */ -#define MAINT_SLEEP_MSECS 1000 - -#define UINT32_MAX 4294967295U -#define UINT16_MAX 65535 -#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) - -static struct genl_family dp_genl_family; - -/* - * Datapath multicast groups. - * - * Really we want one multicast group per in-use datapath (or even more than - * one). Locking issues, however, mean that we can't allocate a multicast - * group at the point in the code where we we actually create a datapath[*], so - * we have to pre-allocate them. It's massive overkill to allocate DP_MAX of - * them in advance, since we will hardly ever actually create DP_MAX datapaths, - * so instead we allocate a few multicast groups at startup and choose one for - * each datapath by hashing its datapath index. - * - * [*] dp_genl_add, to add a new datapath, is called under the genl_lock - * mutex, and genl_register_mc_group, called to acquire a new multicast - * group ID, also acquires genl_lock, thus deadlock. - */ -#define N_MC_GROUPS 16 /* Must be power of 2. 
*/ -static struct genl_multicast_group mc_groups[N_MC_GROUPS]; - /* Datapaths. Protected on the read side by rcu_read_lock, on the write side * by dp_mutex. dp_mutex is almost completely redundant with genl_mutex * maintained by the Generic Netlink code, but the timeout path needs mutual @@ -121,233 +72,178 @@ static struct genl_multicast_group mc_groups[N_MC_GROUPS]; * It is safe to access the datapath and net_bridge_port structures with just * dp_mutex. */ -static struct datapath *dps[DP_MAX]; +static struct datapath *dps[ODP_MAX]; DEFINE_MUTEX(dp_mutex); EXPORT_SYMBOL(dp_mutex); +/* Number of milliseconds between runs of the maintenance thread. */ +#define MAINT_SLEEP_MSECS 1000 + +#ifdef SUPPORT_SNAT static int dp_maint_func(void *data); -static void init_port_status(struct net_bridge_port *p); -static int dp_genl_openflow_done(struct netlink_callback *); -static struct net_bridge_port *new_nbp(struct datapath *, - struct net_device *, int port_no); - -/* nla_shrink - reduce amount of space reserved by nla_reserve - * @skb: socket buffer from which to recover room - * @nla: netlink attribute to adjust - * @len: new length of attribute payload - * - * Reduces amount of space reserved by a call to nla_reserve. - * - * No other attributes may be added between calling nla_reserve and this - * function, since it will create a hole in the message. - */ -void nla_shrink(struct sk_buff *skb, struct nlattr *nla, int len) +#endif +static int new_nbp(struct datapath *, struct net_device *, int port_no); + +/* Must be called with rcu_read_lock or dp_mutex. */ +struct datapath *get_dp(int dp_idx) { - int delta = nla_total_size(len) - nla_total_size(nla_len(nla)); - BUG_ON(delta > 0); - skb->tail += delta; - skb->len += delta; - nla->nla_len = nla_attr_size(len); + if (dp_idx < 0 || dp_idx >= ODP_MAX) + return NULL; + return rcu_dereference(dps[dp_idx]); } +EXPORT_SYMBOL_GPL(get_dp); -/* Puts a set of openflow headers for a message of the given 'type' into 'skb'. 
- * If 'sender' is nonnull, then it is used as the message's destination. 'dp' - * must specify the datapath to use. - * - * '*max_openflow_len' receives the maximum number of bytes that are available - * for the embedded OpenFlow message. The caller must call - * resize_openflow_skb() to set the actual size of the message to this number - * of bytes or less. - * - * Returns the openflow header if successful, otherwise (if 'skb' is too small) - * an error code. */ -static void * -put_openflow_headers(struct datapath *dp, struct sk_buff *skb, uint8_t type, - const struct sender *sender, int *max_openflow_len) +struct datapath *get_dp_locked(int dp_idx) { - struct ofp_header *oh; - struct nlattr *attr; - int openflow_len; - - /* Assemble the Generic Netlink wrapper. */ - if (!genlmsg_put(skb, - sender ? sender->pid : 0, - sender ? sender->seq : 0, - &dp_genl_family, 0, DP_GENL_C_OPENFLOW)) - return ERR_PTR(-ENOBUFS); - if (nla_put_u32(skb, DP_GENL_A_DP_IDX, dp->dp_idx) < 0) - return ERR_PTR(-ENOBUFS); - openflow_len = (skb_tailroom(skb) - NLA_HDRLEN) & ~(NLA_ALIGNTO - 1); - if (openflow_len < sizeof *oh) - return ERR_PTR(-ENOBUFS); - *max_openflow_len = openflow_len; - attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, openflow_len); - BUG_ON(!attr); - - /* Fill in the header. The caller is responsible for the length. */ - oh = nla_data(attr); - oh->version = OFP_VERSION; - oh->type = type; - oh->xid = sender ? sender->xid : 0; - - return oh; + struct datapath *dp; + + mutex_lock(&dp_mutex); + dp = get_dp(dp_idx); + if (dp) + mutex_lock(&dp->mutex); + mutex_unlock(&dp_mutex); + return dp; } -/* Resizes OpenFlow header 'oh', which must be at the tail end of 'skb', to new - * length 'new_length' (in bytes), adjusting pointers and size values as - * necessary. 
*/ -static void -resize_openflow_skb(struct sk_buff *skb, - struct ofp_header *oh, size_t new_length) +static inline size_t br_nlmsg_size(void) { - struct nlattr *attr = ((void *) oh) - NLA_HDRLEN; - nla_shrink(skb, attr, new_length); - oh->length = htons(new_length); - nlmsg_end(skb, (struct nlmsghdr *) skb->data); + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1); /* IFLA_OPERSTATE */ } -/* Allocates a new skb to contain an OpenFlow message 'openflow_len' bytes in - * length. Returns a null pointer if memory is unavailable, otherwise returns - * the OpenFlow header and stores a pointer to the skb in '*pskb'. - * - * 'type' is the OpenFlow message type. If 'sender' is nonnull, then it is - * used as the message's destination. 'dp' must specify the datapath to - * use. 
*/ -static void * -alloc_openflow_skb(struct datapath *dp, size_t openflow_len, uint8_t type, - const struct sender *sender, struct sk_buff **pskb) +static int dp_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + int event, unsigned int flags) { - struct ofp_header *oh; - size_t genl_len; - struct sk_buff *skb; - int max_openflow_len; - - if ((openflow_len + sizeof(struct ofp_header)) > UINT16_MAX) { - if (net_ratelimit()) - printk(KERN_ERR "%s: alloc_openflow_skb: openflow " - "message too large: %zu\n", - dp->netdev->name, openflow_len); - return NULL; - } - - genl_len = nlmsg_total_size(GENL_HDRLEN + dp_genl_family.hdrsize); - genl_len += nla_total_size(sizeof(uint32_t)); /* DP_GENL_A_DP_IDX */ - genl_len += nla_total_size(openflow_len); /* DP_GENL_A_OPENFLOW */ - skb = *pskb = genlmsg_new(genl_len, GFP_ATOMIC); - if (!skb) { - return NULL; - } + const struct datapath *dp = port->dp; + const struct net_device *dev = port->dev; + struct ifinfomsg *hdr; + struct nlmsghdr *nlh; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; - oh = put_openflow_headers(dp, skb, type, sender, &max_openflow_len); - BUG_ON(!oh || IS_ERR(oh)); - resize_openflow_skb(skb, oh, openflow_len); + nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -EMSGSIZE; - return oh; -} + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_BRIDGE; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; -/* Returns the ID of the multicast group used by datapath 'dp'. 
*/ -static u32 -dp_mc_group(const struct datapath *dp) -{ - return mc_groups[dp->dp_idx & (N_MC_GROUPS - 1)].id; -} + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + NLA_PUT_U32(skb, IFLA_MASTER, dp->netdev->ifindex); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_U8(skb, IFLA_OPERSTATE, operstate); -/* Sends 'skb' to 'sender' if it is nonnull, otherwise multicasts 'skb' to all - * listeners. */ -static int -send_openflow_skb(const struct datapath *dp, - struct sk_buff *skb, const struct sender *sender) -{ - return (sender - ? genlmsg_unicast(skb, sender->pid) - : genlmsg_multicast(skb, 0, dp_mc_group(dp), GFP_ATOMIC)); -} + if (dev->addr_len) + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); -/* Retrieves the datapath id, which is the MAC address of the "of" device. */ -static -uint64_t get_datapath_id(struct net_device *dev) -{ - uint64_t id = 0; - int i; + if (dev->ifindex != dev->iflink) + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - for (i=0; idev_addr[i] << (8*(ETH_ALEN-1 - i)); + return nlmsg_end(skb, nlh); - return id; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } -/* Find the first free datapath index. Return the index or -1 if a free - * index could not be found. */ -int gen_dp_idx(void) +static void dp_ifinfo_notify(int event, struct net_bridge_port *port) { - int i; + struct net *net = dev_net(port->dev); + struct sk_buff *skb; + int err = -ENOBUFS; - for (i=0; i= DP_MAX) - goto err_unlock; - err = -ENODEV; if (!try_module_get(THIS_MODULE)) goto err_unlock; /* Exit early if a datapath with that number already exists. 
*/ err = -EEXIST; - if (dps[dp_idx]) - goto err_put; + if (get_dp(dp_idx)) + goto err_put_module; err = -ENOMEM; dp = kzalloc(sizeof *dp, GFP_KERNEL); if (dp == NULL) - goto err_put; + goto err_put_module; + mutex_init(&dp->mutex); dp->dp_idx = dp_idx; + for (i = 0; i < DP_N_QUEUES; i++) + skb_queue_head_init(&dp->queues[i]); + init_waitqueue_head(&dp->waitqueue); /* Setup our datapath device */ - err = dp_dev_setup(dp, dp_name); + err = dp_dev_setup(dp, devname); if (err) goto err_free_dp; - dp->chain = chain_create(dp); - if (dp->chain == NULL) + err = -ENOMEM; + rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE)); + if (!dp->table) goto err_destroy_dp_dev; INIT_LIST_HEAD(&dp->port_list); - dp->local_port = new_nbp(dp, dp->netdev, OFPP_LOCAL); - if (IS_ERR(dp->local_port)) { - err = PTR_ERR(dp->local_port); - goto err_destroy_local_port; - } + err = new_nbp(dp, dp->netdev, ODPP_LOCAL); + if (err) + goto err_destroy_table; - dp->flags = 0; - dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; + dp->drop_frags = 0; + dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); + if (!dp->stats_percpu) + goto err_destroy_local_port; +#ifdef SUPPORT_SNAT dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx); if (IS_ERR(dp->dp_task)) - goto err_destroy_chain; + goto err_destroy_stats_percpu; +#endif - dps[dp_idx] = dp; + rcu_assign_pointer(dps[dp_idx], dp); mutex_unlock(&dp_mutex); rtnl_unlock(); @@ -356,123 +252,204 @@ static int new_dp(int dp_idx, const char *dp_name) return 0; +#ifdef SUPPORT_SNAT +err_destroy_stats_percpu: + free_percpu(dp->stats_percpu); +#endif err_destroy_local_port: - dp_del_switch_port(dp->local_port); -err_destroy_chain: - chain_destroy(dp->chain); + dp_del_port(dp->ports[ODPP_LOCAL]); +err_destroy_table: + dp_table_destroy(dp->table, 0); err_destroy_dp_dev: dp_dev_destroy(dp); err_free_dp: kfree(dp); -err_put: +err_put_module: module_put(THIS_MODULE); err_unlock: mutex_unlock(&dp_mutex); rtnl_unlock(); +err: return err; } -/* Find and 
return a free port number under 'dp'. */ -static int find_portno(struct datapath *dp) +static void do_destroy_dp(struct datapath *dp) { + struct net_bridge_port *p, *n; int i; - for (i = 0; i < DP_MAX_PORTS; i++) - if (dp->ports[i] == NULL) - return i; - return -EXFULL; + +#ifdef SUPPORT_SNAT + send_sig(SIGKILL, dp->dp_task, 0); + kthread_stop(dp->dp_task); +#endif + + /* Drop references to DP. */ + list_for_each_entry_safe (p, n, &dp->port_list, node) + dp_del_port(p); + + if (dp_del_dp_hook) + dp_del_dp_hook(dp); + + rcu_assign_pointer(dps[dp->dp_idx], NULL); + synchronize_rcu(); + + /* Destroy dp->netdev. (Must follow deleting switch ports since the + * ODPP_LOCAL port has a reference to it.) */ + dp_dev_destroy(dp); + + /* Wait until no longer in use, then destroy it. */ + synchronize_rcu(); + dp_table_destroy(dp->table, 1); + for (i = 0; i < DP_N_QUEUES; i++) + skb_queue_purge(&dp->queues[i]); + free_percpu(dp->stats_percpu); + kfree(dp); + module_put(THIS_MODULE); +} + +static int destroy_dp(int dp_idx) +{ + struct net_device *dev = NULL; + struct datapath *dp; + int err; + + rtnl_lock(); + mutex_lock(&dp_mutex); + dp = get_dp(dp_idx); + err = -ENODEV; + if (!dp) + goto err_unlock; + + dev = dp->netdev; + do_destroy_dp(dp); + err = 0; + +err_unlock: + mutex_unlock(&dp_mutex); + rtnl_unlock(); + if (dev) + free_netdev(dev); + return err; } /* Called with RTNL lock and dp_mutex. 
*/ -static struct net_bridge_port *new_nbp(struct datapath *dp, - struct net_device *dev, int port_no) +static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) { struct net_bridge_port *p; if (dev->br_port != NULL) - return ERR_PTR(-EBUSY); + return -EBUSY; p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); + if (!p) + return -ENOMEM; dev_set_promiscuity(dev, 1); dev_hold(dev); - p->dp = dp; - p->dev = dev; p->port_no = port_no; spin_lock_init(&p->lock); - if (port_no != OFPP_LOCAL) + p->dp = dp; + p->dev = dev; + if (port_no != ODPP_LOCAL) rcu_assign_pointer(dev->br_port, p); - if (port_no < DP_MAX_PORTS) - rcu_assign_pointer(dp->ports[port_no], p); + rcu_assign_pointer(dp->ports[port_no], p); list_add_rcu(&p->node, &dp->port_list); + dp->n_ports++; - return p; + dp_ifinfo_notify(RTM_NEWLINK, p); + + return 0; } -/* Called with RTNL lock and dp_mutex. */ -int add_switch_port(struct datapath *dp, struct net_device *dev) +static int add_port(int dp_idx, struct odp_port __user *portp) { - struct net_bridge_port *p; + struct net_device *dev; + struct datapath *dp; + struct odp_port port; int port_no; + int err; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER - || is_dp_dev(dev)) - return -EINVAL; + err = -EFAULT; + if (copy_from_user(&port, portp, sizeof port)) + goto out; + port.devname[IFNAMSIZ - 1] = '\0'; + port_no = port.port; + + err = -EINVAL; + if (port_no < 0 || port_no >= DP_MAX_PORTS) + goto out; + + rtnl_lock(); + dp = get_dp_locked(dp_idx); + err = -ENODEV; + if (!dp) + goto out_unlock_rtnl; + + err = -ENODEV; + dev = dev_get_by_name(&init_net, port.devname); + if (!dev) + goto out_unlock_dp; - port_no = find_portno(dp); - if (port_no < 0) - return port_no; + err = -EINVAL; + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER || + is_dp_dev(dev)) + goto out_put; - p = new_nbp(dp, dev, port_no); - if (IS_ERR(p)) - return PTR_ERR(p); + err = -EEXIST; + if (dp->ports[port_no]) + goto 
out_put; - init_port_status(p); + err = new_nbp(dp, dev, port_no); + if (err) + goto out_put; if (dp_add_if_hook) - dp_add_if_hook(p); + dp_add_if_hook(dp->ports[port_no]); - /* Notify the ctlpath that this port has been added */ - dp_send_port_status(p, OFPPR_ADD); - - return 0; +out_put: + dev_put(dev); +out_unlock_dp: + mutex_unlock(&dp->mutex); +out_unlock_rtnl: + rtnl_unlock(); +out: + return err; } -/* Delete 'p' from switch. - * Called with RTNL lock and dp_mutex. */ -int dp_del_switch_port(struct net_bridge_port *p) +/* Free any SNAT configuration on the port. */ +static void free_snat(struct net_bridge_port *p) { #ifdef SUPPORT_SNAT - unsigned long flags; -#endif + unsigned long int flags; + spin_lock_irqsave(&p->lock, flags); + snat_free_conf(p); + spin_unlock_irqrestore(&p->lock, flags); +#endif /* !SUPPORT_SNAT */ +} + +int dp_del_port(struct net_bridge_port *p) +{ + ASSERT_RTNL(); #if CONFIG_SYSFS - if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook) + if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook) sysfs_remove_link(&p->dp->ifobj, p->dev->name); #endif + dp_ifinfo_notify(RTM_DELLINK, p); + + p->dp->n_ports--; /* First drop references to device. */ dev_set_promiscuity(p->dev, -1); list_del_rcu(&p->node); - if (p->port_no != OFPP_LOCAL) - rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + rcu_assign_pointer(p->dp->ports[p->port_no], NULL); rcu_assign_pointer(p->dev->br_port, NULL); /* Then wait until no one is still using it, and destroy it. */ synchronize_rcu(); + free_snat(p); -#ifdef SUPPORT_SNAT - /* Free any SNAT configuration on the port. 
*/ - spin_lock_irqsave(&p->lock, flags); - snat_free_conf(p); - spin_unlock_irqrestore(&p->lock, flags); -#endif - - /* Notify the ctlpath that this port no longer exists */ - dp_send_port_status(p, OFPPR_DELETE); - - if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook) { + if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook) { dp_del_if_hook(p); } else { dev_put(p->dev); @@ -482,45 +459,44 @@ int dp_del_switch_port(struct net_bridge_port *p) return 0; } -static void del_dp(struct datapath *dp) +static int del_port(int dp_idx, int port_no) { - struct net_bridge_port *p, *n; - - send_sig(SIGKILL, dp->dp_task, 0); - kthread_stop(dp->dp_task); - - /* Drop references to DP. */ - list_for_each_entry_safe (p, n, &dp->port_list, node) - dp_del_switch_port(p); + struct net_bridge_port *p; + struct datapath *dp; + int err; - if (dp_del_dp_hook) - dp_del_dp_hook(dp); + err = -EINVAL; + if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL) + goto out; - rcu_assign_pointer(dps[dp->dp_idx], NULL); + rtnl_lock(); + dp = get_dp_locked(dp_idx); + err = -ENODEV; + if (!dp) + goto out_unlock_rtnl; - /* Kill off local_port dev references from buffered packets that have - * associated dst entries. */ - synchronize_rcu(); - fwd_discard_all(); + p = dp->ports[port_no]; + err = -ENOENT; + if (!p) + goto out_unlock_dp; - /* Destroy dp->netdev. (Must follow deleting switch ports since - * dp->local_port has a reference to it.) */ - dp_dev_destroy(dp); + err = dp_del_port(p); - /* Wait until no longer in use, then destroy it. 
*/ - synchronize_rcu(); - chain_destroy(dp->chain); - kfree(dp); - module_put(THIS_MODULE); +out_unlock_dp: + mutex_unlock(&dp->mutex); +out_unlock_rtnl: + rtnl_unlock(); +out: + return err; } +#ifdef SUPPORT_SNAT static int dp_maint_func(void *data) { struct datapath *dp = (struct datapath *) data; allow_signal(SIGKILL); while (!signal_pending(current)) { -#ifdef SUPPORT_SNAT struct net_bridge_port *p; /* Expire old SNAT entries */ @@ -528,10 +504,6 @@ static int dp_maint_func(void *data) list_for_each_entry_rcu (p, &dp->port_list, node) snat_maint(p); rcu_read_unlock(); -#endif - - /* Timeout old entries */ - chain_timeout(dp->chain); msleep_interruptible(MAINT_SLEEP_MSECS); } while (!kthread_should_stop()) { @@ -540,6 +512,7 @@ static int dp_maint_func(void *data) } return 0; } +#endif static void do_port_input(struct net_bridge_port *p, struct sk_buff *skb) @@ -561,7 +534,40 @@ do_port_input(struct net_bridge_port *p, struct sk_buff *skb) /* Push the Ethernet header back on. */ skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); - fwd_port_input(p->dp->chain, skb, p); + dp_process_received_packet(skb, p); +} + +void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p) +{ + struct datapath *dp = p->dp; + struct dp_stats_percpu *stats; + struct odp_flow_key key; + struct sw_flow *flow; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(skb->destructor); + + /* BHs are off so we don't have to use get_cpu()/put_cpu() here. */ + stats = percpu_ptr(dp->stats_percpu, smp_processor_id()); + + if (flow_extract(skb, p ? 
p->port_no : ODPP_NONE, &key)) { + if (dp->drop_frags) { + kfree_skb(skb); + stats->n_frags++; + return; + } + } + + flow = dp_table_lookup(rcu_dereference(dp->table), &key); + if (flow) { + flow_used(flow, skb); + execute_actions(dp, skb, &key, rcu_dereference(flow->sf_acts), + GFP_ATOMIC); + stats->n_hit++; + } else { + stats->n_missed++; + dp_output_control(dp, skb, _ODPL_MISS_NR, 0); + } } /* @@ -596,276 +602,71 @@ static void dp_frame_hook(struct sk_buff *skb) } #endif -/* Forwarding output path. - * Based on net/bridge/br_forward.c. */ - -static inline unsigned packet_length(const struct sk_buff *skb) +#ifdef CONFIG_XEN +/* This code is copied verbatim from net/dev/core.c in Xen's + * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions + * directly because they aren't exported. */ +static int skb_pull_up_to(struct sk_buff *skb, void *ptr) { - unsigned length = skb->len - ETH_HLEN; - if (skb->protocol == htons(ETH_P_8021Q)) - length -= VLAN_HLEN; - return length; + if (ptr < (void *)skb->tail) + return 1; + if (__pskb_pull_tail(skb, + ptr - (void *)skb->data - skb_headlen(skb))) { + return 1; + } else { + return 0; + } } -/* Send packets out all the ports except the originating one. If the - * "flood" argument is set, only send along the minimum spanning tree. - */ -static int -output_all(struct datapath *dp, struct sk_buff *skb, int flood) +inline int skb_checksum_setup(struct sk_buff *skb) { - u32 disable = flood ? 
OFPPC_NO_FLOOD : 0; - struct net_bridge_port *p; - int prev_port = -1; - - list_for_each_entry_rcu (p, &dp->port_list, node) { - if (skb->dev == p->dev || p->config & disable) - continue; - if (prev_port != -1) { - struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - kfree_skb(skb); - return -ENOMEM; - } - dp_output_port(dp, clone, prev_port, 0); + if (skb->proto_csum_blank) { + if (skb->protocol != htons(ETH_P_IP)) + goto out; + if (!skb_pull_up_to(skb, skb->nh.iph + 1)) + goto out; + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: + skb->csum = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + skb->csum = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", skb->nh.iph->protocol); + goto out; } - prev_port = p->port_no; + if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2)) + goto out; + skb->ip_summed = CHECKSUM_HW; + skb->proto_csum_blank = 0; } - if (prev_port != -1) - dp_output_port(dp, skb, prev_port, 0); - else - kfree_skb(skb); - return 0; -} - -/* Marks 'skb' as having originated from 'in_port' in 'dp'. - FIXME: how are devices reference counted? */ -void dp_set_origin(struct datapath *dp, uint16_t in_port, - struct sk_buff *skb) -{ - struct net_bridge_port *p; - p = (in_port < DP_MAX_PORTS ? dp->ports[in_port] - : in_port == OFPP_LOCAL ? dp->local_port - : NULL); - if (p) - skb->dev = p->dev; - else - skb->dev = NULL; -} - -#ifdef SUPPORT_SNAT -static int -dp_xmit_skb_finish(struct sk_buff *skb) -{ - /* Copy back the Ethernet header that was stowed earlier. 
*/ - if (skb->protocol == htons(ETH_P_IP) && snat_copy_header(skb)) { - kfree_skb(skb); - return -EINVAL; - } - skb_reset_mac_header(skb); - - if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { - printk("dropped over-mtu packet: %d > %d\n", - packet_length(skb), skb->dev->mtu); - kfree_skb(skb); - return -E2BIG; - } - - skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); - - return 0; -} - -int -dp_xmit_skb(struct sk_buff *skb) -{ - int len = skb->len; - int err; - - skb_pull(skb, ETH_HLEN); - - /* The ip_fragment function does not copy the Ethernet header into - * the newly generated frames, so stow the original. */ - if (skb->protocol == htons(ETH_P_IP)) - snat_save_header(skb); - - if (skb->protocol == htons(ETH_P_IP) && - skb->len > skb->dev->mtu && - !skb_is_gso(skb)) { - err = ip_fragment(skb, dp_xmit_skb_finish); - } else { - err = dp_xmit_skb_finish(skb); - } - if (err) - return err; - - return len; -} -#else -int -dp_xmit_skb(struct sk_buff *skb) -{ - struct datapath *dp = skb->dev->br_port->dp; - int len = skb->len; - - if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { - printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", - dp->netdev->name, packet_length(skb), skb->dev->mtu); - kfree_skb(skb); - return -E2BIG; - } - - dev_queue_xmit(skb); - - return len; -} -#endif - -/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'. - */ -int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port, - int ignore_no_fwd) -{ - BUG_ON(!skb); - switch (out_port){ - case OFPP_IN_PORT: - /* Send it out the port it came in on, which is already set in - * the skb. 
*/ - if (!skb->dev) { - if (net_ratelimit()) - printk(KERN_NOTICE "%s: skb device not set " - "forwarding to in_port\n", - dp->netdev->name); - kfree_skb(skb); - return -ESRCH; - } - return dp_xmit_skb(skb); - - case OFPP_TABLE: { - int retval = run_flow_through_tables(dp->chain, skb, - skb->dev->br_port); - if (retval) - kfree_skb(skb); - return retval; - } - - case OFPP_FLOOD: - return output_all(dp, skb, 1); - - case OFPP_ALL: - return output_all(dp, skb, 0); - - case OFPP_CONTROLLER: - return dp_output_control(dp, skb, 0, OFPR_ACTION); - - case OFPP_LOCAL: { - struct net_device *dev = dp->netdev; -#ifdef SUPPORT_SNAT - snat_local_in(skb); -#endif - return dev ? dp_dev_recv(dev, skb) : -ESRCH; - } - - case 0 ... DP_MAX_PORTS - 1: { - struct net_bridge_port *p = dp->ports[out_port]; - if (p == NULL) - goto bad_port; - if (p->dev == skb->dev) { - /* To send to the input port, must use OFPP_IN_PORT */ - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_NOTICE "%s: can't directly " - "forward to input port\n", - dp->netdev->name); - return -EINVAL; - } - if (p->config & OFPPC_NO_FWD && !ignore_no_fwd) { - kfree_skb(skb); - return 0; - } - skb->dev = p->dev; - return dp_xmit_skb(skb); - } - - default: - goto bad_port; - } - -bad_port: - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_NOTICE "%s: can't forward to bad port %d\n", - dp->netdev->name, out_port); - return -ENOENT; -} - -#ifdef CONFIG_XEN -/* This code is copied verbatim from net/dev/core.c in Xen's - * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions - * directly because they aren't exported. 
*/ -static int skb_pull_up_to(struct sk_buff *skb, void *ptr) -{ - if (ptr < (void *)skb->tail) - return 1; - if (__pskb_pull_tail(skb, - ptr - (void *)skb->data - skb_headlen(skb))) { - return 1; - } else { - return 0; - } -} - -inline int skb_checksum_setup(struct sk_buff *skb) -{ - if (skb->proto_csum_blank) { - if (skb->protocol != htons(ETH_P_IP)) - goto out; - if (!skb_pull_up_to(skb, skb->nh.iph + 1)) - goto out; - skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; - switch (skb->nh.iph->protocol) { - case IPPROTO_TCP: - skb->csum = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - skb->csum = offsetof(struct udphdr, check); - break; - default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", skb->nh.iph->protocol); - goto out; - } - if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2)) - goto out; - skb->ip_summed = CHECKSUM_HW; - skb->proto_csum_blank = 0; - } - return 0; -out: - return -EPROTO; +out: + return -EPROTO; } #endif -/* Takes ownership of 'skb' and transmits it to 'dp''s control path. 'reason' - * indicates why 'skb' is being sent. 'max_len' sets the maximum number of - * bytes that the caller wants to be sent; a value of 0 indicates the entire - * packet should be sent. */ int -dp_output_control(struct datapath *dp, struct sk_buff *skb, - size_t max_len, int reason) +dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, + u32 arg) { - /* FIXME? Can we avoid creating a new skbuff in the case where we - * forward the whole packet? 
*/ - struct sk_buff *f_skb; - struct ofp_packet_in *opi; - size_t fwd_len, opi_len; - uint32_t buffer_id; + struct dp_stats_percpu *stats; + struct sk_buff_head *queue; + struct odp_msg *header; int err; WARN_ON_ONCE(skb_shared(skb)); + BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR); + + queue = &dp->queues[queue_no]; + err = -ENOBUFS; + if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN) + goto err; #ifdef CONFIG_XEN /* If a checksum-deferred packet is forwarded to the controller, @@ -873,1210 +674,820 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, */ err = skb_checksum_setup(skb); if (err) - goto out; + goto err; if (skb->ip_summed == CHECKSUM_HW) { err = skb_checksum_help(skb, 0); if (err) - goto out; + goto err; } #endif - buffer_id = fwd_save_skb(skb); - - fwd_len = skb->len; - if ((buffer_id != (uint32_t) -1) && max_len) - fwd_len = min(fwd_len, max_len); + err = skb_cow(skb, sizeof *header); + if (err) + goto err; + + header = (struct odp_msg*)__skb_push(skb, sizeof *header); + header->type = queue_no; + header->length = skb->len; + header->port = (skb->dev && skb->dev->br_port + ? skb->dev->br_port->port_no + : ODPP_LOCAL); + header->reserved = 0; + header->arg = arg; + skb_queue_tail(queue, skb); + wake_up_interruptible(&dp->waitqueue); + return 0; - opi_len = offsetof(struct ofp_packet_in, data) + fwd_len; - opi = alloc_openflow_skb(dp, opi_len, OFPT_PACKET_IN, NULL, &f_skb); - if (!opi) { - err = -ENOMEM; - goto out; - } - opi->buffer_id = htonl(buffer_id); - opi->total_len = htons(skb->len); - opi->in_port = htons(skb->dev && skb->dev->br_port - ? 
skb->dev->br_port->port_no - : OFPP_LOCAL); - opi->reason = reason; - opi->pad = 0; - skb_copy_bits(skb, 0, opi->data, fwd_len); - err = send_openflow_skb(dp, f_skb, NULL); +err: + stats = percpu_ptr(dp->stats_percpu, get_cpu()); + stats->n_lost++; + put_cpu(); -out: kfree_skb(skb); return err; } -static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc) +static int flush_flows(struct datapath *dp) { - unsigned long flags; - desc->port_no = htons(p->port_no); - strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN); - desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0'; - memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN); - desc->curr = 0; - desc->supported = 0; - desc->advertised = 0; - desc->peer = 0; - - spin_lock_irqsave(&p->lock, flags); - desc->config = htonl(p->config); - desc->state = htonl(p->state); - spin_unlock_irqrestore(&p->lock, flags); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24) - if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) { - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; - - if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) { - /* Set the supported features */ - if (ecmd.supported & SUPPORTED_10baseT_Half) - desc->supported |= OFPPF_10MB_HD; - if (ecmd.supported & SUPPORTED_10baseT_Full) - desc->supported |= OFPPF_10MB_FD; - if (ecmd.supported & SUPPORTED_100baseT_Half) - desc->supported |= OFPPF_100MB_HD; - if (ecmd.supported & SUPPORTED_100baseT_Full) - desc->supported |= OFPPF_100MB_FD; - if (ecmd.supported & SUPPORTED_1000baseT_Half) - desc->supported |= OFPPF_1GB_HD; - if (ecmd.supported & SUPPORTED_1000baseT_Full) - desc->supported |= OFPPF_1GB_FD; - if (ecmd.supported & SUPPORTED_10000baseT_Full) - desc->supported |= OFPPF_10GB_FD; - if (ecmd.supported & SUPPORTED_TP) - desc->supported |= OFPPF_COPPER; - if (ecmd.supported & SUPPORTED_FIBRE) - desc->supported |= OFPPF_FIBER; - if (ecmd.supported & SUPPORTED_Autoneg) - desc->supported |= OFPPF_AUTONEG; -#if LINUX_VERSION_CODE >= 
KERNEL_VERSION(2,6,14) - if (ecmd.supported & SUPPORTED_Pause) - desc->supported |= OFPPF_PAUSE; - if (ecmd.supported & SUPPORTED_Asym_Pause) - desc->supported |= OFPPF_PAUSE_ASYM; -#endif /* kernel >= 2.6.14 */ - - /* Set the advertised features */ - if (ecmd.advertising & ADVERTISED_10baseT_Half) - desc->advertised |= OFPPF_10MB_HD; - if (ecmd.advertising & ADVERTISED_10baseT_Full) - desc->advertised |= OFPPF_10MB_FD; - if (ecmd.advertising & ADVERTISED_100baseT_Half) - desc->advertised |= OFPPF_100MB_HD; - if (ecmd.advertising & ADVERTISED_100baseT_Full) - desc->advertised |= OFPPF_100MB_FD; - if (ecmd.advertising & ADVERTISED_1000baseT_Half) - desc->advertised |= OFPPF_1GB_HD; - if (ecmd.advertising & ADVERTISED_1000baseT_Full) - desc->advertised |= OFPPF_1GB_FD; - if (ecmd.advertising & ADVERTISED_10000baseT_Full) - desc->advertised |= OFPPF_10GB_FD; - if (ecmd.advertising & ADVERTISED_TP) - desc->advertised |= OFPPF_COPPER; - if (ecmd.advertising & ADVERTISED_FIBRE) - desc->advertised |= OFPPF_FIBER; - if (ecmd.advertising & ADVERTISED_Autoneg) - desc->advertised |= OFPPF_AUTONEG; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) - if (ecmd.advertising & ADVERTISED_Pause) - desc->advertised |= OFPPF_PAUSE; - if (ecmd.advertising & ADVERTISED_Asym_Pause) - desc->advertised |= OFPPF_PAUSE_ASYM; -#endif /* kernel >= 2.6.14 */ - - /* Set the current features */ - if (ecmd.speed == SPEED_10) - desc->curr = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD; - else if (ecmd.speed == SPEED_100) - desc->curr = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD; - else if (ecmd.speed == SPEED_1000) - desc->curr = (ecmd.duplex) ? 
OFPPF_1GB_FD : OFPPF_1GB_HD; - else if (ecmd.speed == SPEED_10000) - desc->curr = OFPPF_10GB_FD; - - if (ecmd.port == PORT_TP) - desc->curr |= OFPPF_COPPER; - else if (ecmd.port == PORT_FIBRE) - desc->curr |= OFPPF_FIBER; - - if (ecmd.autoneg) - desc->curr |= OFPPF_AUTONEG; - } - } -#endif - desc->curr = htonl(desc->curr); - desc->supported = htonl(desc->supported); - desc->advertised = htonl(desc->advertised); - desc->peer = htonl(desc->peer); + dp->n_flows = 0; + return dp_table_flush(dp); } -static int -fill_features_reply(struct datapath *dp, struct ofp_switch_features *ofr) +static int validate_actions(const struct sw_flow_actions *actions) { - struct net_bridge_port *p; - uint64_t dpid = get_datapath_id(dp->netdev); - int port_count = 0; + unsigned int i; - ofr->datapath_id = cpu_to_be64(dpid); + for (i = 0; i < actions->n_actions; i++) { + const union odp_action *a = &actions->actions[i]; + switch (a->type) { + case ODPAT_SNAT: + if (a->snat.port >= DP_MAX_PORTS) + return -EINVAL; +#ifndef SUPPORT_SNAT + if (net_ratelimit()) + printk(KERN_ERR "SNAT not supported\n"); + return -EOPNOTSUPP; +#endif - ofr->n_buffers = htonl(N_PKT_BUFFERS); - ofr->n_tables = dp->chain->n_tables; - ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES); - ofr->actions = htonl(OFP_SUPPORTED_ACTIONS); - memset(ofr->pad, 0, sizeof ofr->pad); + case ODPAT_OUTPUT: + if (a->output.port >= DP_MAX_PORTS) + return -EINVAL; + break; - list_for_each_entry_rcu (p, &dp->port_list, node) { - fill_port_desc(p, &ofr->ports[port_count]); - port_count++; + case ODPAT_OUTPUT_GROUP: + if (a->output_group.group >= DP_MAX_GROUPS) + return -EINVAL; + break; + + default: + if (a->type >= ODPAT_N_ACTIONS) + return -EOPNOTSUPP; + break; + } } - return port_count; + return 0; } -int -dp_send_features_reply(struct datapath *dp, const struct sender *sender) +static int set_flow_actions(struct datapath *dp, struct odp_flow __user *ufp) { - struct sk_buff *skb; - struct ofp_switch_features *ofr; - size_t 
ofr_len, port_max_len; - int port_count; - - /* Overallocate. */ - port_max_len = sizeof(struct ofp_phy_port) * DP_MAX_PORTS; - ofr = alloc_openflow_skb(dp, sizeof(*ofr) + port_max_len, - OFPT_FEATURES_REPLY, sender, &skb); - if (!ofr) - return -ENOMEM; - - /* Fill. */ - port_count = fill_features_reply(dp, ofr); - - /* Shrink to fit. */ - ofr_len = sizeof(*ofr) + (sizeof(struct ofp_phy_port) * port_count); - resize_openflow_skb(skb, &ofr->header, ofr_len); - return send_openflow_skb(dp, skb, sender); -} + struct sw_flow_actions *new_acts, *old_acts; + struct sw_flow *flow; + struct odp_flow uf; + int error; -int -dp_send_config_reply(struct datapath *dp, const struct sender *sender) -{ - struct sk_buff *skb; - struct ofp_switch_config *osc; + error = -EFAULT; + if (copy_from_user(&uf, ufp, sizeof uf)) + goto error; - osc = alloc_openflow_skb(dp, sizeof *osc, OFPT_GET_CONFIG_REPLY, sender, - &skb); - if (!osc) - return -ENOMEM; + /* Get actions. */ + new_acts = flow_actions_alloc(uf.n_actions); + error = -ENOMEM; + if (!new_acts) + goto error; + if (copy_from_user(new_acts->actions, uf.actions, + uf.n_actions * sizeof *uf.actions)) + goto error_free_actions; + error = validate_actions(new_acts); + if (error) + goto error_free_actions; + + /* Replace actions. */ + flow = dp_table_lookup(dp->table, &uf.key); + error = -ENOENT; + if (!flow) + goto error_free_actions; + old_acts = rcu_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, new_acts); + synchronize_rcu(); /* XXX expensive! 
*/ + kfree(old_acts); - osc->flags = htons(dp->flags); - osc->miss_send_len = htons(dp->miss_send_len); + return 0; - return send_openflow_skb(dp, skb, sender); +error_free_actions: + kfree(new_acts); +error: + return error; } -int -dp_send_hello(struct datapath *dp, const struct sender *sender, - const struct ofp_header *request) +static int put_stats(struct sw_flow *flow, struct __user odp_flow *ufp) { - if (request->version < OFP_VERSION) { - char err[64]; - sprintf(err, "Only version 0x%02x supported", OFP_VERSION); - dp_send_error_msg(dp, sender, OFPET_HELLO_FAILED, - OFPHFC_INCOMPATIBLE, err, strlen(err)); - return -EINVAL; + struct odp_flow_stats stats; + unsigned long flags; + + if (flow->used.tv_sec) { + stats.used_sec = flow->used.tv_sec; + stats.used_nsec = flow->used.tv_nsec; } else { - struct sk_buff *skb; - struct ofp_header *reply; + stats.used_sec = 0; + stats.used_nsec = 0; + } - reply = alloc_openflow_skb(dp, sizeof *reply, - OFPT_HELLO, sender, &skb); - if (!reply) - return -ENOMEM; + spin_lock_irqsave(&flow->lock, flags); + stats.n_packets = flow->packet_count; + stats.n_bytes = flow->byte_count; + stats.ip_tos = flow->ip_tos; + stats.tcp_flags = flow->tcp_flags; + spin_unlock_irqrestore(&flow->lock, flags); - return send_openflow_skb(dp, skb, sender); - } + return __copy_to_user(&ufp->stats, &stats, sizeof ufp->stats); } -int -dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm) +static int add_flow(struct datapath *dp, struct odp_flow __user *ufp) { - unsigned long int flags; - int port_no = ntohs(opm->port_no); - struct net_bridge_port *p; - p = (port_no < DP_MAX_PORTS ? dp->ports[port_no] - : port_no == OFPP_LOCAL ? 
dp->local_port - : NULL); + struct odp_flow uf; + struct sw_flow *flow, **bucket; + struct dp_table *table; + struct sw_flow_actions *sf_acts; + int error; - /* Make sure the port id hasn't changed since this was sent */ - if (!p || memcmp(opm->hw_addr, p->dev->dev_addr, ETH_ALEN)) - return -1; + error = -EFAULT; + if (copy_from_user(&uf, ufp, sizeof uf)) + goto error; - spin_lock_irqsave(&p->lock, flags); - if (opm->mask) { - uint32_t config_mask = ntohl(opm->mask); - p->config &= ~config_mask; - p->config |= ntohl(opm->config) & config_mask; + flow = flow_alloc(uf.n_actions); + if (flow == NULL) + goto error; + sf_acts = rcu_dereference(flow->sf_acts); + + /* Initialize flow. */ + flow->key = uf.key; + if (copy_from_user(sf_acts->actions, uf.actions, + uf.n_actions * sizeof *uf.actions)) + goto error_free_flow; + error = validate_actions(sf_acts); + if (error) + goto error_free_flow; + + flow->used.tv_sec = flow->used.tv_nsec = 0; + flow->tcp_flags = 0; + flow->ip_tos = 0; + spin_lock_init(&flow->lock); + flow->packet_count = 0; + flow->byte_count = 0; + + /* Add to table. */ + table = rcu_dereference(dp->table); + if (dp->n_flows * 4 >= table->n_buckets && + table->n_buckets < DP_MAX_BUCKETS) { + error = dp_table_expand(dp); + if (error) + goto error_free_flow; + table = dp->table; } - spin_unlock_irqrestore(&p->lock, flags); - return 0; + bucket = dp_table_lookup_for_insert(table, flow); + error = -EXFULL; + if (!bucket) + goto error_free_flow; + else if (!*bucket) { + error = 0; + rcu_assign_pointer(*bucket, flow); + dp->n_flows++; + } else { + /* Replace 'old_flow' by 'flow'. */ + struct sw_flow *old_flow = *rcu_dereference(bucket); + rcu_assign_pointer(*bucket, flow); + synchronize_rcu(); /* XXX expensive! */ + error = put_stats(old_flow, ufp) ? -EFAULT : 0; + flow_free(old_flow); + } + + return error; + +error_free_flow: + flow_free(flow); +error: + return error; } -/* Initialize the port status field of the bridge port. 
*/ -static void -init_port_status(struct net_bridge_port *p) +static int put_actions(const struct sw_flow *flow, struct odp_flow __user *ufp) { - unsigned long int flags; + union odp_action __user *actions; + struct sw_flow_actions *sf_acts; + u32 n_actions; - spin_lock_irqsave(&p->lock, flags); + if (__get_user(actions, &ufp->actions) || + __get_user(n_actions, &ufp->n_actions)) + return -EFAULT; - if (p->dev->flags & IFF_UP) - p->config &= ~OFPPC_PORT_DOWN; - else - p->config |= OFPPC_PORT_DOWN; + if (!n_actions) + return 0; + if (ufp->n_actions > INT_MAX / sizeof(union odp_action)) + return -EINVAL; - if (netif_carrier_ok(p->dev)) - p->state &= ~OFPPS_LINK_DOWN; - else - p->state |= OFPPS_LINK_DOWN; + sf_acts = rcu_dereference(flow->sf_acts); + if (__put_user(sf_acts->n_actions, &ufp->n_actions) || + (actions && copy_to_user(actions, sf_acts->actions, + sizeof(union odp_action) * + min(sf_acts->n_actions, n_actions)))) + return -EFAULT; - spin_unlock_irqrestore(&p->lock, flags); + return 0; } -int -dp_send_port_status(struct net_bridge_port *p, uint8_t status) +static int answer_query(struct sw_flow *flow, struct odp_flow __user *ufp) { - struct sk_buff *skb; - struct ofp_port_status *ops; - - ops = alloc_openflow_skb(p->dp, sizeof *ops, OFPT_PORT_STATUS, NULL, - &skb); - if (!ops) - return -ENOMEM; - ops->reason = status; - memset(ops->pad, 0, sizeof ops->pad); - fill_port_desc(p, &ops->desc); - - return send_openflow_skb(p->dp, skb, NULL); + if (put_stats(flow, ufp)) + return -EFAULT; + return put_actions(flow, ufp); } -/* Convert jiffies_64 to milliseconds. 
*/ -static u64 inline jiffies_64_to_msecs(const u64 j) +static int del_or_query_flow(struct datapath *dp, + struct odp_flow __user *ufp, + unsigned int cmd) { -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (MSEC_PER_SEC / HZ) * j; -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) - return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); -#else - return (j * MSEC_PER_SEC) / HZ; -#endif + struct dp_table *table = rcu_dereference(dp->table); + struct odp_flow uf; + struct sw_flow *flow; + int error; + + error = -EFAULT; + if (copy_from_user(&uf, ufp, sizeof uf)) + goto error; + + flow = dp_table_lookup(table, &uf.key); + error = -ENOENT; + if (!flow) + goto error; + + if (cmd == ODP_FLOW_DEL) { + /* XXX redundant lookup */ + error = dp_table_delete(table, flow); + if (error) + goto error; + dp->n_flows--; + synchronize_rcu(); /* XXX expensive! */ + error = answer_query(flow, ufp); + flow_free(flow); + } else { + error = answer_query(flow, ufp); + } + +error: + return error; } -int -dp_send_flow_end(struct datapath *dp, struct sw_flow *flow, - enum nx_flow_end_reason reason) +static int query_multiple_flows(struct datapath *dp, + const struct odp_flowvec *flowvec) { - struct sk_buff *skb; - struct nx_flow_end *nfe; - - if (!dp->send_flow_end) - return 0; + struct dp_table *table = rcu_dereference(dp->table); + int i; + for (i = 0; i < flowvec->n_flows; i++) { + struct __user odp_flow *ufp = &flowvec->flows[i]; + struct odp_flow uf; + struct sw_flow *flow; + int error; - nfe = alloc_openflow_skb(dp, sizeof *nfe, OFPT_VENDOR, 0, &skb); - if (!nfe) - return -ENOMEM; + if (__copy_from_user(&uf, ufp, sizeof uf)) + return -EFAULT; - nfe->header.vendor = htonl(NX_VENDOR_ID); - nfe->header.subtype = htonl(NXT_FLOW_END); + flow = dp_table_lookup(table, &uf.key); + if (!flow) + error = __clear_user(&ufp->stats, sizeof ufp->stats); + else + error = answer_query(flow, ufp); + if (error) + return -EFAULT; + } + return flowvec->n_flows; +} - flow_fill_match(&nfe->match, 
&flow->key); +struct list_flows_cbdata { + struct odp_flow __user *uflows; + int n_flows; + int listed_flows; +}; - nfe->priority = htons(flow->priority); - nfe->reason = reason; +static int list_flow(struct sw_flow *flow, void *cbdata_) +{ + struct list_flows_cbdata *cbdata = cbdata_; + struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; + int error; - nfe->tcp_flags = flow->tcp_flags; - nfe->ip_tos = flow->ip_tos; + if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key)) + return -EFAULT; + error = answer_query(flow, ufp); + if (error) + return error; - memset(nfe->pad, 0, sizeof nfe->pad); + if (cbdata->listed_flows >= cbdata->n_flows) + return cbdata->listed_flows; + return 0; +} - nfe->init_time = cpu_to_be64(jiffies_64_to_msecs(flow->created)); - nfe->used_time = cpu_to_be64(jiffies_64_to_msecs(flow->used)); - nfe->end_time = cpu_to_be64(jiffies_64_to_msecs(get_jiffies_64())); +static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec) +{ + struct list_flows_cbdata cbdata; + int error; - nfe->packet_count = cpu_to_be64(flow->packet_count); - nfe->byte_count = cpu_to_be64(flow->byte_count); + if (!flowvec->n_flows) + return 0; - return send_openflow_skb(dp, skb, NULL); + cbdata.uflows = flowvec->flows; + cbdata.n_flows = flowvec->n_flows; + cbdata.listed_flows = 0; + error = dp_table_foreach(rcu_dereference(dp->table), + list_flow, &cbdata); + return error ? 
error : cbdata.listed_flows; } -EXPORT_SYMBOL(dp_send_flow_end); -int -dp_send_error_msg(struct datapath *dp, const struct sender *sender, - uint16_t type, uint16_t code, const void *data, size_t len) +static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp, + int (*function)(struct datapath *, + const struct odp_flowvec *)) { - struct sk_buff *skb; - struct ofp_error_msg *oem; + struct odp_flowvec __user *uflowvec; + struct odp_flowvec flowvec; + int retval; + uflowvec = (struct odp_flowvec __user *)argp; + if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) || + copy_from_user(&flowvec, uflowvec, sizeof flowvec)) + return -EFAULT; - oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR, - sender, &skb); - if (!oem) - return -ENOMEM; + if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow)) + return -EINVAL; - oem->type = htons(type); - oem->code = htons(code); - memcpy(oem->data, data, len); + if (!access_ok(VERIFY_WRITE, flowvec.flows, + flowvec.n_flows * sizeof(struct odp_flow))) + return -EFAULT; - return send_openflow_skb(dp, skb, sender); + retval = function(dp, &flowvec); + return (retval < 0 ? retval + : retval == flowvec.n_flows ? 0 + : __put_user(retval, &uflowvec->n_flows)); } -int -dp_send_echo_reply(struct datapath *dp, const struct sender *sender, - const struct ofp_header *rq) +static int do_execute(struct datapath *dp, const struct odp_execute *executep) { + struct odp_execute execute; + struct odp_flow_key key; struct sk_buff *skb; - struct ofp_header *reply; - - reply = alloc_openflow_skb(dp, ntohs(rq->length), OFPT_ECHO_REPLY, - sender, &skb); - if (!reply) - return -ENOMEM; + struct sw_flow_actions *actions; + int err; - memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq); - return send_openflow_skb(dp, skb, sender); -} + err = -EFAULT; + if (copy_from_user(&execute, executep, sizeof execute)) + goto error; -/* Generic Netlink interface. - * - * See netlink(7) for an introduction to netlink. 
See - * http://linux-net.osdl.org/index.php/Netlink for more information and - * pointers on how to work with netlink and Generic Netlink in the kernel and - * in userspace. */ - -static struct genl_family dp_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = DP_GENL_FAMILY_NAME, - .version = 1, - .maxattr = DP_GENL_A_MAX, -}; + err = -EINVAL; + if (execute.length < ETH_HLEN || execute.length > 65535) + goto error; -/* Attribute policy: what each attribute may contain. */ -static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = { - [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, - [DP_GENL_A_DP_NAME] = { .type = NLA_NUL_STRING }, - [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 }, - [DP_GENL_A_PORTNAME] = { .type = NLA_NUL_STRING } -}; + err = -ENOMEM; + actions = flow_actions_alloc(execute.n_actions); + if (!actions) + goto error; -static int dp_genl_add(struct sk_buff *skb, struct genl_info *info) -{ - int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ? - nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1; - const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ? 
- nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL; + err = -EFAULT; + if (copy_from_user(actions->actions, execute.actions, + execute.n_actions * sizeof *execute.actions)) + goto error_free_actions; - if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME])) - return -EINVAL; + err = validate_actions(actions); + if (err) + goto error_free_actions; - if ((dp_idx == -1) && (!dp_name)) - return -EINVAL; + err = -ENOMEM; + skb = alloc_skb(execute.length, GFP_KERNEL); + if (!skb) + goto error_free_actions; + if (execute.in_port < DP_MAX_PORTS) { + struct net_bridge_port *p = dp->ports[execute.in_port]; + if (p) + skb->dev = p->dev; + } - return new_dp(dp_idx, dp_name); -} + err = -EFAULT; + if (copy_from_user(skb_put(skb, execute.length), execute.data, + execute.length)) + goto error_free_skb; -static struct genl_ops dp_genl_ops_add_dp = { - .cmd = DP_GENL_C_ADD_DP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = dp_genl_policy, - .doit = dp_genl_add, - .dumpit = NULL, -}; + flow_extract(skb, execute.in_port, &key); + err = execute_actions(dp, skb, &key, actions, GFP_KERNEL); + kfree(actions); + return err; -/* Must be called with rcu_read_lock or dp_mutex. */ -struct datapath *dp_get_by_idx(int dp_idx) -{ - if (dp_idx < 0 || dp_idx >= DP_MAX) - return NULL; - return rcu_dereference(dps[dp_idx]); +error_free_skb: + kfree_skb(skb); +error_free_actions: + kfree(actions); +error: + return err; } -EXPORT_SYMBOL(dp_get_by_idx); -/* Must be called with rcu_read_lock or dp_mutex. 
*/ -struct datapath *dp_get_by_name(const char *dp_name) +static int +get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) { + struct odp_stats stats; int i; - for (i=0; inetdev->name, dp_name)) - return dp; + + stats.n_flows = dp->n_flows; + stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2; + stats.max_capacity = DP_MAX_BUCKETS * 2; + stats.n_ports = dp->n_ports; + stats.max_ports = DP_MAX_PORTS; + stats.max_groups = DP_MAX_GROUPS; + stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0; + for_each_possible_cpu(i) { + const struct dp_stats_percpu *s; + s = percpu_ptr(dp->stats_percpu, i); + stats.n_frags += s->n_frags; + stats.n_hit += s->n_hit; + stats.n_missed += s->n_missed; + stats.n_lost += s->n_lost; } - return NULL; + return copy_to_user(statsp, &stats, sizeof stats); } -/* Must be called with rcu_read_lock or dp_mutex. */ -static struct datapath * -lookup_dp(struct genl_info *info) +static int +put_port(const struct net_bridge_port *p, struct odp_port __user *uop) { - int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ? - nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1; - const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ? - nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL; - - if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME])) - return ERR_PTR(-EINVAL); - - if (dp_idx != -1) { - struct datapath *dp = dp_get_by_idx(dp_idx); - if (!dp) - return ERR_PTR(-ENOENT); - else if (dp_name && strcmp(dp->netdev->name, dp_name)) - return ERR_PTR(-EINVAL); - else - return dp; - } else if (dp_name) { - struct datapath *dp = dp_get_by_name(dp_name); - return dp ? 
dp : ERR_PTR(-ENOENT); - } else { - return ERR_PTR(-EINVAL); - } + struct odp_port op; + memset(&op, 0, sizeof op); + strncpy(op.devname, p->dev->name, sizeof op.devname); + op.port = p->port_no; + return copy_to_user(uop, &op, sizeof op); } -static int dp_genl_del(struct sk_buff *skb, struct genl_info *info) +static int +query_port(struct datapath *dp, struct odp_port __user *uport) { - struct net_device *dev = NULL; - struct datapath *dp; - int err; + struct odp_port port; - rtnl_lock(); - mutex_lock(&dp_mutex); - dp = lookup_dp(info); - if (IS_ERR(dp)) - err = PTR_ERR(dp); - else { - dev = dp->netdev; - del_dp(dp); - err = 0; - } - mutex_unlock(&dp_mutex); - rtnl_unlock(); - if (dev) - free_netdev(dev); - return err; -} + if (copy_from_user(&port, uport, sizeof port)) + return -EFAULT; + if (port.devname[0]) { + struct net_bridge_port *p; + struct net_device *dev; + int err; -static struct genl_ops dp_genl_ops_del_dp = { - .cmd = DP_GENL_C_DEL_DP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = dp_genl_policy, - .doit = dp_genl_del, - .dumpit = NULL, -}; + port.devname[IFNAMSIZ - 1] = '\0'; -/* Queries a datapath for related information. Currently the only relevant - * information is the datapath's multicast group ID, datapath ID, and - * datapath device name. 
*/ -static int dp_genl_query(struct sk_buff *skb, struct genl_info *info) -{ - struct datapath *dp; - struct sk_buff *ans_skb = NULL; - int err; + dev = dev_get_by_name(&init_net, port.devname); + if (!dev) + return -ENODEV; - rcu_read_lock(); - dp = lookup_dp(info); - if (IS_ERR(dp)) - err = PTR_ERR(dp); - else { - void *data; - ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!ans_skb) { - err = -ENOMEM; - goto err; - } - err = -ENOMEM; - data = genlmsg_put_reply(ans_skb, info, &dp_genl_family, - 0, DP_GENL_C_QUERY_DP); - if (data == NULL) - goto err; - NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx); - NLA_PUT_STRING(ans_skb, DP_GENL_A_DP_NAME, dp->netdev->name); - NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, dp_mc_group(dp)); + p = dev->br_port; + err = p && p->dp == dp ? put_port(p, uport) : -ENOENT; + dev_put(dev); - genlmsg_end(ans_skb, data); - err = genlmsg_reply(ans_skb, info); - ans_skb = NULL; + return err; + } else { + if (port.port >= DP_MAX_PORTS) + return -EINVAL; + if (!dp->ports[port.port]) + return -ENOENT; + return put_port(dp->ports[port.port], uport); } -err: -nla_put_failure: - kfree_skb(ans_skb); - rcu_read_unlock(); - return err; } -static struct genl_ops dp_genl_ops_query_dp = { - .cmd = DP_GENL_C_QUERY_DP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = dp_genl_policy, - .doit = dp_genl_query, - .dumpit = NULL, -}; - -static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info) +static int +list_ports(struct datapath *dp, struct odp_portvec __user *pvp) { - struct datapath *dp; - struct net_device *port; - int err; - - if (!info->attrs[DP_GENL_A_PORTNAME] || - VERIFY_NUL_STRING(info->attrs[DP_GENL_A_PORTNAME])) - return -EINVAL; - - rtnl_lock(); - mutex_lock(&dp_mutex); - - /* Get datapath. */ - dp = lookup_dp(info); - if (IS_ERR(dp)) { - err = PTR_ERR(dp); - goto out_unlock; - } - - /* Get interface to add/remove. 
*/ - port = dev_get_by_name(&init_net, - nla_data(info->attrs[DP_GENL_A_PORTNAME])); - if (!port) { - err = -ENOENT; - goto out_unlock; - } - - /* Execute operation. */ - if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT) - err = add_switch_port(dp, port); - else { - if (port->br_port == NULL || port->br_port->dp != dp) { - err = -ENOENT; - goto out_put; + struct odp_portvec pv; + struct net_bridge_port *p; + int idx; + + if (copy_from_user(&pv, pvp, sizeof pv)) + return -EFAULT; + + idx = 0; + if (pv.n_ports) { + list_for_each_entry_rcu (p, &dp->port_list, node) { + if (put_port(p, &pv.ports[idx])) + return -EFAULT; + if (idx++ >= pv.n_ports) + break; } - err = dp_del_switch_port(port->br_port); } - -out_put: - dev_put(port); -out_unlock: - mutex_unlock(&dp_mutex); - rtnl_unlock(); - return err; + return put_user(idx, &pvp->n_ports); } -static struct genl_ops dp_genl_ops_add_port = { - .cmd = DP_GENL_C_ADD_PORT, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = dp_genl_policy, - .doit = dp_genl_add_del_port, - .dumpit = NULL, -}; - -static struct genl_ops dp_genl_ops_del_port = { - .cmd = DP_GENL_C_DEL_PORT, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ - .policy = dp_genl_policy, - .doit = dp_genl_add_del_port, - .dumpit = NULL, -}; - -static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info) +static int +set_port_group(struct datapath *dp, const struct odp_port_group __user *upg) { - struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW]; - struct datapath *dp; - struct ofp_header *oh; - struct sender sender; - int err; - - if (!info->attrs[DP_GENL_A_DP_IDX] || !va) - return -EINVAL; + struct odp_port_group pg; + struct odp_port_group *pgp, *old_pg; + int error; - dp = dp_get_by_idx(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); - if (!dp) - return -ENOENT; - - if (nla_len(va) < sizeof(struct ofp_header)) - return -EINVAL; - oh = nla_data(va); - - sender.xid = oh->xid; - sender.pid = info->snd_pid; - sender.seq = info->snd_seq; - - mutex_lock(&dp_mutex); - err = fwd_control_input(dp->chain, &sender, - nla_data(va), nla_len(va)); - mutex_unlock(&dp_mutex); - return err; -} - -static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = { - [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, -}; + error = -EFAULT; + if (copy_from_user(&pg, upg, sizeof pg)) + goto error; -static int desc_stats_dump(struct datapath *dp, void *state, - void *body, int *body_len) -{ - struct ofp_desc_stats *ods = body; - int n_bytes = sizeof *ods; + error = -EINVAL; + if (pg.n_ports > DP_MAX_PORTS || pg.group >= DP_MAX_GROUPS) + goto error; - if (n_bytes > *body_len) { - return -ENOBUFS; + error = -ENOMEM; + pgp = kmalloc(sizeof *pgp, GFP_KERNEL); + if (!pgp) + goto error; + pgp->ports = kmalloc(sizeof(u16) * pg.n_ports, GFP_KERNEL); + if (!pgp->ports) + goto error_free_pgp; + + pgp->n_ports = pg.n_ports; + error = -EFAULT; + if (copy_from_user(pgp->ports, pg.ports, sizeof(u16) * pg.n_ports)) + goto error_free_pgp_ports; + + old_pg = rcu_dereference(dp->groups[pg.group]); + rcu_assign_pointer(dp->groups[pg.group], pgp); + if (old_pg) { + synchronize_rcu(); /* XXX expensive! 
*/ + kfree(old_pg->ports); + kfree(old_pg); } - *body_len = n_bytes; - - strncpy(ods->mfr_desc, mfr_desc, sizeof ods->mfr_desc); - strncpy(ods->hw_desc, hw_desc, sizeof ods->hw_desc); - strncpy(ods->sw_desc, sw_desc, sizeof ods->sw_desc); - strncpy(ods->serial_num, serial_num, sizeof ods->serial_num); - return 0; -} - -struct flow_stats_state { - int table_idx; - struct sw_table_position position; - const struct ofp_flow_stats_request *rq; - void *body; - int bytes_used, bytes_allocated; -}; - -static int flow_stats_init(struct datapath *dp, const void *body, int body_len, - void **state) -{ - const struct ofp_flow_stats_request *fsr = body; - struct flow_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC); - if (!s) - return -ENOMEM; - s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id; - memset(&s->position, 0, sizeof s->position); - s->rq = fsr; - *state = s; - return 0; +error_free_pgp_ports: + kfree(pgp->ports); +error_free_pgp: + kfree(pgp); +error: + return error; } -static int flow_stats_dump_callback(struct sw_flow *flow, void *private) +static int +get_port_group(struct datapath *dp, struct odp_port_group *upg) { - struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts); - struct flow_stats_state *s = private; - struct ofp_flow_stats *ofs; - int length; - uint64_t duration; - - length = sizeof *ofs + sf_acts->actions_len; - if (length + s->bytes_used > s->bytes_allocated) - return 1; + struct odp_port_group pg, *g; + u16 n_copy; - ofs = s->body + s->bytes_used; - ofs->length = htons(length); - ofs->table_id = s->table_idx; - ofs->pad = 0; - ofs->match.wildcards = htonl(flow->key.wildcards); - ofs->match.in_port = flow->key.in_port; - memcpy(ofs->match.dl_src, flow->key.dl_src, ETH_ALEN); - memcpy(ofs->match.dl_dst, flow->key.dl_dst, ETH_ALEN); - ofs->match.dl_vlan = flow->key.dl_vlan; - ofs->match.dl_type = flow->key.dl_type; - ofs->match.nw_src = flow->key.nw_src; - ofs->match.nw_dst = flow->key.nw_dst; - ofs->match.nw_proto = 
flow->key.nw_proto; - ofs->match.pad = 0; - ofs->match.tp_src = flow->key.tp_src; - ofs->match.tp_dst = flow->key.tp_dst; - - /* The kernel doesn't support 64-bit division, so use the 'do_div' - * macro instead. The first argument is replaced with the quotient, - * while the remainder is the return value. */ - duration = get_jiffies_64() - flow->created; - do_div(duration, HZ); - ofs->duration = htonl(duration); - - ofs->priority = htons(flow->priority); - ofs->idle_timeout = htons(flow->idle_timeout); - ofs->hard_timeout = htons(flow->hard_timeout); - memset(ofs->pad2, 0, sizeof ofs->pad2); - ofs->packet_count = cpu_to_be64(flow->packet_count); - ofs->byte_count = cpu_to_be64(flow->byte_count); - memcpy(ofs->actions, sf_acts->actions, sf_acts->actions_len); - - s->bytes_used += length; - return 0; -} + if (copy_from_user(&pg, upg, sizeof pg)) + return -EFAULT; -static int flow_stats_dump(struct datapath *dp, void *state, - void *body, int *body_len) -{ - struct flow_stats_state *s = state; - struct sw_flow_key match_key; - int error = 0; - - s->bytes_used = 0; - s->bytes_allocated = *body_len; - s->body = body; - - flow_extract_match(&match_key, &s->rq->match); - while (s->table_idx < dp->chain->n_tables - && (s->rq->table_id == 0xff || s->rq->table_id == s->table_idx)) - { - struct sw_table *table = dp->chain->tables[s->table_idx]; - - error = table->iterate(table, &match_key, s->rq->out_port, - &s->position, flow_stats_dump_callback, s); - if (error) - break; + if (pg.group >= DP_MAX_GROUPS) + return -EINVAL; - s->table_idx++; - memset(&s->position, 0, sizeof s->position); - } - *body_len = s->bytes_used; - - /* If error is 0, we're done. - * Otherwise, if some bytes were used, there are more flows to come. - * Otherwise, we were not able to fit even a single flow in the body, - * which indicates that we have a single flow with too many actions to - * fit. We won't ever make any progress at that rate, so give up. */ - return !error ? 0 : s->bytes_used ? 
1 : -ENOMEM; -} + g = dp->groups[pg.group]; + n_copy = g ? min(g->n_ports, pg.n_ports) : 0; + if (n_copy && copy_to_user(pg.ports, g->ports, n_copy * sizeof(u16))) + return -EFAULT; -static void flow_stats_done(void *state) -{ - kfree(state); -} + if (put_user(g ? g->n_ports : 0, &upg->n_ports)) + return -EFAULT; -static int aggregate_stats_init(struct datapath *dp, - const void *body, int body_len, - void **state) -{ - *state = (void *)body; return 0; } -static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private) +static long openflow_ioctl(struct file *f, unsigned int cmd, + unsigned long argp) { - struct ofp_aggregate_stats_reply *rpy = private; - rpy->packet_count += flow->packet_count; - rpy->byte_count += flow->byte_count; - rpy->flow_count++; - return 0; -} + int dp_idx = iminor(f->f_path.dentry->d_inode); + struct datapath *dp; + int drop_frags, listeners, port_no; +#ifdef SUPPORT_SNAT + struct odp_snat_config osc; +#endif + int err; -static int aggregate_stats_dump(struct datapath *dp, void *state, - void *body, int *body_len) -{ - struct ofp_aggregate_stats_request *rq = state; - struct ofp_aggregate_stats_reply *rpy; - struct sw_table_position position; - struct sw_flow_key match_key; - int table_idx; - - if (*body_len < sizeof *rpy) - return -ENOBUFS; - rpy = body; - *body_len = sizeof *rpy; - - memset(rpy, 0, sizeof *rpy); - - flow_extract_match(&match_key, &rq->match); - table_idx = rq->table_id == 0xff ? 0 : rq->table_id; - memset(&position, 0, sizeof position); - while (table_idx < dp->chain->n_tables - && (rq->table_id == 0xff || rq->table_id == table_idx)) - { - struct sw_table *table = dp->chain->tables[table_idx]; - int error; + /* Handle commands with special locking requirements up front. 
*/ + switch (cmd) { + case ODP_DP_CREATE: + return create_dp(dp_idx, (char __user *)argp); - error = table->iterate(table, &match_key, rq->out_port, &position, - aggregate_stats_dump_callback, rpy); - if (error) - return error; + case ODP_DP_DESTROY: + return destroy_dp(dp_idx); + + case ODP_PORT_ADD: + return add_port(dp_idx, (struct odp_port __user *)argp); - table_idx++; - memset(&position, 0, sizeof position); + case ODP_PORT_DEL: + err = get_user(port_no, (int __user *)argp); + if (err) + break; + return del_port(dp_idx, port_no); } - rpy->packet_count = cpu_to_be64(rpy->packet_count); - rpy->byte_count = cpu_to_be64(rpy->byte_count); - rpy->flow_count = htonl(rpy->flow_count); - return 0; -} + dp = get_dp_locked(dp_idx); + if (!dp) + return -ENODEV; -static int table_stats_dump(struct datapath *dp, void *state, - void *body, int *body_len) -{ - struct ofp_table_stats *ots; - int n_bytes = dp->chain->n_tables * sizeof *ots; - int i; - if (n_bytes > *body_len) - return -ENOBUFS; - *body_len = n_bytes; - for (i = 0, ots = body; i < dp->chain->n_tables; i++, ots++) { - struct sw_table_stats stats; - dp->chain->tables[i]->stats(dp->chain->tables[i], &stats); - strncpy(ots->name, stats.name, sizeof ots->name); - ots->table_id = i; - ots->wildcards = htonl(stats.wildcards); - memset(ots->pad, 0, sizeof ots->pad); - ots->max_entries = htonl(stats.max_flows); - ots->active_count = htonl(stats.n_flows); - ots->lookup_count = cpu_to_be64(stats.n_lookup); - ots->matched_count = cpu_to_be64(stats.n_matched); - } - return 0; -} + switch (cmd) { + case ODP_DP_STATS: + err = get_dp_stats(dp, (struct odp_stats __user *)argp); + break; -struct port_stats_state { - int port; -}; + case ODP_GET_DROP_FRAGS: + err = put_user(dp->drop_frags, (int __user *)argp); + break; -static int port_stats_init(struct datapath *dp, const void *body, int body_len, - void **state) -{ - struct port_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC); - if (!s) - return -ENOMEM; - s->port = 0; - *state 
= s; - return 0; -} + case ODP_SET_DROP_FRAGS: + err = get_user(drop_frags, (int __user *)argp); + if (err) + break; + err = -EINVAL; + if (drop_frags != 0 && drop_frags != 1) + break; + dp->drop_frags = drop_frags; + err = 0; + break; -static int port_stats_dump(struct datapath *dp, void *state, - void *body, int *body_len) -{ - struct port_stats_state *s = state; - struct ofp_port_stats *ops; - int n_ports, max_ports; - int i; + case ODP_GET_LISTEN_MASK: + err = put_user((int)f->private_data, (int __user *)argp); + break; - max_ports = *body_len / sizeof *ops; - if (!max_ports) - return -ENOMEM; - ops = body; - - n_ports = 0; - for (i = s->port; i < DP_MAX_PORTS && n_ports < max_ports; i++) { - struct net_bridge_port *p = dp->ports[i]; - struct net_device_stats *stats; - if (!p) - continue; - stats = p->dev->get_stats(p->dev); - ops->port_no = htons(p->port_no); - memset(ops->pad, 0, sizeof ops->pad); - ops->rx_packets = cpu_to_be64(stats->rx_packets); - ops->tx_packets = cpu_to_be64(stats->tx_packets); - ops->rx_bytes = cpu_to_be64(stats->rx_bytes); - ops->tx_bytes = cpu_to_be64(stats->tx_bytes); - ops->rx_dropped = cpu_to_be64(stats->rx_dropped); - ops->tx_dropped = cpu_to_be64(stats->tx_dropped); - ops->rx_errors = cpu_to_be64(stats->rx_errors); - ops->tx_errors = cpu_to_be64(stats->tx_errors); - ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors); - ops->rx_over_err = cpu_to_be64(stats->rx_over_errors); - ops->rx_crc_err = cpu_to_be64(stats->rx_crc_errors); - ops->collisions = cpu_to_be64(stats->collisions); - n_ports++; - ops++; - } - s->port = i; - *body_len = n_ports * sizeof *ops; - return n_ports >= max_ports; -} + case ODP_SET_LISTEN_MASK: + err = get_user(listeners, (int __user *)argp); + if (err) + break; + err = -EINVAL; + if (listeners & ~ODPL_ALL) + break; + err = 0; + f->private_data = (void*)listeners; + break; -static void port_stats_done(void *state) -{ - kfree(state); -} + case ODP_PORT_QUERY: + err = query_port(dp, (struct odp_port __user 
*)argp); + break; -struct stats_type { - /* Minimum and maximum acceptable number of bytes in body member of - * struct ofp_stats_request. */ - size_t min_body, max_body; - - /* Prepares to dump some kind of statistics on 'dp'. 'body' and - * 'body_len' are the 'body' member of the struct ofp_stats_request. - * Returns zero if successful, otherwise a negative error code. - * May initialize '*state' to state information. May be null if no - * initialization is required.*/ - int (*init)(struct datapath *dp, const void *body, int body_len, - void **state); - - /* Dumps statistics for 'dp' into the '*body_len' bytes at 'body', and - * modifies '*body_len' to reflect the number of bytes actually used. - * ('body' will be transmitted as the 'body' member of struct - * ofp_stats_reply.) */ - int (*dump)(struct datapath *dp, void *state, - void *body, int *body_len); - - /* Cleans any state created by the init or dump functions. May be null - * if no cleanup is required. */ - void (*done)(void *state); -}; + case ODP_PORT_LIST: + err = list_ports(dp, (struct odp_portvec __user *)argp); + break; -static const struct stats_type stats[] = { - [OFPST_DESC] = { - 0, - 0, - NULL, - desc_stats_dump, - NULL - }, - [OFPST_FLOW] = { - sizeof(struct ofp_flow_stats_request), - sizeof(struct ofp_flow_stats_request), - flow_stats_init, - flow_stats_dump, - flow_stats_done - }, - [OFPST_AGGREGATE] = { - sizeof(struct ofp_aggregate_stats_request), - sizeof(struct ofp_aggregate_stats_request), - aggregate_stats_init, - aggregate_stats_dump, - NULL - }, - [OFPST_TABLE] = { - 0, - 0, - NULL, - table_stats_dump, - NULL - }, - [OFPST_PORT] = { - 0, - 0, - port_stats_init, - port_stats_dump, - port_stats_done - }, -}; + case ODP_PORT_GROUP_SET: + err = set_port_group(dp, (struct odp_port_group __user *)argp); + break; -static int -dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct datapath *dp; - struct sender sender; - const struct stats_type *s; - struct 
ofp_stats_reply *osr; - int dp_idx; - int max_openflow_len, body_len; - void *body; - int err; + case ODP_PORT_GROUP_GET: + err = get_port_group(dp, (struct odp_port_group __user *)argp); + break; - /* Set up the cleanup function for this dump. Linux 2.6.20 and later - * support setting up cleanup functions via the .doneit member of - * struct genl_ops. This kluge supports earlier versions also. */ - cb->done = dp_genl_openflow_done; - - sender.pid = NETLINK_CB(cb->skb).pid; - sender.seq = cb->nlh->nlmsg_seq; - if (!cb->args[0]) { - struct nlattr *attrs[DP_GENL_A_MAX + 1]; - struct ofp_stats_request *rq; - struct nlattr *va; - size_t len, body_len; - int type; - - err = nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, DP_GENL_A_MAX, - dp_genl_openflow_policy); - if (err < 0) - return err; - - if (!attrs[DP_GENL_A_DP_IDX]) - return -EINVAL; - dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]); - dp = dp_get_by_idx(dp_idx); - if (!dp) - return -ENOENT; + case ODP_FLOW_FLUSH: + err = flush_flows(dp); + break; - va = attrs[DP_GENL_A_OPENFLOW]; - len = nla_len(va); - if (!va || len < sizeof *rq) - return -EINVAL; + case ODP_FLOW_ADD: + err = add_flow(dp, (struct odp_flow __user *)argp); + break; - rq = nla_data(va); - sender.xid = rq->header.xid; - type = ntohs(rq->type); - if (rq->header.version != OFP_VERSION) { - dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_VERSION, rq, len); - return -EINVAL; - } - if (rq->header.type != OFPT_STATS_REQUEST - || ntohs(rq->header.length) != len) - return -EINVAL; + case ODP_FLOW_SET_ACTS: + err = set_flow_actions(dp, (struct odp_flow __user *)argp); + break; - if (type >= ARRAY_SIZE(stats) || !stats[type].dump) { - dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_STAT, rq, len); - return -EINVAL; - } + case ODP_FLOW_DEL: + case ODP_FLOW_QUERY: + err = del_or_query_flow(dp, (struct odp_flow __user *)argp, + cmd); + break; - s = &stats[type]; - body_len = len - offsetof(struct ofp_stats_request, body); - if (body_len 
< s->min_body || body_len > s->max_body) - return -EINVAL; + case ODP_FLOW_QUERY_MULTIPLE: + err = do_flowvec_ioctl(dp, argp, query_multiple_flows); + break; - cb->args[0] = 1; - cb->args[1] = dp_idx; - cb->args[2] = type; - cb->args[3] = rq->header.xid; - if (s->init) { - void *state; - err = s->init(dp, rq->body, body_len, &state); - if (err) - return err; - cb->args[4] = (long) state; - } - } else if (cb->args[0] == 1) { - sender.xid = cb->args[3]; - dp_idx = cb->args[1]; - s = &stats[cb->args[2]]; + case ODP_FLOW_LIST: + err = do_flowvec_ioctl(dp, argp, list_flows); + break; - dp = dp_get_by_idx(dp_idx); - if (!dp) - return -ENOENT; - } else { - return 0; - } + case ODP_EXECUTE: + err = do_execute(dp, (struct odp_execute __user *)argp); + break; - osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender, - &max_openflow_len); - if (IS_ERR(osr)) - return PTR_ERR(osr); - osr->type = htons(s - stats); - osr->flags = 0; - resize_openflow_skb(skb, &osr->header, max_openflow_len); - body = osr->body; - body_len = max_openflow_len - offsetof(struct ofp_stats_reply, body); - - err = s->dump(dp, (void *) cb->args[4], body, &body_len); - if (err >= 0) { - if (!err) - cb->args[0] = 2; - else - osr->flags = ntohs(OFPSF_REPLY_MORE); - resize_openflow_skb(skb, &osr->header, - (offsetof(struct ofp_stats_reply, body) - + body_len)); - err = skb->len; - } +#ifdef SUPPORT_SNAT + case ODP_SNAT_ADD_PORT: + err = -EFAULT; + if (copy_from_user(&osc, (struct odp_snat_config __user *)argp, + sizeof osc)) + break; + err = snat_add_port(dp, &osc); + break; + case ODP_SNAT_DEL_PORT: + err = get_user(port_no, (int __user *)argp); + if (err) + break; + err = snat_del_port(dp, port_no); + break; +#endif + + default: + err = -ENOIOCTLCMD; + break; + } + mutex_unlock(&dp->mutex); return err; } -static int -dp_genl_openflow_done(struct netlink_callback *cb) +static int dp_has_packet_of_interest(struct datapath *dp, int listeners) { - if (cb->args[0]) { - const struct stats_type *s = 
&stats[cb->args[2]]; - if (s->done) - s->done((void *) cb->args[4]); + int i; + for (i = 0; i < DP_N_QUEUES; i++) { + if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i])) + return 1; } return 0; } -static struct genl_ops dp_genl_ops_openflow = { - .cmd = DP_GENL_C_OPENFLOW, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = dp_genl_openflow_policy, - .doit = dp_genl_openflow, - .dumpit = dp_genl_openflow_dumpit, -}; +ssize_t openflow_read(struct file *f, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + int listeners = (int) f->private_data; + int dp_idx = iminor(f->f_path.dentry->d_inode); + struct datapath *dp = get_dp(dp_idx); + struct sk_buff *skb; + struct iovec __user iov; + size_t copy_bytes; + int retval; -static struct genl_ops *dp_genl_all_ops[] = { - /* Keep this operation first. Generic Netlink dispatching - * looks up operations with linear search, so we want it at the - * front. */ - &dp_genl_ops_openflow, - - &dp_genl_ops_add_dp, - &dp_genl_ops_del_dp, - &dp_genl_ops_query_dp, - &dp_genl_ops_add_port, - &dp_genl_ops_del_port, -}; + if (!dp) + return -ENODEV; -static int dp_init_netlink(void) -{ - int err; - int i; + if (nbytes == 0 || !listeners) + return 0; - err = genl_register_family(&dp_genl_family); - if (err) - return err; + for (;;) { + int i; - for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) { - err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]); - if (err) - goto err_unregister; - } + for (i = 0; i < DP_N_QUEUES; i++) { + if (listeners & (1 << i)) { + skb = skb_dequeue(&dp->queues[i]); + if (skb) + goto success; + } + } - for (i = 0; i < N_MC_GROUPS; i++) { - snprintf(mc_groups[i].name, sizeof mc_groups[i].name, - "openflow%d", i); - err = genl_register_mc_group(&dp_genl_family, &mc_groups[i]); - if (err < 0) - goto err_unregister; - } + if (f->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto error; + } - return 0; + wait_event_interruptible(dp->waitqueue, + 
dp_has_packet_of_interest(dp, + listeners)); -err_unregister: - genl_unregister_family(&dp_genl_family); - return err; -} + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto error; + } + } +success: + copy_bytes = min_t(size_t, skb->len, nbytes); + iov.iov_base = buf; + iov.iov_len = copy_bytes; + retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len); + if (!retval) + retval = copy_bytes; + kfree_skb(skb); -static void dp_uninit_netlink(void) -{ - genl_unregister_family(&dp_genl_family); +error: + return retval; } -/* Set the description strings if appropriate values are available from - * the DMI. */ -static void set_desc(void) +static unsigned int openflow_poll(struct file *file, poll_table *wait) { - const char *uuid = dmi_get_system_info(DMI_PRODUCT_UUID); - const char *vendor = dmi_get_system_info(DMI_SYS_VENDOR); - const char *name = dmi_get_system_info(DMI_PRODUCT_NAME); - const char *version = dmi_get_system_info(DMI_PRODUCT_VERSION); - const char *serial = dmi_get_system_info(DMI_PRODUCT_SERIAL); - const char *uptr; - - if (!uuid || *uuid == '\0' || strlen(uuid) != 36) - return; + int dp_idx = iminor(file->f_path.dentry->d_inode); + struct datapath *dp = get_dp(dp_idx); + unsigned int mask; - /* We are only interested version 1 UUIDs, since the last six bytes - * are an IEEE 802 MAC address. */ - if (uuid[14] != '1') - return; + if (dp) { + mask = 0; + poll_wait(file, &dp->waitqueue, wait); + if (dp_has_packet_of_interest(dp, (int)file->private_data)) + mask |= POLLIN | POLLRDNORM; + } else { + mask = POLLIN | POLLRDNORM | POLLHUP; + } + return mask; +} - /* Only set if the UUID is from Nicira.
*/ - uptr = uuid + 24; - if (strncmp(uptr, NICIRA_OUI_STR, strlen(NICIRA_OUI_STR))) - return; +const struct file_operations openflow_fops = { + /* XXX .aio_read = openflow_aio_read, */ + .read = openflow_read, + .poll = openflow_poll, + .unlocked_ioctl = openflow_ioctl, + /* XXX .fasync = openflow_fasync, */ +}; - if (vendor) - strlcpy(mfr_desc, vendor, sizeof(mfr_desc)); - if (name || version) - snprintf(hw_desc, sizeof(hw_desc), "%s %s", - name ? name : "", - version ? version : ""); - if (serial) - strlcpy(serial_num, serial, sizeof(serial_num)); -} +static int major; static int __init dp_init(void) { int err; - printk("OpenFlow %s, built "__DATE__" "__TIME__", " - "protocol 0x%02x\n", VERSION BUILDNR, OFP_VERSION); + printk("OpenFlow %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR); err = flow_init(); if (err) @@ -2086,20 +1497,10 @@ static int __init dp_init(void) if (err) goto error_flow_exit; - err = dp_init_netlink(); - if (err) + major = register_chrdev(0, "openflow", &openflow_fops); + if (major < 0 && (err = major)) goto error_unreg_notifier; - dp_ioctl_hook = NULL; - dp_add_dp_hook = NULL; - dp_del_dp_hook = NULL; - dp_add_if_hook = NULL; - dp_del_if_hook = NULL; - - /* Check if better descriptions of the switch are available than the - * defaults. */ - set_desc(); - /* Hook into callback used by the bridge to intercept packets. * Parasites we are.
*/ if (br_handle_frame_hook) @@ -2113,14 +1514,12 @@ error_unreg_notifier: error_flow_exit: flow_exit(); error: - printk(KERN_EMERG "openflow: failed to install!"); return err; } static void dp_cleanup(void) { - fwd_exit(); - dp_uninit_netlink(); + unregister_chrdev(major, "openflow"); unregister_netdevice_notifier(&dp_device_notifier); flow_exit(); br_handle_frame_hook = NULL; @@ -2130,5 +1529,4 @@ module_init(dp_init); module_exit(dp_cleanup); MODULE_DESCRIPTION("OpenFlow switching datapath"); -MODULE_AUTHOR("Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"); MODULE_LICENSE("GPL"); diff --git a/datapath/datapath.h b/datapath/datapath.h index babf10e0..886296c2 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -3,77 +3,81 @@ #ifndef DATAPATH_H #define DATAPATH_H 1 +#include #include #include #include #include #include #include -#include "openflow/openflow.h" -#include "openflow/nicira-ext.h" #include "flow.h" +struct sk_buff; -#define NL_FLOWS_PER_MESSAGE 100 +#define DP_MAX_PORTS 256 +#define DP_MAX_GROUPS 16 -/* Capabilities supported by this implementation. */ -#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \ - | OFPC_TABLE_STATS \ - | OFPC_PORT_STATS \ - | OFPC_MULTI_PHY_TX ) +#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*))) +#define DP_L2_SIZE (1 << DP_L2_BITS) +#define DP_L2_SHIFT 0 -/* Actions supported by this implementation. 
*/ -#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \ - | (1 << OFPAT_SET_VLAN_VID) \ - | (1 << OFPAT_SET_VLAN_PCP) \ - | (1 << OFPAT_STRIP_VLAN) \ - | (1 << OFPAT_SET_DL_SRC) \ - | (1 << OFPAT_SET_DL_DST) \ - | (1 << OFPAT_SET_NW_SRC) \ - | (1 << OFPAT_SET_NW_DST) \ - | (1 << OFPAT_SET_TP_SRC) \ - | (1 << OFPAT_SET_TP_DST) ) +#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**))) +#define DP_L1_SIZE (1 << DP_L1_BITS) +#define DP_L1_SHIFT DP_L2_BITS -struct sk_buff; +#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE) + +struct dp_table { + unsigned int n_buckets; + struct sw_flow ***flows[2]; +}; + +#define DP_N_QUEUES 2 +#define DP_MAX_QUEUE_LEN 100 -#define DP_MAX_PORTS 255 +struct dp_stats_percpu { + u64 n_frags; + u64 n_hit; + u64 n_missed; + u64 n_lost; +}; struct datapath { + struct mutex mutex; int dp_idx; - struct timer_list timer; /* Expiration timer. */ - struct sw_chain *chain; /* Forwarding rules. */ +#ifdef SUPPORT_SNAT struct task_struct *dp_task; /* Kernel thread for maintenance. */ +#endif - /* Data related to the "of" device of this datapath */ - struct net_device *netdev; + struct net_device *netdev; /* ofX network device. */ - /* Configuration set from controller */ - uint16_t flags; - uint16_t miss_send_len; + struct kobject ifobj; - /* Flag controlling whether Flow End messages are generated. */ - uint8_t send_flow_end; + int drop_frags; - struct kobject ifobj; + /* Queued data. */ + struct sk_buff_head queues[DP_N_QUEUES]; + wait_queue_head_t waitqueue; + + /* Flow table. */ + unsigned int n_flows; + struct dp_table *table; + + /* Port groups. */ + struct odp_port_group *groups[DP_MAX_GROUPS]; /* Switch ports. */ + unsigned int n_ports; struct net_bridge_port *ports[DP_MAX_PORTS]; - struct net_bridge_port *local_port; /* OFPP_LOCAL port. */ struct list_head port_list; /* All ports, including local_port. */ -}; -/* Information necessary to reply to the sender of an OpenFlow message. 
*/ -struct sender { - uint32_t xid; /* OpenFlow transaction ID of request. */ - uint32_t pid; /* Netlink process ID of sending socket. */ - uint32_t seq; /* Netlink sequence ID of request. */ + /* Stats. */ + struct dp_stats_percpu *stats_percpu; }; struct net_bridge_port { - u16 port_no; - u32 config; /* Some subset of OFPPC_* flags. */ - u32 state; /* Some subset of OFPPS_* flags. */ + u16 port_no; spinlock_t lock; struct datapath *dp; struct net_device *dev; @@ -90,27 +94,27 @@ extern int (*dp_del_dp_hook)(struct datapath *dp); extern int (*dp_add_if_hook)(struct net_bridge_port *p); extern int (*dp_del_if_hook)(struct net_bridge_port *p); -int dp_del_switch_port(struct net_bridge_port *); -int dp_xmit_skb(struct sk_buff *skb); +/* Flow table. */ +struct dp_table *dp_table_create(unsigned int n_buckets); +void dp_table_destroy(struct dp_table *, int free_flows); +struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *); +struct sw_flow **dp_table_lookup_for_insert(struct dp_table *table, + struct sw_flow *target); +int dp_table_delete(struct dp_table *, struct sw_flow *); +int dp_table_expand(struct datapath *); +int dp_table_flush(struct datapath *); +int dp_table_foreach(struct dp_table *table, + int (*callback)(struct sw_flow *flow, void *aux), + void *aux); + +void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *); +int dp_del_port(struct net_bridge_port *); int dp_output_port(struct datapath *, struct sk_buff *, int out_port, int ignore_no_fwd); -int dp_output_control(struct datapath *, struct sk_buff *, size_t, int); -void dp_set_origin(struct datapath *, uint16_t, struct sk_buff *); -int dp_send_features_reply(struct datapath *, const struct sender *); -int dp_send_config_reply(struct datapath *, const struct sender *); -int dp_send_port_status(struct net_bridge_port *p, uint8_t status); -int dp_send_flow_end(struct datapath *, struct sw_flow *, - enum nx_flow_end_reason); -int dp_send_error_msg(struct datapath 
*, const struct sender *, - uint16_t, uint16_t, const void *, size_t); -int dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm); -int dp_send_echo_reply(struct datapath *, const struct sender *, - const struct ofp_header *); -int dp_send_hello(struct datapath *, const struct sender *, - const struct ofp_header *); +int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg); +void dp_set_origin(struct datapath *, u16, struct sk_buff *); /* Should hold at least RCU read lock when calling */ -struct datapath *dp_get_by_idx(int dp_idx); -struct datapath *dp_get_by_name(const char *dp_name); +struct datapath *get_dp(int dp_idx); #endif /* datapath.h */ diff --git a/datapath/dp_act.c b/datapath/dp_act.c deleted file mode 100644 index f2964222..00000000 --- a/datapath/dp_act.c +++ /dev/null @@ -1,515 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland - * Stanford Junior University - */ - -/* Functions for executing OpenFlow actions. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "forward.h" -#include "dp_act.h" -#include "openflow/nicira-ext.h" -#include "nx_act.h" - - -static uint16_t -validate_output(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_output *oa = (struct ofp_action_output *)ah; - - if (oa->port == htons(OFPP_NONE) || - (!(key->wildcards & OFPFW_IN_PORT) && oa->port == key->in_port)) - return OFPBAC_BAD_OUT_PORT; - - return ACT_VALIDATION_OK; -} - -static int -do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len, - int out_port, int ignore_no_fwd) -{ - if (!skb) - return -ENOMEM; - return (likely(out_port != OFPP_CONTROLLER) - ? 
dp_output_port(dp, skb, out_port, ignore_no_fwd) - : dp_output_control(dp, skb, max_len, OFPR_ACTION)); -} - - -static struct sk_buff * -vlan_pull_tag(struct sk_buff *skb) -{ - struct vlan_ethhdr *vh = vlan_eth_hdr(skb); - struct ethhdr *eh; - - - /* Verify we were given a vlan packet */ - if (vh->h_vlan_proto != htons(ETH_P_8021Q)) - return skb; - - memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); - - eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); - - skb->protocol = eh->h_proto; - skb->mac_header += VLAN_HLEN; - - return skb; -} - - -static struct sk_buff * -modify_vlan_tci(struct sk_buff *skb, struct sw_flow_key *key, - uint16_t tci, uint16_t mask) -{ - struct vlan_ethhdr *vh = vlan_eth_hdr(skb); - - if (key->dl_vlan != htons(OFP_VLAN_NONE)) { - /* Modify vlan id, but maintain other TCI values */ - vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci); - } else { - /* Add vlan header */ - - /* xxx The vlan_put_tag function, doesn't seem to work - * xxx reliably when it attempts to use the hardware-accelerated - * xxx version. We'll directly use the software version - * xxx until the problem can be diagnosed. - */ - skb = __vlan_put_tag(skb, tci); - vh = vlan_eth_hdr(skb); - } - key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK); - - return skb; -} - -static struct sk_buff * -set_vlan_vid(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah; - uint16_t tci = ntohs(va->vlan_vid); - - return modify_vlan_tci(skb, key, tci, VLAN_VID_MASK); -} - -/* Mask for the priority bits in a vlan header. The kernel doesn't - * define this like it does for VID. 
*/ -#define VLAN_PCP_MASK 0xe000 - -static struct sk_buff * -set_vlan_pcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah; - uint16_t tci = (uint16_t)va->vlan_pcp << 13; - - return modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK); -} - -static struct sk_buff * -strip_vlan(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - vlan_pull_tag(skb); - key->dl_vlan = htons(OFP_VLAN_NONE); - - return skb; -} - -static struct sk_buff * -set_dl_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah; - struct ethhdr *eh = eth_hdr(skb); - - if (da->type == htons(OFPAT_SET_DL_SRC)) - memcpy(eh->h_source, da->dl_addr, sizeof eh->h_source); - else - memcpy(eh->h_dest, da->dl_addr, sizeof eh->h_dest); - - return skb; -} - -/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field - * covered by the sum has been changed from 'from' to 'to'. If set, - * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. - * Based on nf_proto_csum_replace4. 
*/ -static void update_csum(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr) -{ - __be32 diff[] = { ~from, to }; - if (skb->ip_summed != CHECKSUM_PARTIAL) { - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), - ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial((char *)diff, sizeof(diff), - ~skb->csum); - } else if (pseudohdr) - *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), - csum_unfold(*sum))); -} - -static struct sk_buff * -set_nw_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah; - uint16_t eth_proto = ntohs(key->dl_type); - - if (eth_proto == ETH_P_IP) { - struct iphdr *nh = ip_hdr(skb); - uint32_t new, *field; - - new = na->nw_addr; - - if (ah->type == htons(OFPAT_SET_NW_SRC)) - field = &nh->saddr; - else - field = &nh->daddr; - - if (key->nw_proto == IPPROTO_TCP) { - struct tcphdr *th = tcp_hdr(skb); - update_csum(&th->check, skb, *field, new, 1); - } else if (key->nw_proto == IPPROTO_UDP) { - struct udphdr *th = udp_hdr(skb); - update_csum(&th->check, skb, *field, new, 1); - } - update_csum(&nh->check, skb, *field, new, 0); - *field = new; - } - - return skb; -} - -static struct sk_buff * -set_tp_port(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah; - uint16_t eth_proto = ntohs(key->dl_type); - - if (eth_proto == ETH_P_IP) { - uint16_t new, *field; - - new = ta->tp_port; - - if (key->nw_proto == IPPROTO_TCP) { - struct tcphdr *th = tcp_hdr(skb); - - if (ah->type == htons(OFPAT_SET_TP_SRC)) - field = &th->source; - else - field = &th->dest; - - update_csum(&th->check, skb, *field, new, 1); - *field = new; - } else if (key->nw_proto == IPPROTO_UDP) { - struct udphdr *th = udp_hdr(skb); - - if (ah->type == htons(OFPAT_SET_TP_SRC)) - field = 
&th->source; - else - field = &th->dest; - - update_csum(&th->check, skb, *field, new, 1); - *field = new; - } - } - - return skb; -} - -struct openflow_action { - size_t min_size; - size_t max_size; - uint16_t (*validate)(struct datapath *dp, - const struct sw_flow_key *key, - const struct ofp_action_header *ah); - struct sk_buff *(*execute)(struct sk_buff *skb, - struct sw_flow_key *key, - const struct ofp_action_header *ah); -}; - -static const struct openflow_action of_actions[] = { - [OFPAT_OUTPUT] = { - sizeof(struct ofp_action_output), - sizeof(struct ofp_action_output), - validate_output, - NULL /* This is optimized into execute_actions */ - }, - [OFPAT_SET_VLAN_VID] = { - sizeof(struct ofp_action_vlan_vid), - sizeof(struct ofp_action_vlan_vid), - NULL, - set_vlan_vid - }, - [OFPAT_SET_VLAN_PCP] = { - sizeof(struct ofp_action_vlan_pcp), - sizeof(struct ofp_action_vlan_pcp), - NULL, - set_vlan_pcp - }, - [OFPAT_STRIP_VLAN] = { - sizeof(struct ofp_action_header), - sizeof(struct ofp_action_header), - NULL, - strip_vlan - }, - [OFPAT_SET_DL_SRC] = { - sizeof(struct ofp_action_dl_addr), - sizeof(struct ofp_action_dl_addr), - NULL, - set_dl_addr - }, - [OFPAT_SET_DL_DST] = { - sizeof(struct ofp_action_dl_addr), - sizeof(struct ofp_action_dl_addr), - NULL, - set_dl_addr - }, - [OFPAT_SET_NW_SRC] = { - sizeof(struct ofp_action_nw_addr), - sizeof(struct ofp_action_nw_addr), - NULL, - set_nw_addr - }, - [OFPAT_SET_NW_DST] = { - sizeof(struct ofp_action_nw_addr), - sizeof(struct ofp_action_nw_addr), - NULL, - set_nw_addr - }, - [OFPAT_SET_TP_SRC] = { - sizeof(struct ofp_action_tp_port), - sizeof(struct ofp_action_tp_port), - NULL, - set_tp_port - }, - [OFPAT_SET_TP_DST] = { - sizeof(struct ofp_action_tp_port), - sizeof(struct ofp_action_tp_port), - NULL, - set_tp_port - } - /* OFPAT_VENDOR is not here, since it would blow up the array size. */ -}; - -/* Validate built-in OpenFlow actions. Either returns ACT_VALIDATION_OK - * or an OFPET_BAD_ACTION error code. 
*/ -static uint16_t -validate_ofpat(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t type, uint16_t len) -{ - uint16_t ret = ACT_VALIDATION_OK; - const struct openflow_action *act = &of_actions[type]; - - if ((len < act->min_size) || (len > act->max_size)) - return OFPBAC_BAD_LEN; - - if (act->validate) - ret = act->validate(dp, key, ah); - - return ret; -} - -/* Validate vendor-defined actions. Either returns ACT_VALIDATION_OK - * or an OFPET_BAD_ACTION error code. */ -static uint16_t -validate_vendor(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t len) -{ - struct ofp_action_vendor_header *avh; - int ret = ACT_VALIDATION_OK; - - if (len < sizeof(struct ofp_action_vendor_header)) - return OFPBAC_BAD_LEN; - - avh = (struct ofp_action_vendor_header *)ah; - - switch(ntohl(avh->vendor)) { - case NX_VENDOR_ID: - ret = nx_validate_act(dp, key, (struct nx_action_header *)avh, len); - break; - - default: - return OFPBAC_BAD_VENDOR; - } - - return ret; -} - -/* Validates a list of actions. If a problem is found, a code for the - * OFPET_BAD_ACTION error type is returned. If the action list validates, - * ACT_VALIDATION_OK is returned. */ -uint16_t -validate_actions(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *actions, size_t actions_len) -{ - uint8_t *p = (uint8_t *)actions; - int err; - - while (actions_len >= sizeof(struct ofp_action_header)) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = ntohs(ah->len); - uint16_t type; - - /* Make there's enough remaining data for the specified length - * and that the action length is a multiple of 64 bits. 
*/ - if (!len || (actions_len < len) || (len % 8) != 0) - return OFPBAC_BAD_LEN; - - type = ntohs(ah->type); - if (type < ARRAY_SIZE(of_actions)) { - err = validate_ofpat(dp, key, ah, type, len); - if (err != ACT_VALIDATION_OK) - return err; - } else if (type == OFPAT_VENDOR) { - err = validate_vendor(dp, key, ah, len); - if (err != ACT_VALIDATION_OK) - return err; - } else - return OFPBAC_BAD_TYPE; - - p += len; - actions_len -= len; - } - - /* Check if there's any trailing garbage. */ - if (actions_len != 0) - return OFPBAC_BAD_LEN; - - return ACT_VALIDATION_OK; -} - -/* Execute a built-in OpenFlow action against 'skb'. */ -static struct sk_buff * -execute_ofpat(struct sk_buff *skb, struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t type) -{ - const struct openflow_action *act = &of_actions[type]; - if (act->execute && make_writable(&skb)) - skb = act->execute(skb, key, ah); - return skb; -} - -/* Execute a vendor-defined action against 'skb'. */ -static struct sk_buff * -execute_vendor(struct sk_buff *skb, const struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vendor_header *avh - = (struct ofp_action_vendor_header *)ah; - struct datapath *dp = skb->dev->br_port->dp; - - /* NB: If changes need to be made to the packet, a call should be - * made to make_writable or its equivalent first. */ - - switch(ntohl(avh->vendor)) { - case NX_VENDOR_ID: - skb = nx_execute_act(skb, key, (struct nx_action_header *)avh); - break; - - default: - /* This should not be possible due to prior validation. */ - if (net_ratelimit()) - printk(KERN_WARNING "%s: attempt to execute action " - "with unknown vendor: %#x\n", - dp->netdev->name, ntohl(avh->vendor)); - break; - } - - return skb; -} - -/* Execute a list of actions against 'skb'. 
*/ -void execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, - const struct ofp_action_header *actions, size_t actions_len, - int ignore_no_fwd) -{ - /* Every output action needs a separate clone of 'skb', but the common - * case is just a single output action, so that doing a clone and - * then freeing the original skbuff is wasteful. So the following code - * is slightly obscure just to avoid that. */ - int prev_port; - size_t max_len=0; /* Initialze to make compiler happy */ - uint8_t *p = (uint8_t *)actions; - - prev_port = -1; - - /* The action list was already validated, so we can be a bit looser - * in our sanity-checking. */ - while (actions_len > 0) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = htons(ah->len); - - WARN_ON_ONCE(skb_shared(skb)); - if (prev_port != -1) { - do_output(dp, skb_clone(skb, GFP_ATOMIC), - max_len, prev_port, ignore_no_fwd); - prev_port = -1; - } - - if (likely(ah->type == htons(OFPAT_OUTPUT))) { - struct ofp_action_output *oa = (struct ofp_action_output *)p; - prev_port = ntohs(oa->port); - max_len = ntohs(oa->max_len); - } else { - uint16_t type = ntohs(ah->type); - - if (type < ARRAY_SIZE(of_actions)) - skb = execute_ofpat(skb, key, ah, type); - else if (type == OFPAT_VENDOR) - skb = execute_vendor(skb, key, ah); - - if (!skb) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: " - "execute_actions lost skb\n", - dp->netdev->name); - return; - } - } - - p += len; - actions_len -= len; - } - if (prev_port != -1) - do_output(dp, skb, max_len, prev_port, ignore_no_fwd); - else - kfree_skb(skb); -} - -/* Utility functions. */ - -/* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to - * the copy. - * Returns 1 if successful, 0 on failure. 
*/ -int -make_writable(struct sk_buff **pskb) -{ - struct sk_buff *skb = *pskb; - if (skb_shared(skb) || skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - if (!nskb) - return 0; - kfree_skb(skb); - *pskb = nskb; - return 1; - } else { - unsigned int hdr_len = (skb_transport_offset(skb) - + sizeof(struct tcphdr)); - return pskb_may_pull(skb, min(hdr_len, skb->len)); - } -} diff --git a/datapath/dp_act.h b/datapath/dp_act.h deleted file mode 100644 index d601eca0..00000000 --- a/datapath/dp_act.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef DP_ACT_H -#define DP_ACT_H 1 - -#include "datapath.h" - -#define ACT_VALIDATION_OK ((uint16_t)-1) - -uint16_t validate_actions(struct datapath *, const struct sw_flow_key *, - const struct ofp_action_header *, size_t); -void execute_actions(struct datapath *, struct sk_buff *, - struct sw_flow_key *, const struct ofp_action_header *, - size_t action_len, int ignore_no_fwd); -int make_writable(struct sk_buff **pskb); - -#endif /* dp_act.h */ diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index 58a24698..d522347f 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -8,7 +8,6 @@ #include "datapath.h" #include "dp_dev.h" -#include "forward.h" static struct dp_dev *dp_dev_priv(struct net_device *netdev) @@ -104,7 +103,7 @@ static void dp_dev_do_xmit(struct work_struct *work) while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) { skb_reset_mac_header(skb); rcu_read_lock(); - fwd_port_input(dp->chain, skb, dp->local_port); + dp_process_received_packet(skb, dp->ports[ODPP_LOCAL]); rcu_read_unlock(); } netif_wake_queue(dp->netdev); @@ -122,12 +121,11 @@ static int dp_dev_stop(struct net_device *netdev) return 0; } -static void dp_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) +static void dp_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { + struct dp_dev *dp_dev = dp_dev_priv(netdev); strcpy(info->driver, "openflow"); - sprintf(info->version, "0x%d", OFP_VERSION); - 
strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "N/A"); + sprintf(info->bus_info, "%d", dp_dev->dp->dp_idx); } static struct ethtool_ops dp_ethtool_ops = { diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index 54c88402..425c0146 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -16,25 +16,12 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, { struct net_device *dev = ptr; struct net_bridge_port *p = dev->br_port; - unsigned long int flags; - - - /* Check if monitored port */ - if (!p) - return NOTIFY_DONE; - - spin_lock_irqsave(&p->lock, flags); - switch (event) { - case NETDEV_UNREGISTER: - spin_unlock_irqrestore(&p->lock, flags); - mutex_lock(&dp_mutex); - dp_del_switch_port(p); - mutex_unlock(&dp_mutex); - return NOTIFY_DONE; - break; + if (event == NETDEV_UNREGISTER && p) { + struct datapath *dp = p->dp; + mutex_lock(&dp->mutex); + dp_del_port(p); + mutex_unlock(&dp->mutex); } - spin_unlock_irqrestore(&p->lock, flags); - return NOTIFY_DONE; } diff --git a/datapath/flow.c b/datapath/flow.c index 8be44bd1..024b61cd 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008 The Board of Trustees of The Leland + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland * Stanford Junior University */ @@ -10,211 +10,118 @@ #include #include #include -#include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include -#include "openflow/openflow.h" -#include "openflow/nicira-ext.h" #include "compat.h" struct kmem_cache *flow_cache; -/* Internal function used to compare fields in flow. 
*/ -static inline -int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b, - uint32_t w, uint32_t src_mask, uint32_t dst_mask) +static inline int iphdr_ok(struct sk_buff *skb) { - return ((w & OFPFW_IN_PORT || a->in_port == b->in_port) - && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan) - && (w & OFPFW_DL_SRC || !memcmp(a->dl_src, b->dl_src, ETH_ALEN)) - && (w & OFPFW_DL_DST || !memcmp(a->dl_dst, b->dl_dst, ETH_ALEN)) - && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type) - && !((a->nw_src ^ b->nw_src) & src_mask) - && !((a->nw_dst ^ b->nw_dst) & dst_mask) - && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto) - && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src) - && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst)); + int nh_ofs = skb_network_offset(skb); + if (skb->len >= nh_ofs + sizeof(struct iphdr)) { + int ip_len = ip_hdrlen(skb); + return (ip_len >= sizeof(struct iphdr) + && pskb_may_pull(skb, nh_ofs + ip_len)); + } + return 0; } -/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal - * modulo wildcards in 'b', zero otherwise. */ -int flow_matches_1wild(const struct sw_flow_key *a, - const struct sw_flow_key *b) +static inline int tcphdr_ok(struct sk_buff *skb) { - return flow_fields_match(a, b, b->wildcards, - b->nw_src_mask, b->nw_dst_mask); + int th_ofs = skb_transport_offset(skb); + if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) { + int tcp_len = tcp_hdrlen(skb); + return (tcp_len >= sizeof(struct tcphdr) + && skb->len >= th_ofs + tcp_len); + } + return 0; } -EXPORT_SYMBOL(flow_matches_1wild); -/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal - * modulo wildcards in 'a' or 'b', zero otherwise. 
*/ -int flow_matches_2wild(const struct sw_flow_key *a, - const struct sw_flow_key *b) +static inline int udphdr_ok(struct sk_buff *skb) { - return flow_fields_match(a, b, - a->wildcards | b->wildcards, - a->nw_src_mask & b->nw_src_mask, - a->nw_dst_mask & b->nw_dst_mask); + int th_ofs = skb_transport_offset(skb); + return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr)); } -EXPORT_SYMBOL(flow_matches_2wild); - -/* Returns nonzero if 't' (the table entry's key) and 'd' (the key - * describing the match) match, that is, if their fields are - * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the - * wildcards must match in both 't_key' and 'd_key'. Note that the - * table's wildcards are ignored unless 'strict' is set. */ -int flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d, - int strict) + +static inline int icmphdr_ok(struct sk_buff *skb) { - if (strict && d->wildcards != t->wildcards) - return 0; - return flow_matches_1wild(t, d); + int th_ofs = skb_transport_offset(skb); + return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr)); } -EXPORT_SYMBOL(flow_matches_desc); -static uint32_t make_nw_mask(int n_wild_bits) +#define TCP_FLAGS_OFFSET 13 +#define TCP_FLAG_MASK 0x3f + +static inline struct ofp_tcphdr *ofp_tcp_hdr(const struct sk_buff *skb) { - n_wild_bits &= (1u << OFPFW_NW_SRC_BITS) - 1; - return n_wild_bits < 32 ? 
htonl(~((1u << n_wild_bits) - 1)) : 0; + return (struct ofp_tcphdr *)skb_transport_header(skb); } -void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) +void flow_used(struct sw_flow *flow, struct sk_buff *skb) { - to->wildcards = ntohl(from->wildcards) & OFPFW_ALL; - to->pad = 0; - to->in_port = from->in_port; - to->dl_vlan = from->dl_vlan; - memcpy(to->dl_src, from->dl_src, ETH_ALEN); - memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); - to->dl_type = from->dl_type; - - to->nw_src = to->nw_dst = to->nw_proto = 0; - to->tp_src = to->tp_dst = 0; - -#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST) -#define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO) - if (to->wildcards & OFPFW_DL_TYPE) { - /* Can't sensibly match on network or transport headers if the - * data link type is unknown. */ - to->wildcards |= OFPFW_NW | OFPFW_TP; - } else if (from->dl_type == htons(ETH_P_IP)) { - to->nw_src = from->nw_src; - to->nw_dst = from->nw_dst; - to->nw_proto = from->nw_proto; - - if (to->wildcards & OFPFW_NW_PROTO) { - /* Can't sensibly match on transport headers if the - * network protocol is unknown. */ - to->wildcards |= OFPFW_TP; - } else if (from->nw_proto == IPPROTO_TCP - || from->nw_proto == IPPROTO_UDP - || from->nw_proto == IPPROTO_ICMP) { - to->tp_src = from->tp_src; - to->tp_dst = from->tp_dst; - } else { - /* Transport layer fields are undefined. Mark them as - * exact-match to allow such flows to reside in - * table-hash, instead of falling into table-linear. */ - to->wildcards &= ~OFPFW_TP; + unsigned long flags; + u8 tcp_flags = 0; + + if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { + struct iphdr *nh = ip_hdr(skb); + flow->ip_tos = nh->tos; + if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) { + u8 *tcp = (u8 *)tcp_hdr(skb); + tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; } - } else { - /* Network and transport layer fields are undefined. 
Mark them - * as exact-match to allow such flows to reside in table-hash, - * instead of falling into table-linear. */ - to->wildcards &= ~(OFPFW_NW | OFPFW_TP); } - /* We set these late because code above adjusts to->wildcards. */ - to->nw_src_mask = make_nw_mask(to->wildcards >> OFPFW_NW_SRC_SHIFT); - to->nw_dst_mask = make_nw_mask(to->wildcards >> OFPFW_NW_DST_SHIFT); -} - -void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from) -{ - to->wildcards = htonl(from->wildcards); - to->in_port = from->in_port; - to->dl_vlan = from->dl_vlan; - memcpy(to->dl_src, from->dl_src, ETH_ALEN); - memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); - to->dl_type = from->dl_type; - to->nw_src = from->nw_src; - to->nw_dst = from->nw_dst; - to->nw_proto = from->nw_proto; - to->tp_src = from->tp_src; - to->tp_dst = from->tp_dst; - to->pad = 0; + spin_lock_irqsave(&flow->lock, flags); + getnstimeofday(&flow->used); + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock_irqrestore(&flow->lock, flags); } -int flow_timeout(struct sw_flow *flow) +struct sw_flow_actions *flow_actions_alloc(size_t n_actions) { - if (flow->idle_timeout != OFP_FLOW_PERMANENT - && time_after64(get_jiffies_64(), flow->used + flow->idle_timeout * HZ)) - return NXFER_IDLE_TIMEOUT; - else if (flow->hard_timeout != OFP_FLOW_PERMANENT - && time_after64(get_jiffies_64(), - flow->created + flow->hard_timeout * HZ)) - return NXFER_HARD_TIMEOUT; - else - return -1; -} -EXPORT_SYMBOL(flow_timeout); - -/* Returns nonzero if 'flow' contains an output action to 'out_port' or - * has the value OFPP_NONE. 'out_port' is in network-byte order. 
*/ -int flow_has_out_port(struct sw_flow *flow, uint16_t out_port) -{ - struct sw_flow_actions *sf_acts; - size_t actions_len; - uint8_t *p; - - if (out_port == htons(OFPP_NONE)) - return 1; - - sf_acts = rcu_dereference(flow->sf_acts); - - actions_len = sf_acts->actions_len; - p = (uint8_t *)sf_acts->actions; - - while (actions_len > 0) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = ntohs(ah->len); - - if (ah->type == htons(OFPAT_OUTPUT)) { - struct ofp_action_output *oa = (struct ofp_action_output *)p; - if (oa->port == out_port) - return 1; - } + struct sw_flow_actions *sfa; - p += len; - actions_len -= len; - } + if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) + return NULL; - return 0; + sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), + GFP_KERNEL); + if (sfa) + sfa->n_actions = n_actions; + return sfa; } -EXPORT_SYMBOL(flow_has_out_port); -/* Allocates and returns a new flow with room for 'actions_len' actions, - * using allocation flags 'flags'. Returns the new flow or a null pointer - * on failure. */ -struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags) + +/* Allocates and returns a new flow with room for 'n_actions' actions. Returns + * the new flow or a null pointer on failure. 
*/ +struct sw_flow *flow_alloc(size_t n_actions) { struct sw_flow_actions *sfa; - size_t size = sizeof *sfa + actions_len; - struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags); - if (unlikely(!flow)) - return NULL; + struct sw_flow *flow; - sfa = kmalloc(size, flags); - if (unlikely(!sfa)) { - kmem_cache_free(flow_cache, flow); + sfa = flow_actions_alloc(n_actions); + if (!sfa) return NULL; - } - sfa->actions_len = actions_len; - flow->sf_acts = sfa; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (flow) + rcu_assign_pointer(flow->sf_acts, sfa); + else + kfree(sfa); return flow; } @@ -260,63 +167,16 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts) } EXPORT_SYMBOL(flow_deferred_free_acts); -/* Copies 'actions' into a newly allocated structure for use by 'flow' - * and safely frees the structure that defined the previous actions. */ -void flow_replace_acts(struct sw_flow *flow, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_flow_actions *sfa; - struct sw_flow_actions *orig_sfa = flow->sf_acts; - size_t size = sizeof *sfa + actions_len; - - sfa = kmalloc(size, GFP_ATOMIC); - if (unlikely(!sfa)) - return; - - sfa->actions_len = actions_len; - memcpy(sfa->actions, actions, actions_len); - - rcu_assign_pointer(flow->sf_acts, sfa); - flow_deferred_free_acts(orig_sfa); - - return; -} -EXPORT_SYMBOL(flow_replace_acts); - -/* Prints a representation of 'key' to the kernel log. 
*/ -void print_flow(const struct sw_flow_key *key) -{ - printk("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x" - "->%02x:%02x:%02x:%02x:%02x:%02x " - "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n", - key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan), - key->dl_src[0], key->dl_src[1], key->dl_src[2], - key->dl_src[3], key->dl_src[4], key->dl_src[5], - key->dl_dst[0], key->dl_dst[1], key->dl_dst[2], - key->dl_dst[3], key->dl_dst[4], key->dl_dst[5], - ntohs(key->dl_type), - ((unsigned char *)&key->nw_src)[0], - ((unsigned char *)&key->nw_src)[1], - ((unsigned char *)&key->nw_src)[2], - ((unsigned char *)&key->nw_src)[3], - ((unsigned char *)&key->nw_dst)[0], - ((unsigned char *)&key->nw_dst)[1], - ((unsigned char *)&key->nw_dst)[2], - ((unsigned char *)&key->nw_dst)[3], - ntohs(key->tp_src), ntohs(key->tp_dst)); -} -EXPORT_SYMBOL(print_flow); - #define SNAP_OUI_LEN 3 struct eth_snap_hdr { struct ethhdr eth; - uint8_t dsap; /* Always 0xAA */ - uint8_t ssap; /* Always 0xAA */ - uint8_t ctrl; - uint8_t oui[SNAP_OUI_LEN]; - uint16_t ethertype; + u8 dsap; /* Always 0xAA */ + u8 ssap; /* Always 0xAA */ + u8 ctrl; + u8 oui[SNAP_OUI_LEN]; + u16 ethertype; } __attribute__ ((packed)); static int is_snap(const struct eth_snap_hdr *esh) @@ -329,8 +189,7 @@ static int is_snap(const struct eth_snap_hdr *esh) /* Parses the Ethernet frame in 'skb', which was received on 'in_port', * and initializes 'key' to match. Returns 1 if 'skb' contains an IP * fragment, 0 otherwise. 
*/ -int flow_extract(struct sk_buff *skb, uint16_t in_port, - struct sw_flow_key *key) +int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) { struct ethhdr *eth; struct eth_snap_hdr *esh; @@ -338,8 +197,8 @@ int flow_extract(struct sk_buff *skb, uint16_t in_port, int nh_ofs; memset(key, 0, sizeof *key); - key->dl_vlan = htons(OFP_VLAN_NONE); - key->in_port = htons(in_port); + key->dl_vlan = htons(ODP_VLAN_NONE); + key->in_port = in_port; if (skb->len < sizeof *eth) return 0; @@ -351,13 +210,13 @@ int flow_extract(struct sk_buff *skb, uint16_t in_port, eth = eth_hdr(skb); esh = (struct eth_snap_hdr *) eth; nh_ofs = sizeof *eth; - if (likely(ntohs(eth->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF)) + if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF)) key->dl_type = eth->h_proto; else if (skb->len >= sizeof *esh && is_snap(esh)) { key->dl_type = esh->ethertype; nh_ofs = sizeof *esh; } else { - key->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE); + key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE); if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) { nh_ofs += sizeof(struct llc_pdu_un); } @@ -414,8 +273,8 @@ int flow_extract(struct sk_buff *skb, uint16_t in_port, /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store them * in 16-bit network byte order. 
*/ - key->icmp_type = htons(icmp->type); - key->icmp_code = htons(icmp->code); + key->tp_src = htons(icmp->type); + key->tp_dst = htons(icmp->code); } else { /* Avoid tricking other code into * thinking that this packet has an L4 @@ -450,3 +309,15 @@ void flow_exit(void) kmem_cache_destroy(flow_cache); } +void print_flow(const struct odp_flow_key *key) +{ +#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" +#define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5] + printk("port%04x:vlan%d mac"MAC_FMT"->"MAC_FMT" " + "type%04x proto%d ip%x->%x port%d->%d\n", + key->in_port, ntohs(key->dl_vlan), + MAC_ARG(key->dl_src), MAC_ARG(key->dl_dst), + ntohs(key->dl_type), key->nw_proto, + key->nw_src, key->nw_dst, + ntohs(key->tp_src), ntohs(key->tp_dst)); +} diff --git a/datapath/flow.h b/datapath/flow.h index df7d543e..f44774fe 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -3,197 +3,44 @@ #include #include -#include #include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include "openflow/openflow.h" +#include "openflow/datapath-protocol.h" struct sk_buff; -struct ofp_flow_mod; -/* Identification data for a flow. - * Network byte order except for the "wildcards" field. - * Ordered to make bytewise comparisons (e.g. with memcmp()) fail quickly and - * to keep the amount of padding to a minimum. - * If you change the ordering of fields here, change flow_keys_equal() to - * compare the proper fields. - */ -struct sw_flow_key { - uint32_t nw_src; /* IP source address. */ - uint32_t nw_dst; /* IP destination address. */ - uint16_t in_port; /* Input switch port */ - uint16_t dl_vlan; /* Input VLAN. */ - uint16_t dl_type; /* Ethernet frame type. */ - uint16_t tp_src; /* TCP/UDP source port. */ - uint16_t tp_dst; /* TCP/UDP destination port. */ - uint8_t dl_src[ETH_ALEN]; /* Ethernet source address. */ - uint8_t dl_dst[ETH_ALEN]; /* Ethernet destination address. 
*/ - uint8_t nw_proto; /* IP protocol. */ - uint8_t pad; /* Pad to 32-bit alignment. */ - uint32_t wildcards; /* Wildcard fields (host byte order). */ - uint32_t nw_src_mask; /* 1-bit in each significant nw_src bit. */ - uint32_t nw_dst_mask; /* 1-bit in each significant nw_dst bit. */ -}; - -/* The match fields for ICMP type and code use the transport source and - * destination port fields, respectively. */ -#define icmp_type tp_src -#define icmp_code tp_dst - -/* Compare two sw_flow_keys and return true if they are the same flow, false - * otherwise. Wildcards and netmasks are not considered. */ -static inline int flow_keys_equal(const struct sw_flow_key *a, - const struct sw_flow_key *b) -{ - return !memcmp(a, b, offsetof(struct sw_flow_key, wildcards)); -} - -/* We need to manually make sure that the structure is 32-bit aligned, - * since we don't want garbage values in compiler-generated pads from - * messing up hash matches. - */ -static inline void check_key_align(void) -{ - BUILD_BUG_ON(sizeof(struct sw_flow_key) != 44); -} - -/* We keep actions as a separate structure because we need to be able to - * swap them out atomically when the modify command comes from a Flow - * Modify message. */ struct sw_flow_actions { - size_t actions_len; struct rcu_head rcu; - - struct ofp_action_header actions[0]; + unsigned int n_actions; + union odp_action actions[]; }; -/* Locking: - * - * - Readers must take rcu_read_lock and hold it the entire time that the flow - * must continue to exist. - * - * - Writers must hold dp_mutex. - */ struct sw_flow { - struct sw_flow_key key; - - uint16_t priority; /* Only used on entries with wildcards. */ - uint16_t idle_timeout; /* Idle time before discarding (seconds). */ - uint16_t hard_timeout; /* Hard expiration time (seconds) */ - uint64_t used; /* Last used time (in jiffies). */ - + struct rcu_head rcu; + struct odp_flow_key key; struct sw_flow_actions *sf_acts; - /* For use by table implementation. 
*/ - struct list_head node; - struct list_head iter_node; - unsigned long serial; - void *private; + struct timespec used; /* Last used time. */ - spinlock_t lock; /* Lock this entry...mostly for stat updates */ - uint64_t created; /* When the flow was created (in jiffies_64). */ - uint64_t packet_count; /* Number of packets associated with this entry */ - uint64_t byte_count; /* Number of bytes associated with this entry */ + u8 ip_tos; /* IP TOS value. */ - uint8_t tcp_flags; /* Union of seen TCP flags. */ - uint8_t ip_tos; /* IP TOS value. */ - - struct rcu_head rcu; + spinlock_t lock; /* Lock for values below. */ + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + u8 tcp_flags; /* Union of seen TCP flags. */ }; -int flow_matches_1wild(const struct sw_flow_key *, const struct sw_flow_key *); -int flow_matches_2wild(const struct sw_flow_key *, const struct sw_flow_key *); -int flow_matches_desc(const struct sw_flow_key *, const struct sw_flow_key *, - int); -int flow_has_out_port(struct sw_flow *, uint16_t); -struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags); +struct sw_flow_actions *flow_actions_alloc(size_t n_actions); +struct sw_flow *flow_alloc(size_t n_actions); void flow_free(struct sw_flow *); void flow_deferred_free(struct sw_flow *); void flow_deferred_free_acts(struct sw_flow_actions *); -void flow_replace_acts(struct sw_flow *, const struct ofp_action_header *, - size_t); -int flow_extract(struct sk_buff *, uint16_t in_port, struct sw_flow_key *); -void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from); -void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from); -int flow_timeout(struct sw_flow *); - -void print_flow(const struct sw_flow_key *); - -static inline int iphdr_ok(struct sk_buff *skb) -{ - int nh_ofs = skb_network_offset(skb); - if (skb->len >= nh_ofs + sizeof(struct iphdr)) { - int ip_len = ip_hdrlen(skb); - return (ip_len >= sizeof(struct 
iphdr) - && pskb_may_pull(skb, nh_ofs + ip_len)); - } - return 0; -} - -static inline int tcphdr_ok(struct sk_buff *skb) -{ - int th_ofs = skb_transport_offset(skb); - if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) { - int tcp_len = tcp_hdrlen(skb); - return (tcp_len >= sizeof(struct tcphdr) - && skb->len >= th_ofs + tcp_len); - } - return 0; -} - -static inline int udphdr_ok(struct sk_buff *skb) -{ - int th_ofs = skb_transport_offset(skb); - return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr)); -} - -static inline int icmphdr_ok(struct sk_buff *skb) -{ - int th_ofs = skb_transport_offset(skb); - return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr)); -} - -#define TCP_FLAGS_OFFSET 13 -#define TCP_FLAG_MASK 0x3f - -static inline struct ofp_tcphdr *ofp_tcp_hdr(const struct sk_buff *skb) -{ - return (struct ofp_tcphdr *)skb_transport_header(skb); -} - -static inline void flow_used(struct sw_flow *flow, struct sk_buff *skb) -{ - unsigned long flags; - - flow->used = get_jiffies_64(); - - spin_lock_irqsave(&flow->lock, flags); - if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { - struct iphdr *nh = ip_hdr(skb); - flow->ip_tos = nh->tos; - - if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) { - uint8_t *tcp = (uint8_t *)tcp_hdr(skb); - flow->tcp_flags |= *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; - } - } - - flow->packet_count++; - flow->byte_count += skb->len; - spin_unlock_irqrestore(&flow->lock, flags); -} +int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); +void flow_used(struct sw_flow *, struct sk_buff *); -extern struct kmem_cache *flow_cache; +void print_flow(const struct odp_flow_key *); int flow_init(void); void flow_exit(void); diff --git a/datapath/forward.c b/datapath/forward.c deleted file mode 100644 index 14b88972..00000000 --- a/datapath/forward.c +++ /dev/null @@ -1,571 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. 
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland - * Stanford Junior University - */ - -#include -#include -#include -#include -#include -#include -#include "forward.h" -#include "datapath.h" -#include "openflow/nicira-ext.h" -#include "dp_act.h" -#include "nx_msg.h" -#include "chain.h" -#include "flow.h" - -/* FIXME: do we need to use GFP_ATOMIC everywhere here? */ - - -static struct sk_buff *retrieve_skb(uint32_t id); -static void discard_skb(uint32_t id); - -/* 'skb' was received on port 'p', which may be a physical switch port, the - * local port, or a null pointer. Process it according to 'chain'. Returns 0 - * if successful, in which case 'skb' is destroyed, or -ESRCH if there is no - * matching flow, in which case 'skb' still belongs to the caller. */ -int run_flow_through_tables(struct sw_chain *chain, struct sk_buff *skb, - struct net_bridge_port *p) -{ - /* Ethernet address used as the destination for STP frames. */ - static const uint8_t stp_eth_addr[ETH_ALEN] - = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 }; - struct sw_flow_key key; - struct sw_flow *flow; - - if (flow_extract(skb, p ? p->port_no : OFPP_NONE, &key) - && (chain->dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) { - /* Drop fragment. */ - kfree_skb(skb); - return 0; - } - if (p && p->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && - p->config & (compare_ether_addr(key.dl_dst, stp_eth_addr) - ? OFPPC_NO_RECV : OFPPC_NO_RECV_STP)) { - kfree_skb(skb); - return 0; - } - - flow = chain_lookup(chain, &key); - if (likely(flow != NULL)) { - struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts); - flow_used(flow, skb); - execute_actions(chain->dp, skb, &key, - sf_acts->actions, sf_acts->actions_len, 0); - return 0; - } else { - return -ESRCH; - } -} - -/* 'skb' was received on port 'p', which may be a physical switch port, the - * local port, or a null pointer. Process it according to 'chain', sending it - * up to the controller if no flow matches. 
Takes ownership of 'skb'. */ -void fwd_port_input(struct sw_chain *chain, struct sk_buff *skb, - struct net_bridge_port *p) -{ - WARN_ON_ONCE(skb_shared(skb)); - WARN_ON_ONCE(skb->destructor); - if (run_flow_through_tables(chain, skb, p)) - dp_output_control(chain->dp, skb, chain->dp->miss_send_len, - OFPR_NO_MATCH); -} - -static int -recv_hello(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - return dp_send_hello(chain->dp, sender, msg); -} - -static int -recv_features_request(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - return dp_send_features_reply(chain->dp, sender); -} - -static int -recv_get_config_request(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - return dp_send_config_reply(chain->dp, sender); -} - -static int -recv_set_config(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - const struct ofp_switch_config *osc = msg; - int flags; - - flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK); - if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL - && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) { - flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP; - } - chain->dp->flags = flags; - - chain->dp->miss_send_len = ntohs(osc->miss_send_len); - - return 0; -} - -static int -recv_packet_out(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - const struct ofp_packet_out *opo = msg; - struct sk_buff *skb; - uint16_t v_code; - struct sw_flow_key key; - size_t actions_len = ntohs(opo->actions_len); - - if (actions_len > (ntohs(opo->header.length) - sizeof *opo)) { - if (net_ratelimit()) - printk(KERN_NOTICE "%s: message too short for number " - "of actions\n", chain->dp->netdev->name); - return -EINVAL; - } - - if (ntohl(opo->buffer_id) == (uint32_t) -1) { - int data_len = ntohs(opo->header.length) - sizeof *opo - actions_len; - - /* FIXME: there is likely a way to reuse the data in msg. 
*/ - skb = alloc_skb(data_len, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - /* FIXME? We don't reserve NET_IP_ALIGN or NET_SKB_PAD since - * we're just transmitting this raw without examining anything - * at those layers. */ - skb_put(skb, data_len); - skb_copy_to_linear_data(skb, - (uint8_t *)opo->actions + actions_len, - data_len); - skb_reset_mac_header(skb); - } else { - skb = retrieve_skb(ntohl(opo->buffer_id)); - if (!skb) - return -ESRCH; - } - - dp_set_origin(chain->dp, ntohs(opo->in_port), skb); - - flow_extract(skb, ntohs(opo->in_port), &key); - - v_code = validate_actions(chain->dp, &key, opo->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code, - msg, ntohs(opo->header.length)); - goto error; - } - - execute_actions(chain->dp, skb, &key, opo->actions, actions_len, 1); - - return 0; - -error: - kfree_skb(skb); - return -EINVAL; -} - -static int -recv_port_mod(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - const struct ofp_port_mod *opm = msg; - - dp_update_port_flags(chain->dp, opm); - - return 0; -} - -static int -recv_echo_request(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - return dp_send_echo_reply(chain->dp, sender, msg); -} - -static int -recv_echo_reply(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - return 0; -} - -static int -add_flow(struct sw_chain *chain, const struct sender *sender, - const struct ofp_flow_mod *ofm) -{ - int error = -ENOMEM; - uint16_t v_code; - struct sw_flow *flow; - size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm; - - /* Allocate memory. 
*/ - flow = flow_alloc(actions_len, GFP_ATOMIC); - if (flow == NULL) - goto error; - - flow_extract_match(&flow->key, &ofm->match); - - v_code = validate_actions(chain->dp, &flow->key, ofm->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code, - ofm, ntohs(ofm->header.length)); - goto error_free_flow; - } - - /* Fill out flow. */ - flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1; - flow->idle_timeout = ntohs(ofm->idle_timeout); - flow->hard_timeout = ntohs(ofm->hard_timeout); - flow->used = flow->created = get_jiffies_64(); - flow->byte_count = 0; - flow->packet_count = 0; - flow->tcp_flags = 0; - flow->ip_tos = 0; - spin_lock_init(&flow->lock); - memcpy(flow->sf_acts->actions, ofm->actions, actions_len); - - /* Act. */ - error = chain_insert(chain, flow); - if (error == -ENOBUFS) { - dp_send_error_msg(chain->dp, sender, OFPET_FLOW_MOD_FAILED, - OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length)); - goto error_free_flow; - } else if (error) - goto error_free_flow; - error = 0; - if (ntohl(ofm->buffer_id) != (uint32_t) -1) { - struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id)); - if (skb) { - struct sw_flow_key key; - flow_used(flow, skb); - dp_set_origin(chain->dp, ntohs(ofm->match.in_port), skb); - flow_extract(skb, ntohs(ofm->match.in_port), &key); - execute_actions(chain->dp, skb, &key, ofm->actions, actions_len, 0); - } - else - error = -ESRCH; - } - return error; - -error_free_flow: - flow_free(flow); -error: - if (ntohl(ofm->buffer_id) != (uint32_t) -1) - discard_skb(ntohl(ofm->buffer_id)); - return error; -} - -static int -mod_flow(struct sw_chain *chain, const struct sender *sender, - const struct ofp_flow_mod *ofm) -{ - int error = -ENOMEM; - uint16_t v_code; - size_t actions_len; - struct sw_flow_key key; - uint16_t priority; - int strict; - - flow_extract_match(&key, &ofm->match); - - actions_len = ntohs(ofm->header.length) - sizeof *ofm; - - v_code = 
validate_actions(chain->dp, &key, ofm->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code, - ofm, ntohs(ofm->header.length)); - goto error; - } - - priority = key.wildcards ? ntohs(ofm->priority) : -1; - strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 1 : 0; - chain_modify(chain, &key, priority, strict, ofm->actions, actions_len); - - if (ntohl(ofm->buffer_id) != (uint32_t) -1) { - struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id)); - if (skb) { - struct sw_flow_key skb_key; - flow_extract(skb, ntohs(ofm->match.in_port), &skb_key); - execute_actions(chain->dp, skb, &skb_key, - ofm->actions, actions_len, 0); - } - else - error = -ESRCH; - } - return error; - -error: - if (ntohl(ofm->buffer_id) != (uint32_t) -1) - discard_skb(ntohl(ofm->buffer_id)); - return error; -} - -static int -recv_flow(struct sw_chain *chain, const struct sender *sender, const void *msg) -{ - const struct ofp_flow_mod *ofm = msg; - uint16_t command = ntohs(ofm->command); - - if (command == OFPFC_ADD) { - return add_flow(chain, sender, ofm); - } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) { - return mod_flow(chain, sender, ofm); - } else if (command == OFPFC_DELETE) { - struct sw_flow_key key; - flow_extract_match(&key, &ofm->match); - return chain_delete(chain, &key, ofm->out_port, 0, 0) ? 0 : -ESRCH; - } else if (command == OFPFC_DELETE_STRICT) { - struct sw_flow_key key; - uint16_t priority; - flow_extract_match(&key, &ofm->match); - priority = key.wildcards ? ntohs(ofm->priority) : -1; - return chain_delete(chain, &key, ofm->out_port, - priority, 1) ? 
0 : -ESRCH; - } else { - return -ENOTSUPP; - } -} - -static int -recv_vendor(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - const struct ofp_vendor_header *ovh = msg; - - switch(ntohl(ovh->vendor)) - { - case NX_VENDOR_ID: - return nx_recv_msg(chain, sender, msg); - default: - if (net_ratelimit()) - printk(KERN_NOTICE "%s: unknown vendor: 0x%x\n", - chain->dp->netdev->name, ntohl(ovh->vendor)); - dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_VENDOR, msg, ntohs(ovh->header.length)); - return -EINVAL; - } -} - -/* 'msg', which is 'length' bytes long, was received across Netlink from - * 'sender'. Apply it to 'chain'. */ -int -fwd_control_input(struct sw_chain *chain, const struct sender *sender, - const void *msg, size_t length) -{ - - struct openflow_packet { - size_t min_size; - int (*handler)(struct sw_chain *, const struct sender *, - const void *); - }; - - static const struct openflow_packet packets[] = { - [OFPT_HELLO] = { - sizeof (struct ofp_header), - recv_hello, - }, - [OFPT_ECHO_REQUEST] = { - sizeof (struct ofp_header), - recv_echo_request, - }, - [OFPT_ECHO_REPLY] = { - sizeof (struct ofp_header), - recv_echo_reply, - }, - [OFPT_VENDOR] = { - sizeof (struct ofp_vendor_header), - recv_vendor, - }, - [OFPT_FEATURES_REQUEST] = { - sizeof (struct ofp_header), - recv_features_request, - }, - [OFPT_GET_CONFIG_REQUEST] = { - sizeof (struct ofp_header), - recv_get_config_request, - }, - [OFPT_SET_CONFIG] = { - sizeof (struct ofp_switch_config), - recv_set_config, - }, - [OFPT_PACKET_OUT] = { - sizeof (struct ofp_packet_out), - recv_packet_out, - }, - [OFPT_FLOW_MOD] = { - sizeof (struct ofp_flow_mod), - recv_flow, - }, - [OFPT_PORT_MOD] = { - sizeof (struct ofp_port_mod), - recv_port_mod, - } - }; - - struct ofp_header *oh; - - oh = (struct ofp_header *) msg; - if (oh->version != OFP_VERSION - && oh->type != OFPT_HELLO - && oh->type != OFPT_ERROR - && oh->type != OFPT_ECHO_REQUEST - && oh->type != 
OFPT_ECHO_REPLY - && oh->type != OFPT_VENDOR) - { - dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_VERSION, msg, length); - return -EINVAL; - } - if (ntohs(oh->length) != length) { - if (net_ratelimit()) - printk(KERN_NOTICE "%s: received message length " - "wrong: %d/%d\n", chain->dp->netdev->name, - ntohs(oh->length), length); - return -EINVAL; - } - - if (oh->type < ARRAY_SIZE(packets)) { - const struct openflow_packet *pkt = &packets[oh->type]; - if (pkt->handler) { - if (length < pkt->min_size) - return -EFAULT; - return pkt->handler(chain, sender, msg); - } - } - dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_TYPE, msg, length); - return -EINVAL; -} - -/* Packet buffering. */ - -#define OVERWRITE_SECS 1 -#define OVERWRITE_JIFFIES (OVERWRITE_SECS * HZ) - -struct packet_buffer { - struct sk_buff *skb; - uint32_t cookie; - unsigned long exp_jiffies; -}; - -static struct packet_buffer buffers[N_PKT_BUFFERS]; -static unsigned int buffer_idx; -static DEFINE_SPINLOCK(buffer_lock); - -uint32_t fwd_save_skb(struct sk_buff *skb) -{ - struct sk_buff *old_skb = NULL; - struct packet_buffer *p; - unsigned long int flags; - uint32_t id; - - /* FIXME: Probably just need a skb_clone() here. */ - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) - return -1; - - spin_lock_irqsave(&buffer_lock, flags); - buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; - p = &buffers[buffer_idx]; - if (p->skb) { - /* Don't buffer packet if existing entry is less than - * OVERWRITE_SECS old. */ - if (time_before(jiffies, p->exp_jiffies)) { - spin_unlock_irqrestore(&buffer_lock, flags); - kfree_skb(skb); - return -1; - } else { - /* Defer kfree_skb() until interrupts re-enabled. - * FIXME: we only need to do that if it has a - * destructor, but it never should since we orphan - * sk_buffs on entry. */ - old_skb = p->skb; - } - } - /* Don't use maximum cookie value since the all-bits-1 id is - * special. 
*/ - if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) - p->cookie = 0; - p->skb = skb; - p->exp_jiffies = jiffies + OVERWRITE_JIFFIES; - id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); - spin_unlock_irqrestore(&buffer_lock, flags); - - if (old_skb) - kfree_skb(old_skb); - - return id; -} - -static struct sk_buff *retrieve_skb(uint32_t id) -{ - unsigned long int flags; - struct sk_buff *skb = NULL; - struct packet_buffer *p; - - spin_lock_irqsave(&buffer_lock, flags); - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - skb = p->skb; - p->skb = NULL; - } else { - if (net_ratelimit()) - printk(KERN_NOTICE "cookie mismatch: %x != %x\n", - id >> PKT_BUFFER_BITS, p->cookie); - } - spin_unlock_irqrestore(&buffer_lock, flags); - - return skb; -} - -void fwd_discard_all(void) -{ - int i; - - for (i = 0; i < N_PKT_BUFFERS; i++) { - struct sk_buff *skb; - unsigned long int flags; - - /* Defer kfree_skb() until interrupts re-enabled. */ - spin_lock_irqsave(&buffer_lock, flags); - skb = buffers[i].skb; - buffers[i].skb = NULL; - spin_unlock_irqrestore(&buffer_lock, flags); - - kfree_skb(skb); - } -} - -static void discard_skb(uint32_t id) -{ - struct sk_buff *old_skb = NULL; - unsigned long int flags; - struct packet_buffer *p; - - spin_lock_irqsave(&buffer_lock, flags); - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - /* Defer kfree_skb() until interrupts re-enabled. */ - old_skb = p->skb; - p->skb = NULL; - } - spin_unlock_irqrestore(&buffer_lock, flags); - - if (old_skb) - kfree_skb(old_skb); -} - -void fwd_exit(void) -{ - fwd_discard_all(); -} diff --git a/datapath/forward.h b/datapath/forward.h deleted file mode 100644 index f69a8f07..00000000 --- a/datapath/forward.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef FORWARD_H -#define FORWARD_H 1 - -#include -#include "datapath.h" -#include "flow.h" - -struct sk_buff; -struct sw_chain; -struct sender; - -/* Buffers are identified to userspace by a 31-bit opaque ID. 
We divide the ID - * into a buffer number (low bits) and a cookie (high bits). The buffer number - * is an index into an array of buffers. The cookie distinguishes between - * different packets that have occupied a single buffer. Thus, the more - * buffers we have, the lower-quality the cookie... */ -#define PKT_BUFFER_BITS 8 -#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) -#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) - -#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) - - -void fwd_port_input(struct sw_chain *, struct sk_buff *, - struct net_bridge_port *); -int run_flow_through_tables(struct sw_chain *, struct sk_buff *, - struct net_bridge_port *); -int fwd_control_input(struct sw_chain *, const struct sender *, - const void *, size_t); - -uint32_t fwd_save_skb(struct sk_buff *skb); -void fwd_discard_all(void); -void fwd_exit(void); - -#endif /* forward.h */ diff --git a/datapath/hwtable_dummy/hwtable_dummy.c b/datapath/hwtable_dummy/hwtable_dummy.c index f55e9fdc..1e41437f 100644 --- a/datapath/hwtable_dummy/hwtable_dummy.c +++ b/datapath/hwtable_dummy/hwtable_dummy.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -68,7 +68,7 @@ struct sw_table_dummy { static struct sw_flow *table_dummy_lookup(struct sw_table *swt, - const struct sw_flow_key *key) + const struct odp_flow_key *key) { struct sw_table_dummy *td = (struct sw_table_dummy *) swt; struct sw_flow *flow; @@ -98,7 +98,7 @@ static int table_dummy_insert(struct sw_table *swt, struct sw_flow *flow) } static int table_dummy_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, + const struct odp_flow_key *key, uint16_t priority, int strict, const struct ofp_action_header *actions, size_t actions_len) { struct sw_table_dummy *td = (struct sw_table_dummy *) swt; @@ -129,7 
+129,7 @@ static int do_delete(struct sw_table *swt, struct sw_flow *flow) } static int table_dummy_delete(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict) + const struct odp_flow_key *key, uint16_t priority, int strict) { struct sw_table_dummy *td = (struct sw_table_dummy *) swt; struct sw_flow *flow; @@ -205,7 +205,7 @@ static void table_dummy_destroy(struct sw_table *swt) } static int table_dummy_iterate(struct sw_table *swt, - const struct sw_flow_key *key, + const struct odp_flow_key *key, struct sw_table_position *position, int (*callback)(struct sw_flow *, void *), void *private) diff --git a/datapath/nx_act.c b/datapath/nx_act.c deleted file mode 100644 index d96c48f9..00000000 --- a/datapath/nx_act.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2008 Nicira Networks - */ - -/* Functions for Nicira-extended actions. */ -#include "openflow/nicira-ext.h" -#include "dp_act.h" -#include "nx_act.h" -#include "nx_act_snat.h" - -uint16_t -nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, - const struct nx_action_header *nah, uint16_t len) -{ - if (len < sizeof *nah) - return OFPBAC_BAD_LEN; - -#ifdef SUPPORT_SNAT - if (nah->subtype == ntohs(NXAST_SNAT)) { - struct nx_action_snat *nas = (struct nx_action_snat *)nah; - if (len != sizeof(*nas)) - return OFPBAC_BAD_LEN; - else if (ntohs(nas->port) >= OFPP_MAX) - return OFPBAC_BAD_ARGUMENT; - - return ACT_VALIDATION_OK; - } -#endif - return OFPBAC_BAD_VENDOR_TYPE; -} - -struct sk_buff * -nx_execute_act(struct sk_buff *skb, const struct sw_flow_key *key, - const struct nx_action_header *nah) -{ -#ifdef SUPPORT_SNAT - if (nah->subtype == ntohs(NXAST_SNAT)) { - struct nx_action_snat *nas = (struct nx_action_snat *)nah; - snat_skb(skb->dev->br_port->dp, skb, ntohs(nas->port)); - } -#endif - - return skb; -} - diff --git a/datapath/nx_act.h b/datapath/nx_act.h deleted file mode 100644 index 
6dda65dd..00000000 --- a/datapath/nx_act.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef NX_ACT_H -#define NX_ACT_H 1 - -#include "datapath.h" - - -uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, - const struct nx_action_header *nah, uint16_t len); - -struct sk_buff *nx_execute_act(struct sk_buff *skb, - const struct sw_flow_key *key, - const struct nx_action_header *nah); - -#endif /* nx_act.h */ diff --git a/datapath/nx_act_snat.h b/datapath/nx_act_snat.h deleted file mode 100644 index fc86d38b..00000000 --- a/datapath/nx_act_snat.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifdef SUPPORT_SNAT -#ifndef ACT_SNAT_H -#define ACT_SNAT_H - -#include -#include -#include - -#include "openflow/nicira-ext.h" -#include "datapath.h" - -/* Cache of IP->MAC mappings on the side hidden by the SNAT */ -struct snat_mapping { - struct list_head node; - uint32_t ip_addr; /* Stored in network-order */ - uint8_t hw_addr[ETH_ALEN]; - unsigned long used; /* Last used time (in jiffies). */ - - struct rcu_head rcu; -}; - -struct snat_conf { - uint32_t ip_addr_start; /* Stored in host-order */ - uint32_t ip_addr_end; /* Stored in host-order */ - uint16_t mac_timeout; - - uint8_t mac_addr[ETH_ALEN]; - - struct list_head mappings; /* List of snat_mapping entries */ -}; - -#define MAC_TIMEOUT_DEFAULT 120 - -void snat_local_in(struct sk_buff *skb); -int snat_pre_route(struct sk_buff *skb); -void snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port); -void snat_save_header(struct sk_buff *skb); -int snat_copy_header(struct sk_buff *skb); -void snat_maint(struct net_bridge_port *p); -int snat_mod_config(struct datapath *, const struct nx_act_config *); -int snat_free_conf(struct net_bridge_port *p); - -#endif -#endif diff --git a/datapath/nx_msg.c b/datapath/nx_msg.c deleted file mode 100644 index 839fac8c..00000000 --- a/datapath/nx_msg.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. 
- * Copyright (c) 2008 Nicira Networks - */ - -#include "chain.h" -#include "datapath.h" -#include "openflow/nicira-ext.h" -#include "nx_act_snat.h" -#include "nx_msg.h" - - -int -nx_recv_msg(struct sw_chain *chain, const struct sender *sender, - const void *msg) -{ - const struct nicira_header *nh = msg; - - switch (ntohl(nh->subtype)) { - - case NXT_FLOW_END_CONFIG: { - const struct nx_flow_end_config *nfec = msg; - chain->dp->send_flow_end = nfec->enable; - return 0; - } - -#ifdef SUPPORT_SNAT - case NXT_ACT_SET_CONFIG: { - const struct nx_act_config *nac = msg; - if (ntohs(nh->header.length) < sizeof(*nac)) - return -EINVAL; - - if (nac->type == htons(NXAST_SNAT)) - return snat_mod_config(chain->dp, nac); - else - return -EINVAL; - break; - } -#endif - - default: - dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_SUBTYPE, msg, ntohs(nh->header.length)); - return -EINVAL; - } - - return -EINVAL; -} diff --git a/datapath/nx_msg.h b/datapath/nx_msg.h deleted file mode 100644 index 1eb1bb39..00000000 --- a/datapath/nx_msg.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef NX_MSG_H -#define NX_MSG_H 1 - -int nx_recv_msg(struct sw_chain *chain, const struct sender *sender, - const void *msg); - -#endif /* nx_msg.h */ diff --git a/datapath/nx_act_snat.c b/datapath/snat.c similarity index 84% rename from datapath/nx_act_snat.c rename to datapath/snat.c index c7705d49..17da5173 100644 --- a/datapath/nx_act_snat.c +++ b/datapath/snat.c @@ -16,10 +16,30 @@ #include #include -#include "forward.h" -#include "dp_act.h" -#include "nx_act_snat.h" +#include "actions.h" +#include "snat.h" +/* Cache of IP->MAC mappings on the side hidden by the SNAT */ +struct snat_mapping { + struct list_head node; + u32 ip_addr; /* Stored in network-order */ + u8 hw_addr[ETH_ALEN]; + unsigned long used; /* Last used time (in jiffies). 
*/ + + struct rcu_head rcu; +}; + +struct snat_conf { + u32 ip_addr_start; /* Stored in host-order */ + u32 ip_addr_end; /* Stored in host-order */ + u16 mac_timeout; + + u8 mac_addr[ETH_ALEN]; + + struct list_head mappings; /* List of snat_mapping entries */ +}; + +#define MAC_TIMEOUT_DEFAULT 120 /* We need these fake structures to make netfilter happy -- * lots of places assume that skb->dst != NULL, which isn't @@ -48,17 +68,16 @@ static struct rtable __fake_rtable = { /* Define ARP for IP since the Linux headers don't do it cleanly. */ struct ip_arphdr { - uint16_t ar_hrd; - uint16_t ar_pro; - uint8_t ar_hln; - uint8_t ar_pln; - uint16_t ar_op; - uint8_t ar_sha[ETH_ALEN]; - uint32_t ar_sip; - uint8_t ar_tha[ETH_ALEN]; - uint32_t ar_tip; + u16 ar_hrd; + u16 ar_pro; + u8 ar_hln; + u8 ar_pln; + u16 ar_op; + u8 ar_sha[ETH_ALEN]; + u32 ar_sip; + u8 ar_tha[ETH_ALEN]; + u32 ar_tip; } __attribute__((packed)); -OFP_ASSERT(sizeof(struct ip_arphdr) == 28); static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) { @@ -168,8 +187,9 @@ dnat_mac(struct net_bridge_port *p, struct sk_buff *skb) list_for_each_entry (m, &sc->mappings, node) { if (m->ip_addr == iph->daddr){ /* Found it! */ - if (!make_writable(&skb)) - return -EINVAL; + skb = make_writable(skb, GFP_ATOMIC); + if (!skb) + return -1; m->used = jiffies; memcpy(eh->h_dest, m->hw_addr, ETH_ALEN); break; @@ -244,7 +264,7 @@ snat_pre_route_finish(struct sk_buff *skb) * consumes it. 
*/ skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); - fwd_port_input(p->dp->chain, skb, p); + dp_process_received_packet(skb, p); return 0; } @@ -260,7 +280,7 @@ handle_arp_snat(struct sk_buff *skb) struct net_bridge_port *s_nbp = skb->dev->br_port; struct net_bridge_port *nat_nbp; struct ip_arphdr *ah; - uint8_t mac_addr[ETH_ALEN]; + u8 mac_addr[ETH_ALEN]; if (!pskb_may_pull(skb, sizeof *ah)) return 0; @@ -306,8 +326,8 @@ handle_icmp_snat(struct sk_buff *skb) struct ethhdr *eh; struct iphdr *iph; struct icmphdr *icmph; - uint8_t tmp_eth[ETH_ALEN]; - uint32_t tmp_ip; + u8 tmp_eth[ETH_ALEN]; + u32 tmp_ip; struct sk_buff *nskb; /* We're only interested in addresses we rewrite. */ @@ -480,7 +500,8 @@ done: * unmodified. 'skb' is not consumed, so caller will need to free it. */ void -snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port) +snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port, + gfp_t gfp) { struct net_bridge_port *p = dp->ports[out_port]; struct sk_buff *nskb; @@ -491,7 +512,7 @@ snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port) /* FIXME: Expensive. Just need to skb_clone() here? * (However, the skb_copy() does linearize and ensure that the headers * are accessible.) */ - nskb = skb_copy(skb, GFP_ATOMIC); + nskb = skb_copy(skb, gfp); if (!nskb) return; @@ -541,67 +562,60 @@ snat_free_conf(struct net_bridge_port *p) } /* Remove SNAT configuration from an interface. 
*/ -static int -snat_del_port(struct datapath *dp, const struct nx_snat_config *nsc) +int snat_del_port(struct datapath *dp, int port) { unsigned long flags; - uint16_t port = ntohs(nsc->port); - struct net_bridge_port *p = dp->ports[port]; - - if (!p) { - if (net_ratelimit()) - printk(KERN_NOTICE "%s: attempt to remove snat on " - "non-existent port: %d\n", - dp->netdev->name, port); - return -EINVAL; - } + struct net_bridge_port *p; + int error; - spin_lock_irqsave(&p->lock, flags); - if (snat_free_conf(p)) { - /* SNAT not configured on this port */ - spin_unlock_irqrestore(&p->lock, flags); - if (net_ratelimit()) - printk(KERN_NOTICE "%s: attempt to remove snat on " - "non-snat port: %d\n", dp->netdev->name, port); + if (port < 0 || port >= DP_MAX_PORTS) return -EINVAL; - } + p = dp->ports[port]; + if (!p) + return -ENOENT; + + spin_lock_irqsave(&p->lock, flags); + error = snat_free_conf(p); spin_unlock_irqrestore(&p->lock, flags); - return 0; + return error; } /* Add SNAT configuration to an interface. 
*/ -static int -snat_add_port(struct datapath *dp, const struct nx_snat_config *nsc) +int snat_add_port(struct datapath *dp, const struct odp_snat_config *osc) { unsigned long flags; - uint16_t port = ntohs(nsc->port); - struct net_bridge_port *p = dp->ports[port]; - uint16_t mac_timeout = ntohs(nsc->mac_timeout); + struct net_bridge_port *p; struct snat_conf *sc; - - if (mac_timeout == 0) - mac_timeout = MAC_TIMEOUT_DEFAULT; + int mac_timeout; - if (!p) { - if (net_ratelimit()) - printk(KERN_NOTICE "%s: attempt to add snat on " - "non-existent port: %d\n", - dp->netdev->name, port); + if (osc->port < 0 || osc->port >= DP_MAX_PORTS) return -EINVAL; - } - + + p = dp->ports[osc->port]; + if (!p) + return -ENOENT; + + mac_timeout = osc->mac_timeout; + if (!mac_timeout) + mac_timeout = MAC_TIMEOUT_DEFAULT; + + sc = kzalloc(sizeof *sc, GFP_KERNEL); + if (!sc) + return -ENOMEM; + /* If SNAT is already configured on the port, check whether the same * IP addresses are used. If so, just update the mac timeout * configuration. Otherwise, drop all SNAT configuration and * reconfigure it. 
*/ spin_lock_irqsave(&p->lock, flags); if (p->snat) { - if ((p->snat->ip_addr_start == ntohl(nsc->ip_addr_start)) - && (p->snat->ip_addr_end == ntohl(nsc->ip_addr_end))) { + if (p->snat->ip_addr_start == ntohl(osc->ip_start) && + p->snat->ip_addr_end == ntohl(osc->ip_end)) { p->snat->mac_timeout = mac_timeout; spin_unlock_irqrestore(&p->lock, flags); + kfree(sc); return 0; } @@ -609,16 +623,10 @@ snat_add_port(struct datapath *dp, const struct nx_snat_config *nsc) snat_free_conf(p); } - sc = kzalloc(sizeof *sc, GFP_ATOMIC); - if (!sc) { - spin_unlock_irqrestore(&p->lock, flags); - return -ENOMEM; - } - - sc->ip_addr_start = ntohl(nsc->ip_addr_start); - sc->ip_addr_end = ntohl(nsc->ip_addr_end); + sc->ip_addr_start = ntohl(osc->ip_start); + sc->ip_addr_end = ntohl(osc->ip_end); sc->mac_timeout = mac_timeout; - memcpy(sc->mac_addr, nsc->mac_addr, sizeof(sc->mac_addr)); + memcpy(sc->mac_addr, osc->mac_addr, ETH_ALEN); INIT_LIST_HEAD(&sc->mappings); p->snat = sc; @@ -626,31 +634,4 @@ snat_add_port(struct datapath *dp, const struct nx_snat_config *nsc) return 0; } - -/* Handle a SNAT configuration message. - * - * Returns 0 if no problems are found. Otherwise, a negative errno. 
*/ -int -snat_mod_config(struct datapath *dp, const struct nx_act_config *nac) -{ - int n_entries = (ntohs(nac->header.header.length) - sizeof *nac) - / sizeof (struct nx_snat_config); - int ret = 0; - int i; - - for (i=0; isnat[i]; - int r = 0; - - if (nsc->command == NXSC_ADD) - r = snat_add_port(dp, nsc); - else - r = snat_del_port(dp, nsc); - - if (r) - ret = r; - } - - return ret; -} #endif diff --git a/datapath/snat.h b/datapath/snat.h new file mode 100644 index 00000000..0d0dc6d3 --- /dev/null +++ b/datapath/snat.h @@ -0,0 +1,23 @@ +#ifdef SUPPORT_SNAT +#ifndef ACT_SNAT_H +#define ACT_SNAT_H + +#include +#include +#include + +#include "datapath.h" + +void snat_local_in(struct sk_buff *skb); +int snat_pre_route(struct sk_buff *skb); +void snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port, + gfp_t gfp); +void snat_save_header(struct sk_buff *skb); +int snat_copy_header(struct sk_buff *skb); +void snat_maint(struct net_bridge_port *p); +int snat_add_port(struct datapath *, const struct odp_snat_config *); +int snat_del_port(struct datapath *, int port); +int snat_free_conf(struct net_bridge_port *p); + +#endif +#endif diff --git a/datapath/table-hash.c b/datapath/table-hash.c deleted file mode 100644 index 9e27b0c4..00000000 --- a/datapath/table-hash.c +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland - * Stanford Junior University - */ - -#include "table.h" -#include "crc32.h" -#include "flow.h" -#include "datapath.h" - -#include -#include -#include -#include -#include - -static void *kmem_alloc(size_t); -static void *kmem_zalloc(size_t); -static void kmem_free(void *, size_t); - -struct sw_table_hash { - struct sw_table swt; - struct crc32 crc32; - unsigned int n_flows; - unsigned int bucket_mask; /* Number of buckets minus 1. 
*/ - struct sw_flow **buckets; -}; - -static struct sw_flow **find_bucket(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int crc = crc32_calculate(&th->crc32, key, - offsetof(struct sw_flow_key, wildcards)); - return &th->buckets[crc & th->bucket_mask]; -} - -static struct sw_flow *table_hash_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_flow *flow = *find_bucket(swt, key); - return flow && flow_keys_equal(&flow->key, key) ? flow : NULL; -} - -static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - struct sw_flow **bucket; - int retval; - - if (flow->key.wildcards != 0) - return 0; - - bucket = find_bucket(swt, &flow->key); - if (*bucket == NULL) { - th->n_flows++; - rcu_assign_pointer(*bucket, flow); - retval = 1; - } else { - struct sw_flow *old_flow = *bucket; - if (flow_keys_equal(&old_flow->key, &flow->key)) { - rcu_assign_pointer(*bucket, flow); - flow_deferred_free(old_flow); - retval = 1; - } else { - retval = 0; - } - } - return retval; -} - -static int table_hash_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int count = 0; - - if (key->wildcards == 0) { - struct sw_flow **bucket = find_bucket(swt, key); - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, actions, actions_len); - count = 1; - } - } else { - unsigned int i; - - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, 
actions, actions_len); - count++; - } - } - } - return count; -} - -/* Caller must update n_flows. */ -static int do_delete(struct datapath *dp, struct sw_flow **bucket, - struct sw_flow *flow, enum nx_flow_end_reason reason) -{ - dp_send_flow_end(dp, flow, reason); - rcu_assign_pointer(*bucket, NULL); - flow_deferred_free(flow); - return 1; -} - -/* Returns number of deleted flows. We can ignore the priority - * argument, since all exact-match entries are the same (highest) - * priority. */ -static int table_hash_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, uint16_t out_port, - uint16_t priority, int strict) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int count = 0; - - if (key->wildcards == 0) { - struct sw_flow **bucket = find_bucket(swt, key); - struct sw_flow *flow = *bucket; - if (flow && flow_keys_equal(&flow->key, key) - && flow_has_out_port(flow, out_port)) - count = do_delete(dp, bucket, flow, NXFER_DELETE); - } else { - unsigned int i; - - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && flow_has_out_port(flow, out_port)) - count += do_delete(dp, bucket, flow, NXFER_DELETE); - } - } - th->n_flows -= count; - return count; -} - -static int table_hash_timeout(struct datapath *dp, struct sw_table *swt) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int i; - int count = 0; - - if (mutex_lock_interruptible(&dp_mutex)) - return 0; - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow) { - int reason = flow_timeout(flow); - if (reason >= 0) { - count += do_delete(dp, bucket, flow, reason); - } - } - } - th->n_flows -= count; - mutex_unlock(&dp_mutex); - - return count; -} - -static void table_hash_destroy(struct sw_table *swt) -{ - struct sw_table_hash *th = 
(struct sw_table_hash *) swt; - unsigned int i; - for (i = 0; i <= th->bucket_mask; i++) - if (th->buckets[i]) - flow_free(th->buckets[i]); - kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets); - kfree(th); -} - -static int table_hash_iterate(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *private), - void *private) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - - if (position->private[0] > th->bucket_mask) - return 0; - - if (key->wildcards == 0) { - struct sw_flow *flow; - int error; - - flow = table_hash_lookup(swt, key); - if (!flow || !flow_has_out_port(flow, out_port)) - return 0; - - error = callback(flow, private); - if (!error) - position->private[0] = -1; - return error; - } else { - int i; - - for (i = position->private[0]; i <= th->bucket_mask; i++) { - struct sw_flow *flow = th->buckets[i]; - if (flow && flow_matches_1wild(&flow->key, key) - && flow_has_out_port(flow, out_port)) { - int error = callback(flow, private); - if (error) { - position->private[0] = i; - return error; - } - } - } - return 0; - } -} -static void table_hash_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - stats->name = "hash"; - stats->wildcards = 0; /* No wildcards are supported. 
*/ - stats->n_flows = th->n_flows; - stats->max_flows = th->bucket_mask + 1; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - -struct sw_table *table_hash_create(unsigned int polynomial, - unsigned int n_buckets) -{ - struct sw_table_hash *th; - struct sw_table *swt; - - th = kzalloc(sizeof *th, GFP_KERNEL); - if (th == NULL) - return NULL; - - BUG_ON(n_buckets & (n_buckets - 1)); - th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets); - if (th->buckets == NULL) { - printk(KERN_EMERG "failed to allocate %u buckets\n", - n_buckets); - kfree(th); - return NULL; - } - th->bucket_mask = n_buckets - 1; - - swt = &th->swt; - swt->lookup = table_hash_lookup; - swt->insert = table_hash_insert; - swt->delete = table_hash_delete; - swt->timeout = table_hash_timeout; - swt->destroy = table_hash_destroy; - swt->iterate = table_hash_iterate; - swt->stats = table_hash_stats; - - crc32_init(&th->crc32, polynomial); - th->n_flows = 0; - - return swt; -} - -/* Double-hashing table. 
*/ - -struct sw_table_hash2 { - struct sw_table swt; - struct sw_table *subtable[2]; -}; - -static struct sw_flow *table_hash2_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - int i; - - for (i = 0; i < 2; i++) { - struct sw_flow *flow = *find_bucket(t2->subtable[i], key); - if (flow && flow_keys_equal(&flow->key, key)) - return flow; - } - return NULL; -} - -static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - - if (table_hash_insert(t2->subtable[0], flow)) - return 1; - return table_hash_insert(t2->subtable[1], flow); -} - -static int table_hash2_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - return (table_hash_modify(t2->subtable[0], key, priority, strict, - actions, actions_len) - + table_hash_modify(t2->subtable[1], key, priority, strict, - actions, actions_len)); -} - -static int table_hash2_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - uint16_t priority, int strict) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - return (table_hash_delete(dp, t2->subtable[0], key, out_port, - priority, strict) - + table_hash_delete(dp, t2->subtable[1], key, out_port, - priority, strict)); -} - -static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - return (table_hash_timeout(dp, t2->subtable[0]) - + table_hash_timeout(dp, t2->subtable[1])); -} - -static void table_hash2_destroy(struct sw_table *swt) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - table_hash_destroy(t2->subtable[0]); - table_hash_destroy(t2->subtable[1]); - kfree(t2); -} - -static int 
table_hash2_iterate(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *), - void *private) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - int i; - - for (i = position->private[1]; i < 2; i++) { - int error = table_hash_iterate(t2->subtable[i], key, out_port, - position, callback, private); - if (error) { - return error; - } - position->private[0] = 0; - position->private[1]++; - } - return 0; -} - -static void table_hash2_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - struct sw_table_stats substats[2]; - int i; - - for (i = 0; i < 2; i++) - table_hash_stats(t2->subtable[i], &substats[i]); - stats->name = "hash2"; - stats->wildcards = 0; /* No wildcards are supported. */ - stats->n_flows = substats[0].n_flows + substats[1].n_flows; - stats->max_flows = substats[0].max_flows + substats[1].max_flows; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - -struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, - unsigned int poly1, unsigned int buckets1) - -{ - struct sw_table_hash2 *t2; - struct sw_table *swt; - - t2 = kzalloc(sizeof *t2, GFP_KERNEL); - if (t2 == NULL) - return NULL; - - t2->subtable[0] = table_hash_create(poly0, buckets0); - if (t2->subtable[0] == NULL) - goto out_free_t2; - - t2->subtable[1] = table_hash_create(poly1, buckets1); - if (t2->subtable[1] == NULL) - goto out_free_subtable0; - - swt = &t2->swt; - swt->lookup = table_hash2_lookup; - swt->insert = table_hash2_insert; - swt->modify = table_hash2_modify; - swt->delete = table_hash2_delete; - swt->timeout = table_hash2_timeout; - swt->destroy = table_hash2_destroy; - swt->iterate = table_hash2_iterate; - swt->stats = table_hash2_stats; - - return swt; - -out_free_subtable0: - table_hash_destroy(t2->subtable[0]); -out_free_t2: - kfree(t2); - return 
NULL; -} - -/* From fs/xfs/linux-2.4/kmem.c. */ - -static void * -kmem_alloc(size_t size) -{ - void *ptr; - -#ifdef KMALLOC_MAX_SIZE - if (size > KMALLOC_MAX_SIZE) - return NULL; -#endif - ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - ptr = vmalloc(size); - if (ptr) - printk(KERN_NOTICE "openflow: used vmalloc for %lu " - "bytes\n", (unsigned long)size); - } - return ptr; -} - -static void * -kmem_zalloc(size_t size) -{ - void *ptr = kmem_alloc(size); - if (ptr) - memset(ptr, 0, size); - return ptr; -} - -static void -kmem_free(void *ptr, size_t size) -{ - if (((unsigned long)ptr < VMALLOC_START) || - ((unsigned long)ptr >= VMALLOC_END)) { - kfree(ptr); - } else { - vfree(ptr); - } -} diff --git a/datapath/table-linear.c b/datapath/table-linear.c deleted file mode 100644 index bef58a6b..00000000 --- a/datapath/table-linear.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland - * Stanford Junior University - */ - -#include "table.h" -#include "flow.h" -#include "datapath.h" - -#include -#include -#include - -struct sw_table_linear { - struct sw_table swt; - - unsigned int max_flows; - unsigned int n_flows; - struct list_head flows; - struct list_head iter_flows; - unsigned long int next_serial; -}; - -static struct sw_flow *table_linear_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - list_for_each_entry_rcu (flow, &tl->flows, node) { - if (flow_matches_1wild(key, &flow->key)) - return flow; - } - return NULL; -} - -static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *f; - - - /* Loop through the existing list of entries. New entries will - * always be placed behind those with equal priority. Just replace - * any flows that match exactly. 
- */ - list_for_each_entry (f, &tl->flows, node) { - if (f->priority == flow->priority - && f->key.wildcards == flow->key.wildcards - && flow_matches_2wild(&f->key, &flow->key)) { - flow->serial = f->serial; - list_replace_rcu(&f->node, &flow->node); - list_replace_rcu(&f->iter_node, &flow->iter_node); - flow_deferred_free(f); - return 1; - } - - if (f->priority < flow->priority) - break; - } - - /* Make sure there's room in the table. */ - if (tl->n_flows >= tl->max_flows) { - return 0; - } - tl->n_flows++; - - /* Insert the entry immediately in front of where we're pointing. */ - flow->serial = tl->next_serial++; - list_add_tail_rcu(&flow->node, &f->node); - list_add_rcu(&flow->iter_node, &tl->iter_flows); - return 1; -} - -static int table_linear_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - unsigned int count = 0; - - list_for_each_entry (flow, &tl->flows, node) { - if (flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, actions, actions_len); - count++; - } - } - return count; -} - -static int do_delete(struct datapath *dp, struct sw_table *swt, - struct sw_flow *flow, enum nx_flow_end_reason reason) -{ - dp_send_flow_end(dp, flow, reason); - list_del_rcu(&flow->node); - list_del_rcu(&flow->iter_node); - flow_deferred_free(flow); - return 1; -} - -static int table_linear_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, uint16_t out_port, - uint16_t priority, int strict) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - unsigned int count = 0; - - list_for_each_entry (flow, &tl->flows, node) { - if (flow_matches_desc(&flow->key, key, strict) - && flow_has_out_port(flow, out_port) - && (!strict || (flow->priority == 
priority))) - count += do_delete(dp, swt, flow, NXFER_DELETE); - } - tl->n_flows -= count; - return count; -} - -static int table_linear_timeout(struct datapath *dp, struct sw_table *swt) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - int count = 0; - - if (mutex_lock_interruptible(&dp_mutex)) - return 0; - list_for_each_entry (flow, &tl->flows, node) { - int reason = flow_timeout(flow); - if (reason >= 0) { - count += do_delete(dp, swt, flow, reason); - } - } - tl->n_flows -= count; - mutex_unlock(&dp_mutex); - return count; -} - -static void table_linear_destroy(struct sw_table *swt) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - - while (!list_empty(&tl->flows)) { - struct sw_flow *flow = list_entry(tl->flows.next, - struct sw_flow, node); - list_del(&flow->node); - flow_free(flow); - } - kfree(tl); -} - -static int table_linear_iterate(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *), - void *private) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - unsigned long start; - - start = position->private[0]; - list_for_each_entry (flow, &tl->iter_flows, iter_node) { - if (flow->serial >= start - && flow_matches_2wild(key, &flow->key) - && flow_has_out_port(flow, out_port)) { - int error = callback(flow, private); - if (error) { - position->private[0] = flow->serial; - return error; - } - } - } - return 0; -} - -static void table_linear_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - stats->name = "linear"; - stats->wildcards = OFPFW_ALL; - stats->n_flows = tl->n_flows; - stats->max_flows = tl->max_flows; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - - -struct sw_table *table_linear_create(unsigned int max_flows) -{ - struct sw_table_linear *tl; 
- struct sw_table *swt; - - tl = kzalloc(sizeof *tl, GFP_KERNEL); - if (tl == NULL) - return NULL; - - swt = &tl->swt; - swt->lookup = table_linear_lookup; - swt->insert = table_linear_insert; - swt->modify = table_linear_modify; - swt->delete = table_linear_delete; - swt->timeout = table_linear_timeout; - swt->destroy = table_linear_destroy; - swt->iterate = table_linear_iterate; - swt->stats = table_linear_stats; - - tl->max_flows = max_flows; - tl->n_flows = 0; - INIT_LIST_HEAD(&tl->flows); - INIT_LIST_HEAD(&tl->iter_flows); - tl->next_serial = 0; - - return swt; -} diff --git a/datapath/table.c b/datapath/table.c new file mode 100644 index 00000000..cc42406a --- /dev/null +++ b/datapath/table.c @@ -0,0 +1,231 @@ +#include "flow.h" +#include "datapath.h" + +#include +#include +#include +#include +#include +#include +#include + +static void free_table(struct sw_flow ***flows, unsigned int n_buckets, + int free_flows) +{ + unsigned int i; + + for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { + struct sw_flow **l2 = flows[i]; + if (free_flows) { + unsigned int j; + for (j = 0; j < DP_L1_SIZE; j++) { + if (l2[j]) + flow_free(l2[j]); + } + } + free_page((unsigned long)l2); + } + kfree(flows); +} + +static struct sw_flow ***alloc_table(unsigned int n_buckets) +{ + struct sw_flow ***flows; + unsigned int i; + + flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**), + GFP_KERNEL); + if (!flows) + return NULL; + for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { + flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL); + if (!flows[i]) { + free_table(flows, i << DP_L1_BITS, 0); + return NULL; + } + } + return flows; +} + +struct dp_table *dp_table_create(unsigned int n_buckets) +{ + struct dp_table *table; + + table = kzalloc(sizeof *table, GFP_KERNEL); + if (!table) + goto err; + + table->n_buckets = n_buckets; + table->flows[0] = alloc_table(n_buckets); + if (!table->flows[0]) + goto err_free_tables; + + table->flows[1] = alloc_table(n_buckets); + if
(!table->flows[1]) + goto err_free_flows0; + + return table; + +err_free_flows0: + free_table(table->flows[0], table->n_buckets, 0); +err_free_tables: + kfree(table); +err: + return NULL; +} + +void dp_table_destroy(struct dp_table *table, int free_flows) +{ + int i; + for (i = 0; i < 2; i++) + free_table(table->flows[i], table->n_buckets, free_flows); + kfree(table); +} + +static struct sw_flow **find_bucket(struct dp_table *table, + struct sw_flow ***flows, u32 hash) +{ + unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT; + unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1); + return &flows[l1][l2]; +} + +static struct sw_flow *lookup_table(struct dp_table *table, + struct sw_flow ***flows, u32 hash, + const struct odp_flow_key *key) +{ + struct sw_flow **bucket = find_bucket(table, flows, hash); + struct sw_flow *flow = rcu_dereference(*bucket); + return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL; +} + +static u32 flow_hash0(const struct odp_flow_key *key) +{ + return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa); +} + +static u32 flow_hash1(const struct odp_flow_key *key) +{ + return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555); +} + +static void find_buckets(struct dp_table *table, struct odp_flow_key *key, + struct sw_flow **buckets[2]) +{ + buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key)); + buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key)); +} + +struct sw_flow *dp_table_lookup(struct dp_table *table, + const struct odp_flow_key *key) +{ + struct sw_flow *flow; + flow = lookup_table(table, table->flows[0], flow_hash0(key), key); + if (!flow) + flow = lookup_table(table, table->flows[1], + flow_hash1(key), key); + return flow; +} + +static void dp_table_swap(struct datapath *dp, struct dp_table *new_table, + int free_flows) +{ + struct dp_table *old_table = rcu_dereference(dp->table); + rcu_assign_pointer(dp->table, new_table); + synchronize_rcu(); +
dp_table_destroy(old_table, free_flows); +} + +int dp_table_foreach(struct dp_table *table, + int (*callback)(struct sw_flow *flow, void *aux), + void *aux) +{ + unsigned int i, j, k; + for (i = 0; i < 2; i++) { + for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) { + struct sw_flow **l2 = table->flows[i][j]; + for (k = 0; k < DP_L1_SIZE; k++) { + struct sw_flow *flow = rcu_dereference(l2[k]); + if (flow) { + int error = callback(flow, aux); + if (error) + return error; + } + } + } + } + return 0; +} + +static int insert_flow(struct sw_flow *flow, void *new_table_) +{ + struct dp_table *new_table = new_table_; + struct sw_flow **buckets[2]; + int i; + + printk("."); + find_buckets(new_table, &flow->key, buckets); + for (i = 0; i < 2; i++) { + if (!*buckets[i]) { + rcu_assign_pointer(*buckets[i], flow); + return 0; + } + } + WARN_ON_ONCE(1); + return 0; +} + +int dp_table_expand(struct datapath *dp) +{ + struct dp_table *old_table = rcu_dereference(dp->table); + struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2); + if (!new_table) + return -ENOMEM; + dp_table_foreach(old_table, insert_flow, new_table); + dp_table_swap(dp, new_table, 0); + return 0; +} + +int dp_table_flush(struct datapath *dp) +{ + struct dp_table *new_table = dp_table_create(DP_L1_SIZE); + if (!new_table) + return -ENOMEM; + dp_table_swap(dp, new_table, 1); + return 0; +} + +struct sw_flow ** +dp_table_lookup_for_insert(struct dp_table *table, struct sw_flow *target) +{ + struct sw_flow **buckets[2]; + struct sw_flow **empty_bucket = NULL; + int i; + + find_buckets(table, &target->key, buckets); + for (i = 0; i < 2; i++) { + struct sw_flow *f = rcu_dereference(*buckets[i]); + if (f) { + if (!memcmp(&f->key, &target->key, sizeof f->key)) + return buckets[i]; + } else if (!empty_bucket) + empty_bucket = buckets[i]; + } + return empty_bucket; +} + +int dp_table_delete(struct dp_table *table, struct sw_flow *target) +{ + struct sw_flow **buckets[2]; + int i; + + 
find_buckets(table, &target->key, buckets); + for (i = 0; i < 2; i++) { + struct sw_flow *flow = rcu_dereference(*buckets[i]); + if (flow == target) { + rcu_assign_pointer(*buckets[i], NULL); + return 0; + } + } + return -ENOENT; +} diff --git a/datapath/table.h b/datapath/table.h deleted file mode 100644 index 8be895e4..00000000 --- a/datapath/table.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Individual switching tables. Generally grouped together in a chain (see - * chain.h). */ - -#ifndef TABLE_H -#define TABLE_H 1 - -#include - -struct sw_flow; -struct sw_flow_key; -struct ofp_action_header; -struct datapath; - -/* Table statistics. */ -struct sw_table_stats { - const char *name; /* Human-readable name. */ - uint32_t wildcards; /* Bitmap of OFPFW_* wildcards that are - supported by the table. */ - unsigned int n_flows; /* Number of active flows. */ - unsigned int max_flows; /* Flow capacity. */ - unsigned long int n_lookup; /* Number of packets looked up. */ - unsigned long int n_matched; /* Number of packets that have hit. */ -}; - -/* Position within an iteration of a sw_table. - * - * The contents are private to the table implementation, except that a position - * initialized to all-zero-bits represents the start of a table. */ -struct sw_table_position { - unsigned long private[4]; -}; - -/* A single table of flows. - * - * All functions, except destroy, must be called holding the - * rcu_read_lock. destroy must be fully serialized. - */ -struct sw_table { - /* The number of packets that have been looked up and matched, - * respecitvely. To make these 100% accurate, they should be atomic. - * However, we're primarily concerned about speed. */ - unsigned long long n_lookup; - unsigned long long n_matched; - - /* Searches 'table' for a flow matching 'key', which must not have any - * wildcard fields. Returns the flow if successful, a null pointer - * otherwise. 
*/ - struct sw_flow *(*lookup)(struct sw_table *table, - const struct sw_flow_key *key); - - /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns - * 0 if successful or a negative error. Error can be due to an - * over-capacity table or because the flow is not one of the kind that - * the table accepts. - * - * If successful, 'flow' becomes owned by 'table', otherwise it is - * retained by the caller. */ - int (*insert)(struct sw_table *table, struct sw_flow *flow); - - /* Modifies the actions in 'table' that match 'key'. If 'strict' - * set, wildcards and priority must match. Returns the number of flows - * that were modified. */ - int (*modify)(struct sw_table *table, const struct sw_flow_key *key, - uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len); - - /* Deletes from 'table' any and all flows that match 'key' from - * 'table'. If 'out_port' is not OFPP_NONE, then matching entries - * must have that port as an argument for an output action. If - * 'strict' is set, wildcards and priority must match. Returns the - * number of flows that were deleted. */ - int (*delete)(struct datapath *dp, struct sw_table *table, - const struct sw_flow_key *key, - uint16_t out_port, uint16_t priority, int strict); - - /* Performs timeout processing on all the flow entries in 'table'. - * Returns the number of flow entries deleted through expiration. */ - int (*timeout)(struct datapath *dp, struct sw_table *table); - - /* Destroys 'table', which must not have any users. */ - void (*destroy)(struct sw_table *table); - - /* Iterates through the flow entries in 'table', passing each one - * matches 'key' and output port 'out_port' to 'callback'. The - * callback function should return 0 to continue iteration or a - * nonzero error code to stop. The iterator function returns either - * 0 if the table iteration completed or the value returned by the - * callback function otherwise. 
- * - * The iteration starts at 'position', which may be initialized to - * all-zero-bits to iterate from the beginning of the table. If the - * iteration terminates due to an error from the callback function, - * 'position' is updated to a value that can be passed back to the - * iterator function to continue iteration later from the same position - * that caused the error (assuming that that flow entry has not been - * deleted in the meantime). */ - int (*iterate)(struct sw_table *table, - const struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *flow, void *private), - void *private); - - /* Dumps statistics for 'table' into 'stats'. */ - void (*stats)(struct sw_table *table, struct sw_table_stats *stats); -}; - -struct sw_table *table_hash_create(unsigned int polynomial, - unsigned int n_buckets); -struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, - unsigned int poly1, unsigned int buckets1); -struct sw_table *table_linear_create(unsigned int max_flows); - -#endif /* table.h */ diff --git a/include/openflow/datapath-protocol.h b/include/openflow/datapath-protocol.h new file mode 100644 index 00000000..da620791 --- /dev/null +++ b/include/openflow/datapath-protocol.h @@ -0,0 +1,320 @@ +/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +/* Protocol between secchan and datapath. */ + +#ifndef OPENFLOW_DATAPATH_PROTOCOL_H +#define OPENFLOW_DATAPATH_PROTOCOL_H 1 + +#include +#include + +#define ODP_MAX 256 /* Maximum number of datapaths. 
*/ + +#define ODP_DP_CREATE _IO('O', 0) +#define ODP_DP_DESTROY _IO('O', 1) +#define ODP_DP_STATS _IOW('O', 2, struct odp_stats) + +#define ODP_GET_DROP_FRAGS _IOW('O', 3, int) +#define ODP_SET_DROP_FRAGS _IOR('O', 4, int) + +#define ODP_GET_LISTEN_MASK _IOW('O', 5, int) +#define ODP_SET_LISTEN_MASK _IOR('O', 6, int) + +#define ODP_PORT_ADD _IOR('O', 7, struct odp_port) +#define ODP_PORT_DEL _IOR('O', 8, int) +#define ODP_PORT_QUERY _IOWR('O', 9, struct odp_port) +#define ODP_PORT_LIST _IOWR('O', 10, struct odp_portvec) + +#define ODP_PORT_GROUP_SET _IOR('O', 11, struct odp_port_group) +#define ODP_PORT_GROUP_GET _IOWR('O', 12, struct odp_port_group) + +#define ODP_FLOW_FLUSH _IO('O', 13) +#define ODP_FLOW_ADD _IOR('O', 14, struct odp_flow) +#define ODP_FLOW_SET_ACTS _IOR('O', 15, struct odp_flow) +#define ODP_FLOW_DEL _IOWR('O', 16, struct odp_flow) +#define ODP_FLOW_QUERY _IOWR('O', 17, struct odp_flow) +#define ODP_FLOW_QUERY_MULTIPLE _IOWR('O', 18, struct odp_flowvec) +#define ODP_FLOW_LIST _IOWR('O', 19, struct odp_flowvec) + +#define ODP_EXECUTE _IOR('O', 20, struct odp_execute) + +#define ODP_SNAT_ADD_PORT _IOR('O', 21, struct odp_snat_config) +#define ODP_SNAT_DEL_PORT _IOR('O', 22, int) + +struct odp_stats { + /* Flows. */ + __u32 n_flows; /* Number of flows in flow table. */ + __u32 cur_capacity; /* Current flow table capacity. */ + __u32 max_capacity; /* Maximum expansion of flow table capacity. */ + + /* Ports. */ + __u32 n_ports; /* Current number of ports. */ + __u32 max_ports; /* Maximum supported number of ports. */ + __u16 max_groups; /* Maximum number of port groups. */ + __u16 reserved; + + /* Lookups. */ + __u64 n_frags; /* Number of dropped IP fragments. */ + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ +}; + +/* Logical ports. */ +#define ODPP_LOCAL ((__u16)0) +#define ODPP_NONE ((__u16)-1) + +/* Listening channels. 
*/ +#define _ODPL_MISS_NR 0 /* Packet missed in flow table. */ +#define ODPL_MISS (1 << _ODPL_MISS_NR) +#define _ODPL_ACTION_NR 1 /* Packet output to ODPP_CONTROLLER. */ +#define ODPL_ACTION (1 << _ODPL_ACTION_NR) +#define ODPL_ALL (ODPL_MISS | ODPL_ACTION) + +/* Format of messages read from datapath fd. */ +struct odp_msg { + __u32 type; /* _ODPL_MISS_NR or _ODPL_ACTION_NR. */ + __u32 length; /* Message length, including header. */ + __u16 port; /* Port on which frame was received. */ + __u16 reserved; + __u32 arg; /* Argument value specified in action. */ + /* Followed by packet data. */ +}; + +struct odp_port { + char devname[16]; /* IFNAMSIZ */ + __u16 port; + __u16 reserved1; + __u32 reserved2; +}; + +struct odp_portvec { + struct odp_port *ports; + int n_ports; +}; + +struct odp_port_group { + __u16 *ports; + __u16 n_ports; /* Number of ports. */ + __u16 group; /* Group number. */ +}; + +struct odp_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ + __u64 used_sec; /* Time last used. */ + __u32 used_nsec; + __u8 tcp_flags; + __u8 ip_tos; + __u16 reserved; +}; + +struct odp_flow_key { + __be32 nw_src; /* IP source address. */ + __be32 nw_dst; /* IP destination address. */ + __u16 in_port; /* Input switch port. */ + __be16 dl_vlan; /* Input VLAN. */ + __be16 dl_type; /* Ethernet frame type. */ + __be16 tp_src; /* TCP/UDP source port. */ + __be16 tp_dst; /* TCP/UDP destination port. */ + __u8 dl_src[ETH_ALEN]; /* Ethernet source address. */ + __u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */ + __u8 nw_proto; /* IP protocol. */ + __u8 reserved; /* Pad to 64 bits. */ +}; + +struct odp_flow { + struct odp_flow_stats stats; + struct odp_flow_key key; + union odp_action *actions; + __u32 n_actions; +}; + +struct odp_flowvec { + struct odp_flow *flows; + int n_flows; +}; + +/* The VLAN id is 12 bits, so we can use the entire 16 bits to indicate + * special conditions. 
All ones is used to match that no VLAN id was + * set. */ +#define ODP_VLAN_NONE 0xffff + +/* Action types. */ +#define ODPAT_OUTPUT 0 /* Output to switch port. */ +#define ODPAT_OUTPUT_GROUP 1 /* Output to all ports in group. */ +#define ODPAT_CONTROLLER 2 /* Send copy to controller. */ +#define ODPAT_SET_VLAN_VID 3 /* Set the 802.1q VLAN id. */ +#define ODPAT_SET_VLAN_PCP 4 /* Set the 802.1q priority. */ +#define ODPAT_STRIP_VLAN 5 /* Strip the 802.1q header. */ +#define ODPAT_SET_DL_SRC 6 /* Ethernet source address. */ +#define ODPAT_SET_DL_DST 7 /* Ethernet destination address. */ +#define ODPAT_SET_NW_SRC 8 /* IP source address. */ +#define ODPAT_SET_NW_DST 9 /* IP destination address. */ +#define ODPAT_SET_TP_SRC 10 /* TCP/UDP source port. */ +#define ODPAT_SET_TP_DST 11 /* TCP/UDP destination port. */ +#define ODPAT_SNAT 12 /* Source NAT. */ +#define ODPAT_N_ACTIONS 13 + +struct odp_action_output { + __u16 type; /* ODPAT_OUTPUT. */ + __u16 port; /* Output port. */ + __u16 reserved1; + __u16 reserved2; +}; + +struct odp_action_output_group { + __u16 type; /* ODPAT_OUTPUT_GROUP. */ + __u16 group; /* Group number. */ + __u16 reserved1; + __u16 reserved2; +}; + +struct odp_action_controller { + __u16 type; /* ODPAT_CONTROLLER. */ + __u16 reserved; + __u32 arg; /* Copied to struct odp_msg 'arg' member. */ +}; + +/* Action structure for ODPAT_SET_VLAN_VID. */ +struct odp_action_vlan_vid { + __u16 type; /* ODPAT_SET_VLAN_VID. */ + __be16 vlan_vid; /* VLAN id. */ + __u16 reserved1; + __u16 reserved2; +}; + +/* Action structure for ODPAT_SET_VLAN_PCP. */ +struct odp_action_vlan_pcp { + __u16 type; /* ODPAT_SET_VLAN_PCP. */ + __u8 vlan_pcp; /* VLAN priority. */ + __u8 reserved1; + __u16 reserved2; + __u16 reserved3; +}; + +/* Action structure for ODPAT_SET_DL_SRC/DST. */ +struct odp_action_dl_addr { + __u16 type; /* ODPAT_SET_DL_SRC/DST. */ + __u8 dl_addr[ETH_ALEN]; /* Ethernet address. */ +}; + +/* Action structure for ODPAT_SET_NW_SRC/DST.
*/ +struct odp_action_nw_addr { + __u16 type; /* ODPAT_SET_NW_SRC/DST. */ + __u16 reserved; + __be32 nw_addr; /* IP address. */ +}; + +/* Action structure for ODPAT_SET_TP_SRC/DST. */ +struct odp_action_tp_port { + __u16 type; /* ODPAT_SET_TP_SRC/DST. */ + __be16 tp_port; /* TCP/UDP port. */ + __u16 reserved1; + __u16 reserved2; +}; + +struct odp_action_snat { + __u16 type; /* ODPAT_SNAT. */ + __u16 port; /* Output port. */ + __u16 reserved1; + __u16 reserved2; +}; + +union odp_action { + __u16 type; + struct odp_action_output output; + struct odp_action_output_group output_group; + struct odp_action_controller controller; + struct odp_action_vlan_vid vlan_vid; + struct odp_action_vlan_pcp vlan_pcp; + struct odp_action_dl_addr dl_addr; + struct odp_action_nw_addr nw_addr; + struct odp_action_tp_port tp_port; + struct odp_action_snat snat; +}; + +struct odp_execute { + __u16 in_port; + __u16 reserved1; + __u32 reserved2; + + union odp_action *actions; + __u32 n_actions; + + const void *data; + __u32 length; +}; + +/* Values below this cutoff are 802.3 packets and the two bytes + * following MAC addresses are used as a frame length. Otherwise, the + * two bytes are used as the Ethernet type. + */ +#define ODP_DL_TYPE_ETH2_CUTOFF 0x0600 + +/* Value of dl_type to indicate that the frame does not include an + * Ethernet type. + */ +#define ODP_DL_TYPE_NOT_ETH_TYPE 0x05ff + +/* The VLAN id is 12-bits, so we can use the entire 16 bits to indicate + * special conditions. All ones indicates that no VLAN id was set. + */ +#define ODP_VLAN_NONE 0xffff + +/* Configuration for source-NATing */ +struct odp_snat_config { + __u16 port; + + /* Time to cache MAC addresses of SNAT'd hosts in seconds (0=default). */ + __u16 mac_timeout; + + /* Range of IP addresses to impersonate. Set both values to the same to + * support a single address. */ + __be32 ip_start, ip_end; + + /* Range of transport ports that should be used as new source port. A + * value of zero lets the kernel choose.
*/ + __be16 tcp_start, tcp_end; + __be16 udp_start, udp_end; + + /* MAC address to use for ARP requests for a SNAT IP address that comes in + * on a different interface than 'port'. A value of all zeros silently + * drops those ARP requests. Requests that arrive on 'port' get a response + * with the mac address of the datapath device. */ + __u8 mac_addr[ETH_ALEN]; + __u16 reserved; +}; + +#endif /* openflow/datapath-protocol.h */ diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index b6f6487a..a836ba7c 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2008 Nicira Networks + * Copyright (c) 2008, 2009 Nicira Networks */ #ifndef OPENFLOW_NICIRA_EXT_H @@ -41,13 +41,6 @@ enum nicira_type { /* Remote command execution reply, sent when the command's execution * completes. The reply body is struct nx_command_reply. */ NXT_COMMAND_REPLY, - - /* Configure whether Flow End messages should be sent. */ - NXT_FLOW_END_CONFIG, - - /* Sent by switch when a flow ends. These messages are turned into - * ofp_flow_expired and NetFlow messages in user-space. */ - NXT_FLOW_END }; struct nicira_header { @@ -153,40 +146,4 @@ struct nx_command_reply { }; OFP_ASSERT(sizeof(struct nx_command_reply) == 20); -enum nx_flow_end_reason { - NXFER_IDLE_TIMEOUT, /* Flow idle time exceeded idle_timeout. */ - NXFER_HARD_TIMEOUT, /* Time exceeded hard_timeout. */ - NXFER_DELETE, /* Flow was removed by delete command. */ - NXFER_EJECT /* Flow was ejected. */ -}; - -struct nx_flow_end_config { - struct nicira_header header; - uint8_t enable; /* Set to 1 to enable Flow End message - generation. 0 to disable. */ - uint8_t pad[3]; -}; -OFP_ASSERT(sizeof(struct nx_flow_end_config) == 20); - -struct nx_flow_end { - struct nicira_header header; - struct ofp_match match; /* Description of fields. */ - - uint16_t priority; /* Priority level of flow entry. 
*/ - uint8_t reason; /* One of NXFER_*. */ - - uint8_t tcp_flags; /* Union of seen TCP flags. */ - uint8_t ip_tos; /* IP TOS value. */ - - uint8_t pad[7]; /* Align to 64-bits. */ - - uint64_t init_time; /* Time flow started in milliseconds. */ - uint64_t used_time; /* Time entry was last used in milliseconds. */ - uint64_t end_time; /* Time flow ended in milliseconds. */ - - uint64_t packet_count; - uint64_t byte_count; -}; -OFP_ASSERT(sizeof(struct nx_flow_end) == 104); - #endif /* openflow/nicira-ext.h */ diff --git a/include/openflow/openflow-netlink.h b/include/openflow/openflow-netlink.h index 931e6972..18b2b139 100644 --- a/include/openflow/openflow-netlink.h +++ b/include/openflow/openflow-netlink.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -63,7 +63,4 @@ enum dp_genl_command { DP_GENL_C_MAX = __DP_GENL_C_MAX - 1 }; -/* Maximum number of datapaths. 
*/ -#define DP_MAX 256 - #endif /* openflow/openflow-netlink.h */ diff --git a/lib/automake.mk b/lib/automake.mk index 0c51e0cf..1c2ed292 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -104,8 +104,7 @@ lib_libopenflow_a_SOURCES += \ lib/dpif.h \ lib/netlink-protocol.h \ lib/netlink.c \ - lib/netlink.h \ - lib/vconn-netlink.c + lib/netlink.h endif if HAVE_OPENSSL diff --git a/lib/classifier.c b/lib/classifier.c index f10718a0..8172f626 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -41,17 +41,17 @@ const struct cls_field cls_fields[CLS_N_FIELDS + 1] = { #define CLS_FIELD(WILDCARDS, MEMBER, NAME) \ - { offsetof(struct flow, MEMBER), \ - sizeof ((struct flow *)0)->MEMBER, \ + { offsetof(flow_t, MEMBER), \ + sizeof ((flow_t *)0)->MEMBER, \ WILDCARDS, \ - #NAME }, \ + #NAME }, CLS_FIELDS #undef CLS_FIELD - { sizeof(struct flow), 0, 0, "exact" }, + { sizeof(flow_t), 0, 0, "exact" }, }; -static uint32_t hash_fields(const struct flow *, int table_idx); -static bool equal_fields(const struct flow *, const struct flow *, int table_idx); +static uint32_t hash_fields(const flow_t *, int table_idx); +static bool equal_fields(const flow_t *, const flow_t *, int table_idx); static int table_idx_from_wildcards(uint32_t wildcards); static struct cls_rule *table_insert(struct hmap *, struct cls_rule *); @@ -62,7 +62,7 @@ static struct cls_bucket *find_bucket(struct hmap *, size_t hash, static struct cls_rule *search_table(const struct hmap *table, int field_idx, const struct cls_rule *); static struct cls_rule *search_exact_table(const struct classifier *, - size_t hash, const struct flow *); + size_t hash, const flow_t *); static bool rules_match_1wild(const struct cls_rule *fixed, const struct cls_rule *wild, int field_idx); @@ -71,7 +71,7 @@ static bool rules_match_1wild(const struct cls_rule *fixed, * * Rules without wildcards always have the maximum priority 65535. 
*/ void -cls_rule_from_flow(struct cls_rule *rule, const struct flow *flow, +cls_rule_from_flow(struct cls_rule *rule, const flow_t *flow, uint32_t wildcards, uint16_t priority) { assert(flow->reserved == 0); @@ -270,7 +270,7 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule) * rules added more recently take priority over rules added less recently, but * this is subject to change and should not be depended upon.) */ struct cls_rule * -classifier_lookup(const struct classifier *cls, const struct flow *flow) +classifier_lookup(const struct classifier *cls, const flow_t *flow) { struct cls_rule *best = NULL; if (!hmap_is_empty(&cls->exact_table)) { @@ -293,7 +293,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow) struct cls_rule * classifier_find_rule_exactly(const struct classifier *cls, - const struct flow *target, uint32_t wildcards, + const flow_t *target, uint32_t wildcards, uint16_t priority) { struct cls_bucket *bucket; @@ -449,7 +449,7 @@ classifier_for_each_with_wildcards(const struct classifier *cls, } static struct cls_bucket *create_bucket(struct hmap *, size_t hash, - const struct flow *fixed); + const flow_t *fixed); static struct cls_rule *bucket_insert(struct cls_bucket *, struct cls_rule *); static inline bool equal_bytes(const void *, const void *, size_t n); @@ -458,7 +458,7 @@ static inline bool equal_bytes(const void *, const void *, size_t n); * (CLS_F_IDX_*) are less than 'table_idx'. (If 'table_idx' is * CLS_F_IDX_EXACT, hashes all the fields in 'flow'). */ static uint32_t -hash_fields(const struct flow *flow, int table_idx) +hash_fields(const flow_t *flow, int table_idx) { /* I just know I'm going to hell for writing code this way. * @@ -524,7 +524,7 @@ finish: * * Returns true if all the compared fields are equal, false otherwise. 
*/ static bool -equal_fields(const struct flow *a, const struct flow *b, int table_idx) +equal_fields(const flow_t *a, const flow_t *b, int table_idx) { /* XXX The generated code could be better here. */ #define CLS_FIELD(WILDCARDS, MEMBER, NAME) \ @@ -629,7 +629,7 @@ find_bucket(struct hmap *table, size_t hash, const struct cls_rule *rule) /* Creates a bucket and inserts it in 'table' with the given 'hash' and 'fixed' * values. Returns the new bucket. */ static struct cls_bucket * -create_bucket(struct hmap *table, size_t hash, const struct flow *fixed) +create_bucket(struct hmap *table, size_t hash, const flow_t *fixed) { struct cls_bucket *bucket = xmalloc(sizeof *bucket); list_init(&bucket->rules); @@ -690,7 +690,7 @@ read_uint32(const void *p) * The compared field is the one with wildcard bit or bits 'field_wc', offset * 'rule_ofs' within cls_rule's "fields" member, and length 'len', in bytes. */ static inline bool ALWAYS_INLINE -field_matches(const struct flow *a_, const struct flow *b_, +field_matches(const flow_t *a_, const flow_t *b_, uint32_t wildcards, uint32_t nw_src_mask, uint32_t nw_dst_mask, uint32_t field_wc, int ofs, int len) { @@ -731,7 +731,7 @@ rules_match(const struct cls_rule *a, const struct cls_rule *b, case CLS_F_IDX_##NAME: \ if (!field_matches(&a->flow, &b->flow, \ wildcards, nw_src_mask, nw_dst_mask, \ - WILDCARDS, offsetof(struct flow, MEMBER), \ + WILDCARDS, offsetof(flow_t, MEMBER), \ sizeof a->flow.MEMBER)) { \ return false; \ } \ @@ -811,7 +811,7 @@ search_table(const struct hmap *table, int field_idx, static struct cls_rule * search_exact_table(const struct classifier *cls, size_t hash, - const struct flow *target) + const flow_t *target) { struct cls_rule *rule; diff --git a/lib/classifier.h b/lib/classifier.h index f374d941..1a9125d4 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -103,7 +103,7 @@ struct classifier { struct cls_bucket { struct hmap_node hmap_node; /* Within struct classifier 'tables'. 
*/ struct list rules; /* In order from highest to lowest priority. */ - struct flow fixed; /* Values for fixed fields. */ + flow_t fixed; /* Values for fixed fields. */ }; /* A flow classification rule. @@ -116,14 +116,14 @@ struct cls_rule { struct list list; /* Within struct cls_bucket 'rules'. */ struct hmap_node hmap; /* Within struct classifier 'exact_table'. */ } node; - struct flow flow; /* All field values. */ + flow_t flow; /* All field values. */ struct flow_wildcards wc; /* Wildcards for fields. */ uint16_t priority; /* Larger numbers are higher priorities. */ unsigned short table_idx; /* Index into struct classifier 'tables'. */ }; -void cls_rule_from_flow(struct cls_rule *, const struct flow *, - uint32_t wildcards, uint16_t priority); +void cls_rule_from_flow(struct cls_rule *, const flow_t *, uint32_t wildcards, + uint16_t priority); void cls_rule_from_match(struct cls_rule *, const struct ofp_match *, uint16_t priority); void cls_rule_print(const struct cls_rule *); @@ -139,8 +139,7 @@ int classifier_count(const struct classifier *); int classifier_count_exact(const struct classifier *); struct cls_rule *classifier_insert(struct classifier *, struct cls_rule *); void classifier_remove(struct classifier *, struct cls_rule *); -struct cls_rule *classifier_lookup(const struct classifier *, - const struct flow *); +struct cls_rule *classifier_lookup(const struct classifier *, const flow_t *); typedef void cls_cb_func(struct cls_rule *, void *aux); void classifier_for_each(const struct classifier *, cls_cb_func *, void *aux); @@ -157,7 +156,7 @@ void classifier_for_each_match(const struct classifier *, int include, cls_cb_func *, void *aux); struct cls_rule *classifier_find_rule_exactly(const struct classifier *, - const struct flow *target, + const flow_t *target, uint32_t wildcards, uint16_t priority); diff --git a/lib/dhcp-client.c b/lib/dhcp-client.c index 5cbd2971..d056feaf 100644 --- a/lib/dhcp-client.c +++ b/lib/dhcp-client.c @@ -928,7 +928,7 @@ 
do_receive_msg(struct dhclient *cli, struct dhcp_msg *msg) for (; cli->received < 50; cli->received++) { const struct ip_header *ip; const struct dhcp_header *dhcp; - struct flow flow; + flow_t flow; int error; ofpbuf_clear(&b); diff --git a/lib/dpif.c b/lib/dpif.c index 3e4c3ce7..0c94de41 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1,6 +1,6 @@ /* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University - * + * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute @@ -13,10 +13,10 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -25,7 +25,7 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * + * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. 
@@ -37,356 +37,907 @@ #include #include #include +#include #include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "flow.h" #include "netlink.h" -#include "netlink-protocol.h" +#include "ofp-print.h" #include "ofpbuf.h" -#include "openflow/openflow-netlink.h" -#include "openflow/openflow.h" #include "packets.h" +#include "poll-loop.h" #include "util.h" -#include "xtoxll.h" #include "vlog.h" #define THIS_MODULE VLM_dpif -/* Not really much point in logging many dpif errors. */ -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); +/* Rate limit for individual messages going to or from the datapath, output at + * DBG level. This is very high because, if these are enabled, it is because + * we really need to see them. */ +static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); -/* The Generic Netlink family number used for OpenFlow. */ -static int openflow_family; +/* Not really much point in logging many dpif errors. */ +static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); -static int lookup_openflow_multicast_group(int dp_idx, int *multicast_group); -static int send_mgmt_command(struct dpif *, int dp_idx, int command, - const char *netdev); +static int get_minor_from_name(const char *name, unsigned int *minor); +static int name_to_minor(const char *name, unsigned int *minor); +static int lookup_minor(const char *name, unsigned int *minor); +static int open_by_minor(unsigned int minor, struct dpif *); +static int make_openflow_device(unsigned int minor, char **fnp); +static char *odp_actions_to_string(const union odp_action actions[], + size_t n_actions); -/* Opens a socket for a local datapath, initializing 'dp'. If - * 'subscribe_dp_idx' is nonnegative, listens for asynchronous messages - * (packet-in, etc.) from the datapath with that number; otherwise, 'dp' will - * receive only replies to explicitly initiated requests. 
*/ int -dpif_open(int subscribe_dp_idx, struct dpif *dp) +dpif_open(const char *name, struct dpif *dpif) { - struct nl_sock *sock; - int multicast_group = 0; - int retval; + unsigned int minor; + int listen_mask; + int error; - retval = nl_lookup_genl_family(DP_GENL_FAMILY_NAME, &openflow_family); - if (retval) { - return retval; - } + dpif->fd = -1; - if (subscribe_dp_idx >= 0) { - retval = lookup_openflow_multicast_group(subscribe_dp_idx, - &multicast_group); - if (retval) { - return retval; - } + error = name_to_minor(name, &minor); + if (error) { + return error; } - /* Specify a large so_rcvbuf size because we occasionally need to be able - * to retrieve large collections of flow records. */ - retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, - 4 * 1024u * 1024, &sock); - if (retval) { - return retval; + error = open_by_minor(minor, dpif); + if (error) { + return error; } - dp->sock = sock; + /* We can open the device, but that doesn't mean that it's been created. + * If it hasn't been, then any command other than ODP_DP_CREATE will + * return ENODEV. Try something innocuous. */ + if (ioctl(dpif->fd, ODP_GET_LISTEN_MASK, &listen_mask)) { + error = errno; + if (error != ENODEV) { + VLOG_WARN("dp%u: probe returned unexpected error: %s", + minor, strerror(error)); + } + dpif_close(dpif); + return error; + } return 0; } -/* Closes 'dp'. 
*/ void -dpif_close(struct dpif *dp) +dpif_close(struct dpif *dpif) { - if (dp) { - nl_sock_destroy(dp->sock); + if (dpif) { + close(dpif->fd); + dpif->fd = -1; } } -static const struct nl_policy openflow_policy[] = { - [DP_GENL_A_DP_IDX] = { .type = NL_A_U32, - .optional = false }, - [DP_GENL_A_OPENFLOW] = { .type = NL_A_UNSPEC, - .min_len = sizeof(struct ofp_header), - .max_len = 65535, - .optional = false }, -}; +#define IOCTL(DPIF, CMD, ARG) do_ioctl(DPIF, CMD, #CMD, ARG) + +static int +do_ioctl(const struct dpif *dpif, int cmd, const char *cmd_name, + const void *arg) +{ + if (ioctl(dpif->fd, cmd, arg)) { + VLOG_WARN_RL(&error_rl, "dp%u: ioctl(%s) failed (%s)", + dpif->minor, cmd_name, strerror(errno)); + return errno; + } else { + VLOG_DBG_RL(&dpmsg_rl, "dp%u: ioctl(%s): success", + dpif->minor, cmd_name); + return 0; + } +} -/* Tries to receive an openflow message from datapath 'dp_idx' on 'sock'. If - * successful, stores the received message into '*msgp' and returns 0. The - * caller is responsible for destroying the message with ofpbuf_delete(). On - * failure, returns a positive errno value and stores a null pointer into - * '*msgp'. - * - * Only Netlink messages with embedded OpenFlow messages are accepted. Other - * Netlink messages provoke errors. - * - * If 'wait' is true, dpif_recv_openflow waits for a message to be ready; - * otherwise, returns EAGAIN if the 'sock' receive buffer is empty. */ int -dpif_recv_openflow(struct dpif *dp, int dp_idx, struct ofpbuf **bufferp, - bool wait) +dpif_create(const char *name, struct dpif *dpif) { - struct nlattr *attrs[ARRAY_SIZE(openflow_policy)]; - struct ofpbuf *buffer; - struct ofp_header *oh; - uint16_t ofp_len; + unsigned int minor; + int error; + + if (!get_minor_from_name(name, &minor)) { + /* Minor was specified in 'name', go ahead and create it. 
*/ + dpif->fd = -1; + error = open_by_minor(minor, dpif); + if (error) { + return error; + } - buffer = *bufferp = NULL; - do { - int retval; - - do { - ofpbuf_delete(buffer); - retval = nl_sock_recv(dp->sock, &buffer, wait); - } while (retval == ENOBUFS - || (!retval - && (nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE - || nl_msg_nlmsgerr(buffer, NULL)))); - if (retval) { - if (retval != EAGAIN) { - VLOG_WARN_RL(&rl, "dpif_recv_openflow: %s", strerror(retval)); - } - return retval; + if (!strncmp(name, "nl:", 3)) { + char devname[128]; + sprintf(devname, "of%u", minor); + error = ioctl(dpif->fd, ODP_DP_CREATE, devname) < 0 ? errno : 0; + } else { + error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0; + } + if (error) { + dpif_close(dpif); } + return error; + } else { + for (minor = 0; minor < ODP_MAX; minor++) { + error = open_by_minor(minor, dpif); + if (error) { + return error; + } - if (nl_msg_genlmsghdr(buffer) == NULL) { - VLOG_DBG_RL(&rl, "received packet too short for Generic Netlink"); - goto error; + error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0; + if (!error) { + return 0; + } else if (error != EEXIST) { + dpif_close(dpif); + return error; + } } - if (nl_msg_nlmsghdr(buffer)->nlmsg_type != openflow_family) { - VLOG_DBG_RL(&rl, - "received type (%"PRIu16") != openflow family (%d)", - nl_msg_nlmsghdr(buffer)->nlmsg_type, openflow_family); + return ENOBUFS; + } +} + +int +dpif_delete(struct dpif *dpif) +{ + return IOCTL(dpif, ODP_DP_DESTROY, NULL); +} + +int +dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats) +{ + memset(stats, 0, sizeof *stats); + return IOCTL(dpif, ODP_DP_STATS, stats); +} + +int +dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags) +{ + int tmp; + int error = IOCTL(dpif, ODP_GET_DROP_FRAGS, &tmp); + *drop_frags = error ?
false : tmp & 1; /* 'tmp' may be unset on error. */ + return error; +} + +int +dpif_set_drop_frags(struct dpif *dpif, bool drop_frags) +{ + int tmp = drop_frags; + return IOCTL(dpif, ODP_SET_DROP_FRAGS, &tmp); +} + +int +dpif_get_listen_mask(const struct dpif *dpif, int *listen_mask) +{ + int error = IOCTL(dpif, ODP_GET_LISTEN_MASK, listen_mask); + if (error) { + *listen_mask = 0; + } + return error; +} + +int +dpif_set_listen_mask(struct dpif *dpif, int listen_mask) +{ + return IOCTL(dpif, ODP_SET_LISTEN_MASK, &listen_mask); +} + +int +dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no) +{ + struct odp_port port; + + memset(&port, 0, sizeof port); + strncpy(port.devname, devname, sizeof port.devname); + port.port = port_no; + return IOCTL(dpif, ODP_PORT_ADD, &port); +} + +int +dpif_port_del(struct dpif *dpif, uint16_t port_no) +{ + int tmp = port_no; + return IOCTL(dpif, ODP_PORT_DEL, &tmp); +} + +int +dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no, + struct odp_port *port) +{ + memset(port, 0, sizeof *port); + port->port = port_no; + return IOCTL(dpif, ODP_PORT_QUERY, port); +} + +int +dpif_port_query_by_name(const struct dpif *dpif, const char *devname, + struct odp_port *port) +{ + memset(port, 0, sizeof *port); + strncpy(port->devname, devname, sizeof port->devname); + return IOCTL(dpif, ODP_PORT_QUERY, port); +} + +int +dpif_port_list(const struct dpif *dpif, + struct odp_port **ports, size_t *n_ports) +{ + struct odp_portvec pv; + struct odp_stats stats; + int error; + + do { + error = dpif_get_dp_stats(dpif, &stats); + if (error) { goto error; } - if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, - openflow_policy, attrs, - ARRAY_SIZE(openflow_policy))) { + *ports = xcalloc(1, stats.n_ports * sizeof **ports); + pv.ports = *ports; + pv.n_ports = stats.n_ports; + error = IOCTL(dpif, ODP_PORT_LIST, &pv); + if (error) { + free(*ports); goto error; } - } while (nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]) != dp_idx); - - oh = buffer->data = (void *)
nl_attr_get(attrs[DP_GENL_A_OPENFLOW]); - buffer->size = nl_attr_get_size(attrs[DP_GENL_A_OPENFLOW]); - ofp_len = ntohs(oh->length); - if (ofp_len != buffer->size) { - VLOG_WARN_RL(&rl, - "ofp_header.length %"PRIu16" != attribute length %zu\n", - ofp_len, buffer->size); - buffer->size = MIN(ofp_len, buffer->size); - } - *bufferp = buffer; + } while (pv.n_ports != stats.n_ports); + *n_ports = pv.n_ports; return 0; error: - ofpbuf_delete(buffer); - return EPROTO; + *ports = NULL; + *n_ports = 0; + return error; } -/* Encapsulates 'msg', which must contain an OpenFlow message, in a Netlink - * message, and sends it to the OpenFlow local datapath numbered 'dp_idx' via - * 'sock'. - * - * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN - * if the 'sock' send buffer is full. - * - * If the send is successful, then the kernel module will receive it, but there - * is no guarantee that any reply will not be dropped (see nl_sock_transact() - * for details). - */ int -dpif_send_openflow(struct dpif *dp, int dp_idx, struct ofpbuf *buffer) -{ - struct ofp_header *oh; - unsigned int dump_flag; - struct ofpbuf hdr; - struct nlattr *nla; - uint32_t fixed_buffer[64 / 4]; - struct iovec iov[3]; - int pad_bytes; - int n_iov; - int retval; +dpif_port_group_set(struct dpif *dpif, uint16_t group, + const uint16_t ports[], size_t n_ports) +{ + struct odp_port_group pg; + + assert(n_ports <= UINT16_MAX); + pg.group = group; + pg.ports = (uint16_t *) ports; + pg.n_ports = n_ports; + return IOCTL(dpif, ODP_PORT_GROUP_SET, &pg); +} + +/* Careful: '*n_out' can be greater than 'n_ports' on return, if 'n_ports' is + * less than the number of ports in 'group'. 
*/ +int +dpif_port_group_get(const struct dpif *dpif, uint16_t group, + uint16_t ports[], size_t n_ports, size_t *n_out) +{ + struct odp_port_group pg; + int error; + + assert(n_ports <= UINT16_MAX); + pg.group = group; + pg.ports = ports; + pg.n_ports = n_ports; + error = IOCTL(dpif, ODP_PORT_GROUP_GET, &pg); /* Query, don't set. */ + *n_out = error ? 0 : pg.n_ports; + return error; +} + +int +dpif_flow_flush(struct dpif *dpif) +{ + return IOCTL(dpif, ODP_FLOW_FLUSH, NULL); +} + +int +dpif_flow_add(struct dpif *dpif, struct odp_flow *flow) +{ + if (VLOG_IS_DBG_ENABLED()) { + char *actions_string = odp_actions_to_string(flow->actions, + flow->n_actions); + char *flow_string = flow_to_string(&flow->key); + VLOG_DBG("adding flow %s with actions %s", + flow_string, actions_string); + free(flow_string); + free(actions_string); + } + return IOCTL(dpif, ODP_FLOW_ADD, flow); +} + +int +dpif_flow_set_actions(struct dpif *dpif, const struct odp_flow_key *key, + const union odp_action *actions, size_t n_actions) +{ + struct odp_flow flow; + + flow.key = *key; + flow.actions = (union odp_action *) actions; + flow.n_actions = n_actions; + return IOCTL(dpif, ODP_FLOW_SET_ACTS, &flow); +} + +int +dpif_flow_del(struct dpif *dpif, struct odp_flow *flow) +{ + return IOCTL(dpif, ODP_FLOW_DEL, flow); +} - /* The reply to OFPT_STATS_REQUEST may be multiple segments long, so we - * need to specify NLM_F_DUMP in the request. */ - oh = ofpbuf_at_assert(buffer, 0, sizeof *oh); - dump_flag = oh->type == OFPT_STATS_REQUEST ?
NLM_F_DUMP : 0; - - ofpbuf_use(&hdr, fixed_buffer, sizeof fixed_buffer); - nl_msg_put_genlmsghdr(&hdr, dp->sock, 32, openflow_family, - NLM_F_REQUEST | dump_flag, DP_GENL_C_OPENFLOW, 1); - nl_msg_put_u32(&hdr, DP_GENL_A_DP_IDX, dp_idx); - nla = ofpbuf_put_uninit(&hdr, sizeof *nla); - nla->nla_len = sizeof *nla + buffer->size; - nla->nla_type = DP_GENL_A_OPENFLOW; - pad_bytes = NLA_ALIGN(nla->nla_len) - nla->nla_len; - nl_msg_nlmsghdr(&hdr)->nlmsg_len = hdr.size + buffer->size + pad_bytes; - n_iov = 2; - iov[0].iov_base = hdr.data; - iov[0].iov_len = hdr.size; - iov[1].iov_base = buffer->data; - iov[1].iov_len = buffer->size; - if (pad_bytes) { - static char zeros[NLA_ALIGNTO]; - n_iov++; - iov[2].iov_base = zeros; - iov[2].iov_len = pad_bytes; - } - retval = nl_sock_sendv(dp->sock, iov, n_iov, false); - if (retval && retval != EAGAIN) { - VLOG_WARN_RL(&rl, "dpif_send_openflow: %s", strerror(retval)); - } - return retval; -} - -/* Creates local datapath numbered 'dp_idx' with the name 'dp_name'. A - * 'dp_idx' of -1 or null 'dp_name' will have the kernel module choose values. - * (At least one or the other must be provided, however, so that the caller can - * identify the datapath that was created.) Returns 0 if successful, otherwise - * a positive errno value. */ int -dpif_add_dp(struct dpif *dp, int dp_idx, const char *dp_name) +dpif_flow_query(const struct dpif *dpif, struct odp_flow *flow) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_DP, dp_name); + return IOCTL(dpif, ODP_FLOW_QUERY, flow); } -/* Destroys a local datapath. If 'dp_idx' is not -1, destroys the datapath - * with that number; if 'dp_name' is not NULL, destroys the datapath with that - * name. Exactly one of 'dp_idx' and 'dp_name' should be used. Returns 0 if - * successful, otherwise a positive errno value. 
*/ int -dpif_del_dp(struct dpif *dp, int dp_idx, const char *dp_name) +dpif_flow_query_multiple(const struct dpif *dpif, + struct odp_flow flows[], size_t n) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_DP, dp_name); + struct odp_flowvec fv; + fv.flows = flows; + fv.n_flows = n; + return IOCTL(dpif, ODP_FLOW_QUERY_MULTIPLE, &fv); } -/* Adds the Ethernet device named 'netdev' to the local datapath numbered - * 'dp_idx'. Returns 0 if successful, otherwise a positive errno value. */ int -dpif_add_port(struct dpif *dp, int dp_idx, const char *netdev) +dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n, + size_t *n_out) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_PORT, netdev); + struct odp_flowvec fv; + uint32_t i; + int error; + + fv.flows = flows; + fv.n_flows = n; + for (i = 0; i < n; i++) { + flows[i].actions = NULL; + flows[i].n_actions = 0; + } + error = IOCTL(dpif, ODP_FLOW_LIST, &fv); + *n_out = error ? 0 : fv.n_flows; + return error; } -/* Removes the Ethernet device named 'netdev' from the local datapath numbered - * 'dp_idx'. Returns 0 if successful, otherwise a positive errno value. 
*/ int -dpif_del_port(struct dpif *dp, int dp_idx, const char *netdev) +dpif_flow_list_all(const struct dpif *dpif, + struct odp_flow **flowsp, size_t *np) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_PORT, netdev); + struct odp_stats stats; + struct odp_flow *flows; + size_t n_flows; + int error; + + *flowsp = NULL; + *np = 0; + + error = dpif_get_dp_stats(dpif, &stats); + if (error) { + return error; + } + + flows = xmalloc(sizeof *flows * stats.n_flows); + error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows); + if (error) { + free(flows); + return error; + } + + if (stats.n_flows != n_flows) { + VLOG_WARN_RL(&error_rl, "dp%u: datapath stats reported %"PRIu32" " + "flows but flow listing reported %zu", + dpif->minor, stats.n_flows, n_flows); + } + *flowsp = flows; + *np = n_flows; + return 0; +} + +int +dpif_execute(struct dpif *dpif, uint16_t in_port, + const union odp_action actions[], size_t n_actions, + const struct ofpbuf *buf) +{ + struct odp_execute execute; + memset(&execute, 0, sizeof execute); + execute.in_port = in_port; + execute.actions = (union odp_action *) actions; + execute.n_actions = n_actions; + execute.data = buf->data; + execute.length = buf->size; + if (VLOG_IS_DBG_ENABLED()) { + char *actions_string = odp_actions_to_string(actions, n_actions); + char *packet_string = ofp_packet_to_string(buf->data, buf->size, + buf->size); + VLOG_DBG("executing %s with in_port=%"PRIu16" on packet %s", + actions_string, in_port, packet_string); + free(actions_string); + free(packet_string); + } + return IOCTL(dpif, ODP_EXECUTE, &execute); +} + +int +dpif_snat_add_port(struct dpif *dpif, const struct odp_snat_config *osc) +{ + return IOCTL(dpif, ODP_SNAT_ADD_PORT, osc); +} + +int +dpif_snat_del_port(struct dpif *dpif, uint16_t port) +{ + int tmp = port; + return IOCTL(dpif, ODP_SNAT_DEL_PORT, &tmp); +} + +int +dpif_recv(struct dpif *dpif, struct ofpbuf **bufp) +{ + struct ofpbuf *buf; + int retval; + int error; + + buf = ofpbuf_new(2048); 
/* XXX scale based on netdev MTUs */ + retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); + if (retval < 0) { + error = errno; + if (error != EAGAIN) { + VLOG_WARN_RL(&error_rl, "dp%u: read failed: %s", + dpif->minor, strerror(error)); + } + } else if (retval >= sizeof(struct odp_msg)) { + struct odp_msg *msg = buf->data; + if (msg->length <= retval) { + buf->size += retval; + if (VLOG_IS_DBG_ENABLED()) { + void *payload = msg + 1; + size_t length = buf->size - sizeof *msg; + char *s = ofp_packet_to_string(payload, length, length); + VLOG_DBG_RL(&dpmsg_rl, "dp%u: received %s message of length " + "%zu on port %"PRIu16": %s", dpif->minor, + (msg->type == _ODPL_MISS_NR ? "miss" + : msg->type == _ODPL_ACTION_NR ? "action" + : ""), + msg->length - sizeof(struct odp_msg), + msg->port, s); + free(s); + } + *bufp = buf; + return 0; + } else { + VLOG_WARN_RL(&error_rl, "dp%u: discarding message truncated " + "from %zu bytes to %d", + dpif->minor, msg->length, retval); + error = ERANGE; + } + } else if (!retval) { + VLOG_WARN_RL(&error_rl, "dp%u: unexpected end of file", dpif->minor); + error = EPROTO; + } else { + VLOG_WARN_RL(&error_rl, + "dp%u: discarding too-short message (%d bytes)", + dpif->minor, retval); + error = ERANGE; + } + + *bufp = NULL; + ofpbuf_delete(buf); + return error; +} + +void +dpif_recv_wait(struct dpif *dpif) +{ + poll_fd_wait(dpif->fd, POLLIN); } -static const struct nl_policy openflow_multicast_policy[] = { - [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 }, - [DP_GENL_A_DP_NAME] = { .type = NL_A_STRING }, - [DP_GENL_A_MC_GROUP] = { .type = NL_A_U32 }, +struct dpifmon { + const struct dpif *dpif; + struct nl_sock *sock; + int local_ifindex; }; -/* Looks up the Netlink multicast group and datapath index of a datapath - * by either the datapath index or name. If 'dp_idx' points to a value - * of '-1', then 'dp_name' is used to lookup the datapath. 
If successful, - * stores the multicast group in '*multicast_group' and the index in - * '*dp_idx' and returns 0. Otherwise, returns a positive errno value. */ -static int -query_datapath(int *dp_idx, int *multicast_group, const char *dp_name) +int +dpifmon_create(const struct dpif *dpif, struct dpifmon **monp) { struct nl_sock *sock; - struct ofpbuf request, *reply; - struct nlattr *attrs[ARRAY_SIZE(openflow_multicast_policy)]; - int retval; + struct dpifmon *mon; + struct odp_port local; + unsigned int local_ifindex; + int error; + + *monp = NULL; + + error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &local); + if (error) { + return error; + } - retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); - if (retval) { - return retval; - } - ofpbuf_init(&request, 0); - nl_msg_put_genlmsghdr(&request, sock, 0, openflow_family, NLM_F_REQUEST, - DP_GENL_C_QUERY_DP, 1); - if (*dp_idx != -1) { - nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, *dp_idx); - } - if (dp_name) { - nl_msg_put_string(&request, DP_GENL_A_DP_NAME, dp_name); - } - retval = nl_sock_transact(sock, &request, &reply); - ofpbuf_uninit(&request); - if (retval) { - nl_sock_destroy(sock); - return retval; - } - if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN, - openflow_multicast_policy, attrs, - ARRAY_SIZE(openflow_multicast_policy))) { - nl_sock_destroy(sock); - ofpbuf_delete(reply); - return EPROTO; - } - *dp_idx = nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]); - *multicast_group = nl_attr_get_u32(attrs[DP_GENL_A_MC_GROUP]); - nl_sock_destroy(sock); - ofpbuf_delete(reply); + local_ifindex = if_nametoindex(local.devname); + if (!local_ifindex) { + VLOG_WARN("could not get ifindex of %s device: %s", + local.devname, strerror(errno)); + return errno; + } + + error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &sock); + if (error) { + VLOG_WARN("could not create rtnetlink socket: %s", strerror(error)); + return error; + } + mon = *monp = xmalloc(sizeof *mon); + mon->dpif = dpif; + mon->sock = sock; + 
mon->local_ifindex = local_ifindex; return 0; } -/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If - * successful, stores the multicast group in '*multicast_group' and returns 0. - * Otherwise, returns a positve errno value. */ -static int -lookup_openflow_multicast_group(int dp_idx, int *multicast_group) +void +dpifmon_destroy(struct dpifmon *mon) { - return query_datapath(&dp_idx, multicast_group, NULL); + if (mon) { + nl_sock_destroy(mon->sock); + } } -/* Looks up the datatpath index based on the name. Returns the index, or - * -1 on error. */ int -dpif_get_idx(const char *name) +dpifmon_poll(struct dpifmon *mon, char **devnamep) { - int dp_idx = -1; - int mc_group = 0; + static struct vlog_rate_limit slow_rl = VLOG_RATE_LIMIT_INIT(1, 5); + static const struct nl_policy rtnlgrp_link_policy[] = { + [IFLA_IFNAME] = { .type = NL_A_STRING }, + [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; + struct ofpbuf *buf; + int error; + + *devnamep = NULL; +again: + error = nl_sock_recv(mon->sock, &buf, false); + switch (error) { + case 0: + if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), + rtnlgrp_link_policy, + attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { + VLOG_WARN_RL(&slow_rl, "received bad rtnl message"); + error = ENOBUFS; + } else { + const char *devname = nl_attr_get_string(attrs[IFLA_IFNAME]); + bool for_us; + + if (attrs[IFLA_MASTER]) { + uint32_t master_ifindex = nl_attr_get_u32(attrs[IFLA_MASTER]); + for_us = master_ifindex == mon->local_ifindex; + } else { + struct odp_port odp_port; + for_us = (dpif_port_query_by_name(mon->dpif, devname, + &odp_port) == 0); + } - if (query_datapath(&dp_idx, &mc_group, name)) { - return -1; + if (!for_us) { + /* Not for us, try again. */ + ofpbuf_delete(buf); + goto again; + } + *devnamep = xstrdup(devname); + } + ofpbuf_delete(buf); + break; + + case EAGAIN: + /* Nothing to do. 
*/ + break; + + case ENOBUFS: + VLOG_WARN_RL(&slow_rl, "dpifmon socket overflowed"); + break; + + default: + VLOG_WARN_RL(&slow_rl, "error on dpifmon socket: %s", strerror(error)); + break; } + return error; +} + +void +dpifmon_run(struct dpifmon *mon UNUSED) +{ + /* Nothing to do in this implementation. */ +} - return dp_idx; +void +dpifmon_wait(struct dpifmon *mon) +{ + nl_sock_wait(mon->sock, POLLIN); } + +static int get_openflow_major(void); +static int get_major(const char *target, int default_major); -/* Sends the given 'command' to datapath 'dp', related to the local datapath - * numbered 'dp_idx'. If 'arg' is nonnull, adds it to the command as the - * datapath or port name attribute depending on the requested operation. - * Returns 0 if successful, otherwise a positive errno value. */ static int -send_mgmt_command(struct dpif *dp, int dp_idx, int command, const char *arg) +lookup_minor(const char *name, unsigned int *minor) { - struct ofpbuf request, *reply; - int retval; + struct ethtool_drvinfo drvinfo; + struct ifreq ifr; + int error; + int sock; + + *minor = -1; + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno)); + error = errno; + goto error; + } - ofpbuf_init(&request, 0); - nl_msg_put_genlmsghdr(&request, dp->sock, 32, openflow_family, - NLM_F_REQUEST | NLM_F_ACK, command, 1); - if (dp_idx != -1) { - nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx); + memset(&ifr, 0, sizeof ifr); + strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); + ifr.ifr_data = (caddr_t) &drvinfo; + + memset(&drvinfo, 0, sizeof drvinfo); + drvinfo.cmd = ETHTOOL_GDRVINFO; + if (ioctl(sock, SIOCETHTOOL, &ifr)) { + VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno)); + error = errno; + goto error_close_sock; + } + + if (strcmp(drvinfo.driver, "openflow")) { + VLOG_WARN("%s is not an openflow device", name); + error = EOPNOTSUPP; + goto error_close_sock; } - if (arg) { - if ((command == DP_GENL_C_ADD_DP) || 
(command == DP_GENL_C_DEL_DP)) { - nl_msg_put_string(&request, DP_GENL_A_DP_NAME, arg); + + if (!isdigit(drvinfo.bus_info[0])) { + VLOG_WARN("%s ethtool info does not contain an openflow minor", name); + error = EPROTOTYPE; + goto error_close_sock; + } + + *minor = atoi(drvinfo.bus_info); + close(sock); + return 0; + +error_close_sock: + close(sock); +error: + return error; +} + +static int +make_openflow_device(unsigned int minor, char **fnp) +{ + dev_t dev = makedev(get_openflow_major(), minor); + const char dirname[] = "/dev/net"; + struct stat s; + char fn[128]; + + *fnp = NULL; + sprintf(fn, "%s/dp%d", dirname, minor); + if (!stat(fn, &s)) { + if (!S_ISCHR(s.st_mode)) { + VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", + fn); + } else if (s.st_rdev != dev) { + VLOG_WARN_RL(&error_rl, + "%s is device %u:%u instead of %u:%u, fixing", + fn, major(s.st_rdev), minor(s.st_rdev), + major(dev), minor(dev)); + } else { + goto success; + } + if (unlink(fn)) { + VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", + fn, strerror(errno)); + return errno; + } + } else if (errno == ENOENT) { + if (stat(dirname, &s)) { + if (errno == ENOENT) { + if (mkdir(dirname, 0755)) { + VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", + dirname, strerror(errno)); + return errno; + } + } else { + VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", + dirname, strerror(errno)); + return errno; + } + } + } else { + VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); + return errno; + } + + /* The device needs to be created. 
*/ + if (mknod(fn, S_IFCHR | 0700, dev)) { + VLOG_WARN_RL(&error_rl, + "%s: creating character device %u:%u failed (%s)", + fn, major(dev), minor(dev), strerror(errno)); + return errno; + } + +success: + *fnp = xstrdup(fn); + return 0; +} + + +static int +get_openflow_major(void) +{ + static unsigned int openflow_major; + if (!openflow_major) { + enum { DEFAULT_MAJOR = 248 }; + openflow_major = get_major("openflow", DEFAULT_MAJOR); + } + return openflow_major; +} + +static int +get_major(const char *target, int default_major) +{ + const char fn[] = "/proc/devices"; + char line[128]; + FILE *file; + int ln; + + file = fopen(fn, "r"); + if (!file) { + VLOG_ERR("opening %s failed (%s)", fn, strerror(errno)); + goto error; + } + + for (ln = 1; fgets(line, sizeof line, file); ln++) { + char name[64]; + int major; + + if (!strncmp(line, "Character", 9) || line[0] == '\0') { + /* Nothing to do. */ + } else if (!strncmp(line, "Block", 5)) { + /* We only want character devices, so skip the rest of the file. 
*/ + break; + } else if (sscanf(line, "%d %63s", &major, name)) { + if (!strcmp(name, target)) { + fclose(file); + return major; + } } else { - nl_msg_put_string(&request, DP_GENL_A_PORTNAME, arg); + static bool warned; + if (!warned) { + VLOG_WARN("%s:%d: syntax error", fn, ln); + } + warned = true; } } - retval = nl_sock_transact(dp->sock, &request, &reply); - ofpbuf_uninit(&request); - ofpbuf_delete(reply); - return retval; + VLOG_ERR("%s: %s major not found (is the module loaded?), using " + "default major %d", fn, target, default_major); +error: + VLOG_INFO("using default major %d for %s", default_major, target); + return default_major; +} + +static int +name_to_minor(const char *name, unsigned int *minor) +{ + if (!get_minor_from_name(name, minor)) { + return 0; + } + return lookup_minor(name, minor); +} + +static int +get_minor_from_name(const char *name, unsigned int *minor) +{ + if (!strncmp(name, "dp", 2) && isdigit(name[2])) { + *minor = atoi(name + 2); + return 0; + } else if (!strncmp(name, "nl:", 3) && isdigit(name[3])) { + /* This is for compatibility only and will be dropped. 
*/ + *minor = atoi(name + 3); + return 0; + } else { + return EINVAL; + } +} + +static int +open_by_minor(unsigned int minor, struct dpif *dpif) +{ + int error; + char *fn; + int fd; + + error = make_openflow_device(minor, &fn); + if (error) { + return error; + } + + fd = open(fn, O_RDONLY | O_NONBLOCK); + if (fd < 0) { + error = errno; + VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); + free(fn); + return error; + } + + free(fn); + dpif->minor = minor; + dpif->fd = fd; + return 0; +} + +static char * +odp_actions_to_string(const union odp_action actions[], size_t n_actions) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + if (!n_actions) { + ds_put_cstr(&ds, ""); + } else { + const union odp_action *a; + for (a = actions; a < &actions[n_actions]; a++) { + if (a != actions) { + ds_put_char(&ds, ','); + } + switch (a->type) { + case ODPAT_OUTPUT: + ds_put_format(&ds, "out:%"PRIu16, a->output.port); + break; + case ODPAT_OUTPUT_GROUP: + ds_put_format(&ds, "group:%"PRIu16, a->output_group.group); + break; + case ODPAT_CONTROLLER: + ds_put_format(&ds, "controller(arg:%"PRIu32")", + a->controller.arg); + break; + case ODPAT_SET_VLAN_VID: + ds_put_format(&ds, "vid:%"PRIu16, ntohs(a->vlan_vid.vlan_vid)); + break; + case ODPAT_SET_VLAN_PCP: + ds_put_format(&ds, "pri:%"PRIu8, a->vlan_pcp.vlan_pcp); + break; + case ODPAT_STRIP_VLAN: + ds_put_cstr(&ds, "strip-vlan"); + break; + case ODPAT_SET_DL_SRC: + ds_put_format(&ds, "dl-src:"ETH_ADDR_FMT, + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case ODPAT_SET_DL_DST: + ds_put_format(&ds, "dl-dst:"ETH_ADDR_FMT, + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case ODPAT_SET_NW_SRC: + ds_put_format(&ds, "nw-src:"IP_FMT, + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case ODPAT_SET_NW_DST: + ds_put_format(&ds, "nw-dst:"IP_FMT, + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case ODPAT_SET_TP_SRC: + ds_put_format(&ds, "tp-src:%"PRIu16, + ntohs(a->tp_port.tp_port)); + break; + case ODPAT_SET_TP_DST: + ds_put_format(&ds, "tp-dst:%"PRIu16, 
+ ntohs(a->tp_port.tp_port)); + break; + case ODPAT_SNAT: + ds_put_format(&ds, "snat:%"PRIu16, a->snat.port); + break; + default: + ds_put_format(&ds, "unknown(%"PRIu16")", a->type); + break; + } + } + + } + return ds_cstr(&ds); } diff --git a/lib/dpif.h b/lib/dpif.h index 12b81b30..590de1ea 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -1,6 +1,6 @@ /* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University - * + * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute @@ -13,10 +13,10 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -25,7 +25,7 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * + * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. @@ -37,32 +37,77 @@ /* Operations for the datapath running in the local kernel. The interface can * generalize to multiple types of local datapaths, but the implementation only - * supports the openflow kernel module via netlink. */ + * supports the openflow kernel module. */ +#include "openflow/datapath-protocol.h" #include +#include #include struct ofpbuf; -struct ofp_match; /* A datapath interface. Opaque. 
*/ -struct dpif -{ - struct nl_sock *sock; +struct dpif { + unsigned int minor; /* For use in error messages. */ + int fd; }; -int dpif_open(int subscribe_dp_idx, struct dpif *); +int dpif_open(const char *name, struct dpif *); +int dpif_create(const char *name, struct dpif *); void dpif_close(struct dpif *); -/* OpenFlow. */ -int dpif_recv_openflow(struct dpif *, int dp_idx, struct ofpbuf **, bool wait); -int dpif_send_openflow(struct dpif *, int dp_idx, struct ofpbuf *); +int dpif_delete(struct dpif *); -/* Management functions. */ -int dpif_add_dp(struct dpif *, int dp_idx, const char *dp_name); -int dpif_del_dp(struct dpif *, int dp_idx, const char *dp_name); -int dpif_add_port(struct dpif *, int dp_idx, const char *netdev); -int dpif_del_port(struct dpif *, int dp_idx, const char *netdev); -int dpif_get_idx(const char *dp_name); +int dpif_get_dp_stats(const struct dpif *, struct odp_stats *); +int dpif_get_drop_frags(const struct dpif *, bool *drop_frags); +int dpif_set_drop_frags(struct dpif *, bool drop_frags); + +int dpif_get_listen_mask(const struct dpif *, int *listen_mask); +int dpif_set_listen_mask(struct dpif *, int listen_mask); + +int dpif_port_add(struct dpif *, const char *devname, uint16_t port_no); +int dpif_port_del(struct dpif *, uint16_t port_no); +int dpif_port_query_by_number(const struct dpif *, uint16_t port_no, + struct odp_port *); +int dpif_port_query_by_name(const struct dpif *, const char *devname, + struct odp_port *); +int dpif_port_list(const struct dpif *, struct odp_port **, size_t *n_ports); + +int dpif_port_group_set(struct dpif *, uint16_t group, + const uint16_t ports[], size_t n_ports); +int dpif_port_group_get(const struct dpif *, uint16_t group, + uint16_t ports[], size_t n_ports, size_t *n_out); + +int dpif_flow_flush(struct dpif *); +int dpif_flow_add(struct dpif *, struct odp_flow *); +int dpif_flow_set_actions(struct dpif *, const struct odp_flow_key *, + const union odp_action *actions, size_t n_actions); +int 
dpif_flow_del(struct dpif *, struct odp_flow *); +int dpif_flow_query(const struct dpif *, struct odp_flow *); +int dpif_flow_query_multiple(const struct dpif *, struct odp_flow[], size_t n); +int dpif_flow_list(const struct dpif *, struct odp_flow[], size_t n, + size_t *n_out); +int dpif_flow_list_all(const struct dpif *, + struct odp_flow **flowsp, size_t *np); + +int dpif_execute(struct dpif *, uint16_t in_port, + const union odp_action[], size_t n_actions, + const struct ofpbuf *); + +int dpif_snat_add_port(struct dpif *, const struct odp_snat_config *); +int dpif_snat_del_port(struct dpif *, uint16_t port); + +int dpif_recv(struct dpif *, struct ofpbuf **); +void dpif_recv_wait(struct dpif *); + +struct dpifmon; + +int dpifmon_create(const struct dpif *, struct dpifmon **); +void dpifmon_destroy(struct dpifmon *); + +int dpifmon_poll(struct dpifmon *, char **devnamep); + +void dpifmon_run(struct dpifmon *); +void dpifmon_wait(struct dpifmon *); #endif /* dpif.h */ diff --git a/lib/flow.c b/lib/flow.c index 5fa79aa6..5e8250cc 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -39,6 +39,7 @@ #include #include "hash.h" #include "ofpbuf.h" +#include "openflow/datapath-protocol.h" #include "openflow/openflow.h" #include "packets.h" @@ -97,7 +98,7 @@ pull_vlan(struct ofpbuf *packet) /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. */ int -flow_extract(struct ofpbuf *packet, uint16_t in_port, struct flow *flow) +flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow) { struct ofpbuf b = *packet; struct eth_header *eth; @@ -203,10 +204,11 @@ flow_extract(struct ofpbuf *packet, uint16_t in_port, struct flow *flow) } void -flow_to_match(const struct flow *flow, uint32_t wildcards, struct ofp_match *match) +flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match) { match->wildcards = htonl(wildcards); - match->in_port = htons(flow->in_port); + match->in_port = htons(flow->in_port == ODPP_LOCAL ? 
OFPP_LOCAL + : flow->in_port); match->dl_vlan = flow->dl_vlan; memcpy(match->dl_src, flow->dl_src, ETH_ADDR_LEN); memcpy(match->dl_dst, flow->dl_dst, ETH_ADDR_LEN); @@ -220,7 +222,7 @@ flow_to_match(const struct flow *flow, uint32_t wildcards, struct ofp_match *mat } void -flow_from_match(struct flow *flow, uint32_t *wildcards, +flow_from_match(flow_t *flow, uint32_t *wildcards, const struct ofp_match *match) { if (wildcards) { @@ -240,7 +242,7 @@ flow_from_match(struct flow *flow, uint32_t *wildcards, } char * -flow_to_string(const struct flow *flow) +flow_to_string(const flow_t *flow) { return xasprintf("port%04x:vlan%d mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" " "type%04x proto%"PRId8" ip"IP_FMT"->"IP_FMT" port%d->%d", @@ -252,7 +254,7 @@ flow_to_string(const struct flow *flow) } void -flow_print(FILE *stream, const struct flow *flow) +flow_print(FILE *stream, const flow_t *flow) { char *s = flow_to_string(flow); fputs(s, stream); diff --git a/lib/flow.h b/lib/flow.h index 66479099..967b0b03 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -39,55 +39,38 @@ #include #include "openflow/openflow.h" #include "hash.h" +#include "openflow/datapath-protocol.h" +#include "openflow/openflow.h" #include "util.h" struct ofp_match; struct ofpbuf; -/* Identification data for a flow. - All fields are in network byte order. - In decreasing order by size, so that flow structures can be hashed or - compared bytewise. */ -struct flow { - uint32_t nw_src; /* IP source address. */ - uint32_t nw_dst; /* IP destination address. */ - uint16_t in_port; /* Input switch port. */ - uint16_t dl_vlan; /* Input VLAN. */ - uint16_t dl_type; /* Ethernet frame type. */ - uint16_t tp_src; /* TCP/UDP source port. */ - uint16_t tp_dst; /* TCP/UDP destination port. */ - uint8_t dl_src[6]; /* Ethernet source address. */ - uint8_t dl_dst[6]; /* Ethernet destination address. */ - uint8_t nw_proto; /* IP protocol. */ - uint8_t reserved; /* Pad to 32-bit alignment. 
*/ -}; -BUILD_ASSERT_DECL(sizeof(struct flow) == 32); +typedef struct odp_flow_key flow_t; -int flow_extract(struct ofpbuf *, uint16_t in_port, struct flow *); -void flow_to_match(const struct flow *, uint32_t wildcards, - struct ofp_match *); -void flow_from_match(struct flow *, uint32_t *wildcards, - const struct ofp_match *); -char *flow_to_string(const struct flow *); -void flow_print(FILE *, const struct flow *); -static inline int flow_compare(const struct flow *, const struct flow *); -static inline bool flow_equal(const struct flow *, const struct flow *); -static inline size_t flow_hash(const struct flow *, uint32_t basis); +int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *); +void flow_to_match(const flow_t *, uint32_t wildcards, struct ofp_match *); +void flow_from_match(flow_t *, uint32_t *wildcards, const struct ofp_match *); +char *flow_to_string(const flow_t *); +void flow_print(FILE *, const flow_t *); +static inline int flow_compare(const flow_t *, const flow_t *); +static inline bool flow_equal(const flow_t *, const flow_t *); +static inline size_t flow_hash(const flow_t *, uint32_t basis); static inline int -flow_compare(const struct flow *a, const struct flow *b) +flow_compare(const flow_t *a, const flow_t *b) { return memcmp(a, b, sizeof *a); } static inline bool -flow_equal(const struct flow *a, const struct flow *b) +flow_equal(const flow_t *a, const flow_t *b) { return !flow_compare(a, b); } static inline size_t -flow_hash(const struct flow *flow, uint32_t basis) +flow_hash(const flow_t *flow, uint32_t basis) { BUILD_ASSERT_DECL(!(sizeof *flow % sizeof(uint32_t))); return hash_words((const uint32_t *) flow, diff --git a/lib/learning-switch.c b/lib/learning-switch.c index 37788234..197e33a4 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -408,7 +408,7 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_) size_t pkt_ofs, pkt_len; struct ofpbuf pkt; - struct flow flow; + flow_t flow; /* Extract 
flow data from 'opi' into 'flow'. */ pkt_ofs = offsetof(struct ofp_packet_in, data); @@ -435,8 +435,8 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_) } if (sw->ml) { - uint16_t learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0); - if (may_send(sw, learned_port)) { + int learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0); + if (learned_port >= 0 && may_send(sw, learned_port)) { out_port = learned_port; } } diff --git a/lib/mac-learning.c b/lib/mac-learning.c index aa1e3ad0..73e1eef9 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -40,7 +40,6 @@ #include "hash.h" #include "list.h" -#include "openflow/openflow.h" #include "poll-loop.h" #include "tag.h" #include "timeval.h" @@ -232,8 +231,8 @@ mac_learning_learn(struct mac_learning *ml, } /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a - * frame destined for 'dst' should be sent, OFPP_FLOOD if unknown. */ -uint16_t + * frame destined for 'dst' should be sent, -1 if unknown. */ +int mac_learning_lookup(const struct mac_learning *ml, const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan) { @@ -242,18 +241,18 @@ mac_learning_lookup(const struct mac_learning *ml, } /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a - * frame destined for 'dst' should be sent, OFPP_FLOOD if unknown. + * frame destined for 'dst' should be sent, -1 if unknown. * * Adds to '*tag' (which the caller must have initialized) the tag that should * be attached to any flow created based on the return value, if any, to allow * those flows to be revalidated when the MAC learning entry changes. 
*/ -uint16_t +int mac_learning_lookup_tag(const struct mac_learning *ml, const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan, tag_type *tag) { if (eth_addr_is_multicast(dst)) { - return OFPP_FLOOD; + return -1; } else { struct mac_entry *e = search_bucket(mac_table_bucket(ml, dst, vlan), dst, vlan); @@ -262,7 +261,7 @@ mac_learning_lookup_tag(const struct mac_learning *ml, return e->port; } else { *tag |= make_unknown_mac_tag(ml, dst, vlan); - return OFPP_FLOOD; + return -1; } } } diff --git a/lib/mac-learning.h b/lib/mac-learning.h index 8e3db910..6a90256d 100644 --- a/lib/mac-learning.h +++ b/lib/mac-learning.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -42,11 +42,11 @@ void mac_learning_destroy(struct mac_learning *); tag_type mac_learning_learn(struct mac_learning *, const uint8_t src[ETH_ADDR_LEN], uint16_t vlan, uint16_t src_port); -uint16_t mac_learning_lookup(const struct mac_learning *, - const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan); -uint16_t mac_learning_lookup_tag(const struct mac_learning *, - const uint8_t dst[ETH_ADDR_LEN], - uint16_t vlan, tag_type *tag); +int mac_learning_lookup(const struct mac_learning *, + const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan); +int mac_learning_lookup_tag(const struct mac_learning *, + const uint8_t dst[ETH_ADDR_LEN], + uint16_t vlan, tag_type *tag); void mac_learning_flush(struct mac_learning *); void mac_learning_run(struct mac_learning *, struct tag_set *); void mac_learning_wait(struct mac_learning *); diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 430b494a..d1adae4a 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -147,24 +147,13 @@ ofp_packet_in(struct ds *string, const void *oh, size_t len, int verbosity) ds_put_char(string, '\n'); if (verbosity > 0) { - struct flow flow; + flow_t flow; struct 
ofpbuf packet; struct ofp_match match; packet.data = (void *) op->data; packet.size = data_len; flow_extract(&packet, ntohs(op->in_port), &flow); - match.wildcards = 0; - match.in_port = flow.in_port; - memcpy(match.dl_src, flow.dl_src, ETH_ADDR_LEN); - memcpy(match.dl_dst, flow.dl_dst, ETH_ADDR_LEN); - match.dl_vlan = flow.dl_vlan; - match.dl_type = flow.dl_type; - match.nw_proto = flow.nw_proto; - match.pad = 0; - match.nw_src = flow.nw_src; - match.nw_dst = flow.nw_dst; - match.tp_src = flow.tp_src; - match.tp_dst = flow.tp_dst; + flow_to_match(&flow, 0, &match); ofp_print_match(string, &match, verbosity); ds_put_char(string, '\n'); } @@ -792,65 +781,6 @@ ofp_print_flow_expired(struct ds *string, const void *oh, size_t len UNUSED, ntohl(ofe->duration), ntohll(ofe->packet_count), ntohll(ofe->byte_count)); } -/* Pretty-print the NXT_FLOW_EXPIRED packet of 'len' bytes at 'oh' to 'string' - * at the given 'verbosity' level. */ -static void -nx_print_flow_end(struct ds *string, const void *oh, size_t len, - int verbosity) -{ - const struct nx_flow_end *nfe = oh; - - ds_put_cstr(string, "nx_flow_end: "); - - if (len < sizeof(*nfe)) { - ds_put_format(string, " (***length=%zu < min_size=%zu***)\n", - len, sizeof(*nfe)); - return; - } - - ofp_print_match(string, &nfe->match, verbosity); - ds_put_cstr(string, " reason="); - switch (nfe->reason) { - case NXFER_IDLE_TIMEOUT: - ds_put_cstr(string, "idle"); - break; - case NXFER_HARD_TIMEOUT: - ds_put_cstr(string, "hard"); - break; - case NXFER_DELETE: - ds_put_cstr(string, "delete"); - break; - case NXFER_EJECT: - ds_put_cstr(string, "eject"); - break; - default: - ds_put_format(string, "**%"PRIu8"**", nfe->reason); - break; - } - ds_put_format(string, - " pri=%"PRIu16" init=%"PRIu64" used=%"PRIu64" end=%"PRIu64, - nfe->match.wildcards ? 
ntohs(nfe->priority) : (uint16_t)-1, - ntohll(nfe->init_time), ntohll(nfe->used_time), - ntohll(nfe->end_time)); - ds_put_format(string, - " tflags=0x%x tos=0x%x pkts=%"PRIu64" bytes=%"PRIu64"\n", - nfe->tcp_flags, nfe->ip_tos, ntohll(nfe->packet_count), - ntohll(nfe->byte_count)); -} - -static void -nx_print_msg(struct ds *string, const void *oh, size_t len, int verbosity) -{ - const struct nicira_header *nh = oh; - - switch(ntohl(nh->subtype)) - { - case NXT_FLOW_END: - nx_print_flow_end(string, oh, len, verbosity); - return; - } -} - static void ofp_print_port_mod(struct ds *string, const void *oh, size_t len UNUSED, @@ -1319,19 +1249,6 @@ ofp_echo(struct ds *string, const void *oh, size_t len, int verbosity) } } -static void -ofp_vendor(struct ds *string, const void *oh, size_t len, int verbosity) -{ - const struct ofp_vendor_header *vh = oh; - - switch(ntohl(vh->vendor)) - { - case NX_VENDOR_ID: - return nx_print_msg(string, oh, len, verbosity); - break; - } -} - struct openflow_packet { uint8_t type; const char *name; @@ -1446,7 +1363,7 @@ static const struct openflow_packet packets[] = { OFPT_VENDOR, "vendor", sizeof (struct ofp_vendor_header), - ofp_vendor, + NULL, }, }; diff --git a/lib/vconn-netlink.c b/lib/vconn-netlink.c deleted file mode 100644 index ef0fac35..00000000 --- a/lib/vconn-netlink.c +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include "vconn.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "dpif.h" -#include "netlink.h" -#include "ofpbuf.h" -#include "openflow/openflow-netlink.h" -#include "openflow/openflow.h" -#include "poll-loop.h" -#include "socket-util.h" -#include "util.h" -#include "vconn-provider.h" - -#include "vlog.h" -#define THIS_MODULE VLM_VCONN_NETLINK - -struct netlink_vconn -{ - struct vconn vconn; - struct dpif dp; - int dp_idx; -}; - -static struct netlink_vconn * -netlink_vconn_cast(struct vconn *vconn) -{ - vconn_assert_class(vconn, &netlink_vconn_class); - return CONTAINER_OF(vconn, struct netlink_vconn, vconn); -} - -static int -netlink_open(const char *name, char *suffix, struct vconn **vconnp) -{ - struct netlink_vconn *netlink; - int subscribe; - int dp_idx; - int retval; - - subscribe = 1; - if (sscanf(suffix, "%d:%d", &dp_idx, &subscribe) < 1) { - ofp_error(0, "%s: syntax error", name); - return EAFNOSUPPORT; - } - - netlink = xmalloc(sizeof *netlink); - vconn_init(&netlink->vconn, &netlink_vconn_class, 0, 0, name, true); - retval = dpif_open(subscribe ? 
dp_idx : -1, &netlink->dp); - netlink->dp_idx = dp_idx; - if (retval) { - free(netlink); - *vconnp = NULL; - return retval; - } - *vconnp = &netlink->vconn; - return 0; -} - -static void -netlink_close(struct vconn *vconn) -{ - struct netlink_vconn *netlink = netlink_vconn_cast(vconn); - dpif_close(&netlink->dp); - free(netlink); -} - -static int -netlink_recv(struct vconn *vconn, struct ofpbuf **bufferp) -{ - struct netlink_vconn *netlink = netlink_vconn_cast(vconn); - return dpif_recv_openflow(&netlink->dp, netlink->dp_idx, bufferp, false); -} - -static int -netlink_send(struct vconn *vconn, struct ofpbuf *buffer) -{ - struct netlink_vconn *netlink = netlink_vconn_cast(vconn); - int retval = dpif_send_openflow(&netlink->dp, netlink->dp_idx, buffer); - if (!retval) { - ofpbuf_delete(buffer); - } - return retval; -} - -static void -netlink_wait(struct vconn *vconn, enum vconn_wait_type wait) -{ - struct netlink_vconn *netlink = netlink_vconn_cast(vconn); - short int events = 0; - switch (wait) { - case WAIT_CONNECT: - NOT_REACHED(); - - case WAIT_RECV: - events = POLLIN; - break; - - case WAIT_SEND: - events = 0; - break; - - default: - NOT_REACHED(); - } - nl_sock_wait(netlink->dp.sock, events); -} - -struct vconn_class netlink_vconn_class = { - "nl", /* name */ - netlink_open, /* open */ - netlink_close, /* close */ - NULL, /* connect */ - netlink_recv, /* recv */ - netlink_send, /* send */ - netlink_wait, /* wait */ -}; diff --git a/lib/vconn-provider.h b/lib/vconn-provider.h index caf739b2..8098c15b 100644 --- a/lib/vconn-provider.h +++ b/lib/vconn-provider.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -184,8 +184,5 @@ extern struct vconn_class fd_vconn_class; extern struct vconn_class ssl_vconn_class; extern struct pvconn_class pssl_pvconn_class; #endif 
-#ifdef HAVE_NETLINK -extern struct vconn_class netlink_vconn_class; -#endif #endif /* vconn-provider.h */ diff --git a/lib/vconn.c b/lib/vconn.c index f44fdbc2..bd8e255f 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -70,9 +70,6 @@ enum vconn_state { static struct vconn_class *vconn_classes[] = { &tcp_vconn_class, &unix_vconn_class, -#ifdef HAVE_NETLINK - &netlink_vconn_class, -#endif #ifdef HAVE_OPENSSL &ssl_vconn_class, #endif @@ -869,7 +866,7 @@ update_openflow_length(struct ofpbuf *buffer) } struct ofpbuf * -make_flow_mod(uint16_t command, const struct flow *flow, size_t actions_len) +make_flow_mod(uint16_t command, const flow_t *flow, size_t actions_len) { struct ofp_flow_mod *ofm; size_t size = sizeof *ofm + actions_len; @@ -879,7 +876,8 @@ make_flow_mod(uint16_t command, const struct flow *flow, size_t actions_len) ofm->header.type = OFPT_FLOW_MOD; ofm->header.length = htons(size); ofm->match.wildcards = htonl(0); - ofm->match.in_port = flow->in_port; + ofm->match.in_port = htons(flow->in_port == ODPP_LOCAL ? 
OFPP_LOCAL + : flow->in_port); memcpy(ofm->match.dl_src, flow->dl_src, sizeof ofm->match.dl_src); memcpy(ofm->match.dl_dst, flow->dl_dst, sizeof ofm->match.dl_dst); ofm->match.dl_vlan = flow->dl_vlan; @@ -894,7 +892,7 @@ make_flow_mod(uint16_t command, const struct flow *flow, size_t actions_len) } struct ofpbuf * -make_add_flow(const struct flow *flow, uint32_t buffer_id, +make_add_flow(const flow_t *flow, uint32_t buffer_id, uint16_t idle_timeout, size_t actions_len) { struct ofpbuf *out = make_flow_mod(OFPFC_ADD, flow, actions_len); @@ -906,7 +904,7 @@ make_add_flow(const struct flow *flow, uint32_t buffer_id, } struct ofpbuf * -make_del_flow(const struct flow *flow) +make_del_flow(const flow_t *flow) { struct ofpbuf *out = make_flow_mod(OFPFC_DELETE_STRICT, flow, 0); struct ofp_flow_mod *ofm = out->data; @@ -915,7 +913,7 @@ make_del_flow(const struct flow *flow) } struct ofpbuf * -make_add_simple_flow(const struct flow *flow, +make_add_simple_flow(const flow_t *flow, uint32_t buffer_id, uint16_t out_port, uint16_t idle_timeout) { @@ -945,7 +943,7 @@ make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id, opo->header.length = htons(size); opo->header.xid = htonl(0); opo->buffer_id = htonl(buffer_id); - opo->in_port = htons(in_port); + opo->in_port = htons(in_port == ODPP_LOCAL ? 
OFPP_LOCAL : in_port); opo->actions_len = htons(actions_len); ofpbuf_put(out, actions, actions_len); if (packet) { diff --git a/lib/vconn.h b/lib/vconn.h index 41d60c69..375f155c 100644 --- a/lib/vconn.h +++ b/lib/vconn.h @@ -39,8 +39,9 @@ #include #include +#include "flow.h" + struct ofpbuf; -struct flow; struct ofp_action_header; struct ofp_header; struct ofp_match; @@ -90,12 +91,12 @@ void *put_openflow(size_t openflow_len, uint8_t type, struct ofpbuf *); void *put_openflow_xid(size_t openflow_len, uint8_t type, uint32_t xid, struct ofpbuf *); void update_openflow_length(struct ofpbuf *); -struct ofpbuf *make_flow_mod(uint16_t command, const struct flow *, +struct ofpbuf *make_flow_mod(uint16_t command, const flow_t *, size_t actions_len); -struct ofpbuf *make_add_flow(const struct flow *, uint32_t buffer_id, +struct ofpbuf *make_add_flow(const flow_t *, uint32_t buffer_id, uint16_t max_idle, size_t actions_len); -struct ofpbuf *make_del_flow(const struct flow *); -struct ofpbuf *make_add_simple_flow(const struct flow *, +struct ofpbuf *make_del_flow(const flow_t *); +struct ofpbuf *make_add_simple_flow(const flow_t *, uint32_t buffer_id, uint16_t out_port, uint16_t max_idle); struct ofpbuf *make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id, diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def index 7ff4004e..b8aa1d35 100644 --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@ -17,14 +17,16 @@ VLOG_MODULE(executer) VLOG_MODULE(fail_open) VLOG_MODULE(fault) VLOG_MODULE(flow) -VLOG_MODULE(flow_end) VLOG_MODULE(in_band) VLOG_MODULE(leak_checker) VLOG_MODULE(learning_switch) VLOG_MODULE(mac_learning) VLOG_MODULE(netdev) +VLOG_MODULE(netflow) VLOG_MODULE(netlink) VLOG_MODULE(ofp_discover) +VLOG_MODULE(ofproto) +VLOG_MODULE(pktbuf) VLOG_MODULE(pcap) VLOG_MODULE(poll_loop) VLOG_MODULE(port_watcher) @@ -41,7 +43,6 @@ VLOG_MODULE(switch) VLOG_MODULE(terminal) VLOG_MODULE(socket_util) VLOG_MODULE(vconn_fd) -VLOG_MODULE(vconn_netlink) 
VLOG_MODULE(vconn_tcp) VLOG_MODULE(vconn_ssl) VLOG_MODULE(vconn_stream) @@ -49,6 +50,7 @@ VLOG_MODULE(vconn_unix) VLOG_MODULE(vconn) VLOG_MODULE(vlog) VLOG_MODULE(vlog_socket) +VLOG_MODULE(wcelim) VLOG_MODULE(vswitchd) #ifdef HAVE_EXT diff --git a/m4/libopenflow.m4 b/m4/libopenflow.m4 index 294932b0..77545732 100644 --- a/m4/libopenflow.m4 +++ b/m4/libopenflow.m4 @@ -1,6 +1,6 @@ # -*- autoconf -*- -# Copyright (c) 2008 The Board of Trustees of The Leland Stanford +# Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford # Junior University # # We are making the OpenFlow specification and associated documentation @@ -174,6 +174,7 @@ dnl Runs the checks required to include the headers in include/ and dnl link against lib/libopenflow.a. AC_DEFUN([OFP_CHECK_LIBOPENFLOW], [AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) + AC_REQUIRE([AC_C_BIGENDIAN]) AC_REQUIRE([OFP_CHECK_NDEBUG]) AC_REQUIRE([OFP_CHECK_NETLINK]) AC_REQUIRE([OFP_CHECK_OPENSSL]) diff --git a/secchan/automake.mk b/secchan/automake.mk index f1f832cc..76a279dd 100644 --- a/secchan/automake.mk +++ b/secchan/automake.mk @@ -1,6 +1,10 @@ bin_PROGRAMS += secchan/secchan man_MANS += secchan/secchan.8 +# secchan/stp-secchan.c \ +# secchan/stp-secchan.h +# + secchan_secchan_SOURCES = \ secchan/discovery.c \ secchan/discovery.h \ @@ -8,26 +12,25 @@ secchan_secchan_SOURCES = \ secchan/executer.h \ secchan/fail-open.c \ secchan/fail-open.h \ - secchan/flow-end.c \ - secchan/flow-end.h \ secchan/in-band.c \ secchan/in-band.h \ + secchan/netflow.c \ secchan/netflow.h \ - secchan/port-watcher.c \ - secchan/port-watcher.h \ - secchan/ratelimit.c \ - secchan/ratelimit.h \ + secchan/ofproto.c \ + secchan/ofproto.h \ + secchan/pktbuf.c \ + secchan/pktbuf.h \ + secchan/pinsched.c \ + secchan/pinsched.h \ secchan/secchan.c \ secchan/secchan.h \ secchan/status.c \ - secchan/status.h \ - secchan/stp-secchan.c \ - secchan/stp-secchan.h -if SUPPORT_SNAT -secchan_secchan_SOURCES += \ - secchan/snat.c \ - secchan/snat.h -endif 
+ secchan/status.h +#if SUPPORT_SNAT +#secchan_secchan_SOURCES += \ +# secchan/snat.c \ +# secchan/snat.h +#endif secchan_secchan_LDADD = lib/libopenflow.a $(FAULT_LIBS) $(SSL_LIBS) EXTRA_DIST += secchan/secchan.8.in diff --git a/secchan/discovery.c b/secchan/discovery.c index feb9c338..105614ff 100644 --- a/secchan/discovery.c +++ b/secchan/discovery.c @@ -34,23 +34,25 @@ #include #include "discovery.h" #include +#include #include #include #include "dhcp-client.h" #include "dhcp.h" +#include "dpif.h" #include "netdev.h" #include "openflow/openflow.h" #include "packets.h" -#include "port-watcher.h" #include "secchan.h" #include "status.h" #define THIS_MODULE VLM_discovery #include "vlog.h" -struct discovery -{ - const struct settings *s; +struct discovery { + const char *accept_controller_re; + bool update_resolv_conf; + regex_t accept_controller_regex; struct dhclient *dhcp; int n_changes; }; @@ -65,7 +67,7 @@ discovery_status_cb(struct status_reply *sr, void *d_) { struct discovery *d = d_; - status_reply_put(sr, "accept-remote=%s", d->s->accept_controller_re); + status_reply_put(sr, "accept-remote=%s", d->accept_controller_re); status_reply_put(sr, "n-changes=%d", d->n_changes); if (d->dhcp) { status_reply_put(sr, "state=%s", dhclient_get_state(d->dhcp)); @@ -105,71 +107,42 @@ discovery_status_cb(struct status_reply *sr, void *d_) } } -static void -discovery_local_port_cb(const struct ofp_phy_port *port, void *d_) -{ - struct discovery *d = d_; - if (port) { - char name[OFP_MAX_PORT_NAME_LEN + 1]; - struct netdev *netdev; - int retval; - - /* Check that this was really a change. */ - get_port_name(port, name, sizeof name); - if (d->dhcp && !strcmp(netdev_get_name(dhclient_get_netdev(d->dhcp)), - name)) { - return; - } - - /* Destroy current DHCP client. */ - dhclient_destroy(d->dhcp); - d->dhcp = NULL; - - /* Bring local network device up. 
*/ - retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev); - if (retval) { - VLOG_ERR("Could not open %s device, discovery disabled: %s", - name, strerror(retval)); - return; - } - retval = netdev_turn_flags_on(netdev, NETDEV_UP, true); - if (retval) { - VLOG_ERR("Could not bring %s device up, discovery disabled: %s", - name, strerror(retval)); - return; - } - netdev_close(netdev); - - /* Initialize DHCP client. */ - retval = dhclient_create(name, modify_dhcp_request, - validate_dhcp_offer, (void *) d->s, &d->dhcp); - if (retval) { - VLOG_ERR("Failed to initialize DHCP client, " - "discovery disabled: %s", strerror(retval)); - return; - } - dhclient_set_max_timeout(d->dhcp, 3); - dhclient_init(d->dhcp, 0); - } else { - dhclient_destroy(d->dhcp); - d->dhcp = NULL; - } -} - - struct discovery * -discovery_init(const struct settings *s, struct port_watcher *pw, - struct switch_status *ss) +discovery_create(const char *accept_controller_re, bool update_resolv_conf, + struct dpif *dpif, struct switch_status *ss) { struct discovery *d; + struct odp_port port; + int error; + + d = xcalloc(1, sizeof *d); + + /* Controller regular expression. */ + d->accept_controller_re = accept_controller_re; + error = regcomp(&d->accept_controller_regex, accept_controller_re, + REG_NOSUB | REG_EXTENDED); + if (error) { + size_t length = regerror(error, &d->accept_controller_regex, NULL, 0); + char *buffer = xmalloc(length); + regerror(error, &d->accept_controller_regex, buffer, length); + ofp_fatal(0, "%s: %s", accept_controller_re, buffer); + } + d->update_resolv_conf = update_resolv_conf; - d = xmalloc(sizeof *d); - d->s = s; - d->dhcp = NULL; - d->n_changes = 0; + /* Initialize DHCP client. 
*/ + error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &port); + if (error) { + ofp_fatal(error, "failed to query datapath local port"); + } + error = dhclient_create(port.devname, modify_dhcp_request, + validate_dhcp_offer, d, &d->dhcp); + if (error) { + ofp_fatal(error, "failed to initialize DHCP client"); + } + dhclient_set_max_timeout(d->dhcp, 3); + dhclient_init(d->dhcp, 0); switch_status_register_category(ss, "discovery", discovery_status_cb, d); - port_watcher_register_local_port_callback(pw, discovery_local_port_cb, d); return d; } @@ -196,7 +169,7 @@ discovery_run(struct discovery *d, char **controller_name) } dhclient_configure_netdev(d->dhcp); - if (d->s->update_resolv_conf) { + if (d->update_resolv_conf) { dhclient_update_resolv_conf(d->dhcp); } @@ -230,9 +203,9 @@ modify_dhcp_request(struct dhcp_msg *msg, void *aux UNUSED) } static bool -validate_dhcp_offer(const struct dhcp_msg *msg, void *s_) +validate_dhcp_offer(const struct dhcp_msg *msg, void *d_) { - const struct settings *s = s_; + const struct discovery *d = d_; char *vconn_name; bool accept; @@ -241,10 +214,11 @@ validate_dhcp_offer(const struct dhcp_msg *msg, void *s_) VLOG_WARN_RL(&rl, "rejecting DHCP offer missing controller vconn"); return false; } - accept = !regexec(&s->accept_controller_regex, vconn_name, 0, NULL, 0); + accept = !regexec(&d->accept_controller_regex, vconn_name, 0, NULL, + 0); if (!accept) { VLOG_WARN_RL(&rl, "rejecting controller vconn that fails to match %s", - s->accept_controller_re); + d->accept_controller_re); } free(vconn_name); return accept; diff --git a/secchan/discovery.h b/secchan/discovery.h index b2cb03c9..c955d75b 100644 --- a/secchan/discovery.h +++ b/secchan/discovery.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -36,13 +36,13 @@ #include 
+struct dpif; struct settings; -struct port_watcher; struct switch_status; -struct discovery *discovery_init(const struct settings *, - struct port_watcher *, - struct switch_status *); +struct discovery *discovery_create(const char *accept_controller_re, + bool update_resolv_conf, + struct dpif *, struct switch_status *); void discovery_question_connectivity(struct discovery *); bool discovery_run(struct discovery *, char **controller_name); void discovery_wait(struct discovery *); diff --git a/secchan/executer.c b/secchan/executer.c index bb6fea8d..b235862b 100644 --- a/secchan/executer.c +++ b/secchan/executer.c @@ -66,7 +66,7 @@ struct child { pid_t pid; /* Child's process ID. */ /* For sending a reply to the controller when the child dies. */ - struct relay *relay; + struct rconn *rconn; uint32_t xid; /* Transaction ID used by controller. */ /* We read up to MAX_OUTPUT bytes of output and send them back to the @@ -78,7 +78,9 @@ struct child { }; struct executer { - const struct settings *s; + /* Settings. */ + const char *command_acl; /* Command white/blacklist, as shell globs. */ + const char *command_dir; /* Directory that contains commands. */ /* Children. */ struct child children[MAX_CHILDREN]; @@ -89,9 +91,9 @@ struct executer { int null_fd; /* FD for /dev/null. 
*/ }; -static void send_child_status(struct relay *, uint32_t xid, uint32_t status, +static void send_child_status(struct rconn *, uint32_t xid, uint32_t status, const void *data, size_t size); -static void send_child_message(struct relay *, uint32_t xid, uint32_t status, +static void send_child_message(struct rconn *, uint32_t xid, uint32_t status, const char *message); /* Returns true if 'cmd' is allowed by 'acl', which is a command-separated @@ -138,12 +140,10 @@ executer_is_permitted(const char *acl_, const char *cmd) return allowed && !denied; } -static bool -executer_remote_packet_cb(struct relay *r, void *e_) +int +executer_handle_request(struct executer *e, struct rconn *rconn, + struct nicira_header *request) { - struct executer *e = e_; - struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; - struct nicira_header *request; char **argv; char *args; char *exec_file = NULL; @@ -155,30 +155,17 @@ executer_remote_packet_cb(struct relay *r, void *e_) pid_t pid; int output_fds[2]; - /* Check for NXT_COMMAND_REQUEST vendor extension. */ - if (msg->size < sizeof(struct nicira_header)) { - return false; - } - request = msg->data; - if (request->header.type != OFPT_VENDOR - || request->vendor != htonl(NX_VENDOR_ID) - || request->subtype != htonl(NXT_COMMAND_REQUEST)) { - return false; - } - /* Verify limit on children not exceeded. * XXX should probably kill children when the connection drops? */ if (e->n_children >= MAX_CHILDREN) { - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "too many child processes"); - VLOG_WARN("limit of %d child processes reached, dropping request", - MAX_CHILDREN); - return false; + return 0; } /* Copy argument buffer, adding a null terminator at the end. Now every * argument is null-terminated, instead of being merely null-delimited. 
*/ - args_size = msg->size - sizeof *request; + args_size = ntohs(request->header.length) - sizeof *request; args = xmemdup0((const void *) (request + 1), args_size); /* Count arguments. */ @@ -196,23 +183,23 @@ executer_remote_packet_cb(struct relay *r, void *e_) argv[argc] = NULL; /* Check permissions. */ - if (!executer_is_permitted(e->s->command_acl, argv[0])) { - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + if (!executer_is_permitted(e->command_acl, argv[0])) { + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "command not allowed"); goto done; } /* Find the executable. */ - exec_file = xasprintf("%s/%s", e->s->command_dir, argv[0]); + exec_file = xasprintf("%s/%s", e->command_dir, argv[0]); if (stat(exec_file, &s)) { VLOG_WARN("failed to stat \"%s\": %s", exec_file, strerror(errno)); - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "command not allowed"); goto done; } if (!S_ISREG(s.st_mode)) { VLOG_WARN("\"%s\" is not a regular file", exec_file); - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "command not allowed"); goto done; } @@ -221,7 +208,7 @@ executer_remote_packet_cb(struct relay *r, void *e_) /* Arrange to capture output. 
*/ if (pipe(output_fds)) { VLOG_WARN("pipe failed: %s", strerror(errno)); - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "internal error (pipe)"); goto done; } @@ -240,9 +227,9 @@ executer_remote_packet_cb(struct relay *r, void *e_) for (i = 3; i < max_fds; i++) { close(i); } - if (chdir(e->s->command_dir)) { + if (chdir(e->command_dir)) { printf("could not change directory to \"%s\": %s", - e->s->command_dir, strerror(errno)); + e->command_dir, strerror(errno)); exit(EXIT_FAILURE); } execv(argv[0], argv); @@ -253,11 +240,12 @@ executer_remote_packet_cb(struct relay *r, void *e_) struct child *child; VLOG_INFO("started \"%s\" subprocess", argv[0]); - send_child_status(r, request->header.xid, NXT_STATUS_STARTED, NULL, 0); + send_child_status(rconn, request->header.xid, NXT_STATUS_STARTED, + NULL, 0); child = &e->children[e->n_children++]; child->name = xstrdup(argv[0]); child->pid = pid; - child->relay = r; + child->rconn = rconn; child->xid = request->header.xid; child->output_fd = output_fds[0]; child->output = xmalloc(MAX_OUTPUT); @@ -266,7 +254,7 @@ executer_remote_packet_cb(struct relay *r, void *e_) close(output_fds[1]); } else { VLOG_WARN("fork failed: %s", strerror(errno)); - send_child_message(r, request->header.xid, NXT_STATUS_ERROR, + send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR, "internal error (fork)"); close(output_fds[0]); close(output_fds[1]); @@ -276,14 +264,14 @@ done: free(exec_file); free(args); free(argv); - return true; + return 0; } static void -send_child_status(struct relay *relay, uint32_t xid, uint32_t status, +send_child_status(struct rconn *rconn, uint32_t xid, uint32_t status, const void *data, size_t size) { - if (relay) { + if (rconn) { struct nx_command_reply *r; struct ofpbuf *buffer; @@ -293,17 +281,17 @@ send_child_status(struct relay *relay, uint32_t xid, uint32_t status, r->status = htonl(status); ofpbuf_put(buffer, data, size); 
update_openflow_length(buffer); - if (rconn_send(relay->halves[HALF_REMOTE].rconn, buffer, NULL)) { + if (rconn_send(rconn, buffer, NULL)) { ofpbuf_delete(buffer); } } } static void -send_child_message(struct relay *relay, uint32_t xid, uint32_t status, +send_child_message(struct rconn *rconn, uint32_t xid, uint32_t status, const char *message) { - send_child_status(relay, xid, status, message, strlen(message)); + send_child_status(rconn, xid, status, message, strlen(message)); } /* 'child' died with 'status' as its return code. Deal with it. */ @@ -346,7 +334,7 @@ child_terminated(struct child *child, int status) if (WCOREDUMP(status)) { ofp_status |= NXT_STATUS_COREDUMP; } - send_child_status(child->relay, child->xid, ofp_status, + send_child_status(child->rconn, child->xid, ofp_status, child->output, child->output_size); } @@ -376,10 +364,9 @@ poll_child(struct child *child) child->output_fd = -1; } -static void -executer_periodic_cb(void *e_) +void +executer_run(struct executer *e) { - struct executer *e = e_; char buffer[MAX_CHILDREN]; size_t i; @@ -427,10 +414,9 @@ executer_periodic_cb(void *e_) } -static void -executer_wait_cb(void *e_) +void +executer_wait(struct executer *e) { - struct executer *e = e_; if (e->n_children) { size_t i; @@ -447,10 +433,9 @@ executer_wait_cb(void *e_) } } -static void -executer_closing_cb(struct relay *r, void *e_) +void +executer_rconn_closing(struct executer *e, struct rconn *rconn) { - struct executer *e = e_; size_t i; /* If any of our children was connected to 'r', then disconnect it so we @@ -458,8 +443,8 @@ executer_closing_cb(struct relay *r, void *e_) * later. * XXX kill the children started by 'r'? 
*/ for (i = 0; i < e->n_children; i++) { - if (e->children[i].relay == r) { - e->children[i].relay = NULL; + if (e->children[i].rconn == rconn) { + e->children[i].rconn = NULL; } } } @@ -472,17 +457,8 @@ sigchld_handler(int signr UNUSED) write(child_fd, "", 1); } -static const struct hook_class executer_hook_class = { - NULL, /* local_packet_cb */ - executer_remote_packet_cb, /* remote_packet_cb */ - executer_periodic_cb, /* periodic_cb */ - executer_wait_cb, /* wait_cb */ - executer_closing_cb, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -executer_start(struct secchan *secchan, const struct settings *settings) +struct executer * +executer_create(const char *command_acl, const char *command_dir) { struct executer *e; struct sigaction sa; @@ -511,11 +487,11 @@ executer_start(struct secchan *secchan, const struct settings *settings) ofp_fatal(errno, "sigaction(SIGCHLD) failed"); } - /* Add hook. */ e = xcalloc(1, sizeof *e); - e->s = settings; + e->command_acl = command_acl; + e->command_dir = command_dir; e->n_children = 0; e->wait_fd = fds[0]; e->null_fd = null_fd; - add_hook(secchan, &executer_hook_class, e); + return e; } diff --git a/secchan/executer.h b/secchan/executer.h index 8e9ad3d8..0429a879 100644 --- a/secchan/executer.h +++ b/secchan/executer.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -34,9 +34,15 @@ #ifndef EXECUTER_H #define EXECUTER_H 1 -struct secchan; -struct settings; +struct nicira_header; +struct rconn; -void executer_start(struct secchan *, const struct settings *); +struct executer *executer_create(const char *command_acl, + const char *command_dir); +void executer_run(struct executer *); +void executer_wait(struct executer *); +void executer_rconn_closing(struct executer *, struct rconn *); +int 
executer_handle_request(struct executer *, struct rconn *, + struct nicira_header *); #endif /* executer.h */ diff --git a/secchan/fail-open.c b/secchan/fail-open.c index a4d0d83d..605e25bd 100644 --- a/secchan/fail-open.c +++ b/secchan/fail-open.c @@ -33,128 +33,124 @@ #include #include "fail-open.h" -#include -#include -#include -#include "learning-switch.h" -#include "netdev.h" -#include "packets.h" -#include "port-watcher.h" +#include +#include "flow.h" +#include "mac-learning.h" +#include "ofproto.h" #include "rconn.h" -#include "secchan.h" #include "status.h" -#include "stp-secchan.h" #include "timeval.h" #define THIS_MODULE VLM_fail_open #include "vlog.h" -struct fail_open_data { - const struct settings *s; - struct rconn *local_rconn; - struct rconn *remote_rconn; - struct lswitch *lswitch; +struct fail_open { + struct rconn *controller; + int trigger_duration; int last_disconn_secs; - time_t boot_deadline; + struct mac_learning *mac_learning; }; /* Causes 'r' to enter or leave fail-open mode, if appropriate. 
*/ -static void -fail_open_periodic_cb(void *fail_open_) -{ - struct fail_open_data *fail_open = fail_open_; - int disconn_secs; - bool open; - if (time_now() < fail_open->boot_deadline) { - return; - } - disconn_secs = rconn_failure_duration(fail_open->remote_rconn); - open = disconn_secs >= fail_open->s->probe_interval * 3; - if (open != (fail_open->lswitch != NULL)) { +void +fail_open_run(struct fail_open *fo) +{ + int disconn_secs = rconn_failure_duration(fo->controller); + bool open = disconn_secs >= fo->trigger_duration; + if (open != (fo->mac_learning != NULL)) { if (!open) { VLOG_WARN("No longer in fail-open mode"); - lswitch_destroy(fail_open->lswitch); - fail_open->lswitch = NULL; + mac_learning_destroy(fo->mac_learning); + fo->mac_learning = NULL; } else { VLOG_WARN("Could not connect to controller for %d seconds, " "failing open", disconn_secs); - fail_open->lswitch = lswitch_create(fail_open->local_rconn, true, - fail_open->s->max_idle); - fail_open->last_disconn_secs = disconn_secs; + fo->mac_learning = mac_learning_create(); + fo->last_disconn_secs = disconn_secs; } - } else if (open && disconn_secs > fail_open->last_disconn_secs + 60) { + } else if (open && disconn_secs > fo->last_disconn_secs + 60) { VLOG_INFO("Still in fail-open mode after %d seconds disconnected " "from controller", disconn_secs); - fail_open->last_disconn_secs = disconn_secs; + fo->last_disconn_secs = disconn_secs; } - if (fail_open->lswitch) { - lswitch_run(fail_open->lswitch, fail_open->local_rconn); + if (fo->mac_learning) { + mac_learning_run(fo->mac_learning, NULL); } } -static void -fail_open_wait_cb(void *fail_open_) +void +fail_open_wait(struct fail_open *fo) { - struct fail_open_data *fail_open = fail_open_; - if (fail_open->lswitch) { - lswitch_wait(fail_open->lswitch); + if (fo->mac_learning) { + mac_learning_wait(fo->mac_learning); } } -static bool -fail_open_local_packet_cb(struct relay *r, void *fail_open_) +bool +fail_open_handle_flow_miss(struct fail_open *fo, 
struct ofproto *ofproto, + uint16_t in_port, const flow_t *flow, + const struct ofpbuf *payload) { - struct fail_open_data *fail_open = fail_open_; - if (rconn_is_connected(fail_open->remote_rconn) || !fail_open->lswitch) { + /* -1 (FLOOD) is coincidentally the value returned by mac_learning_lookup() + * when it doesn't have a entry for that address. */ + enum { FLOOD = -1, DROP = -2 }; + union ofp_action action; + int out_port; + + if (rconn_is_connected(fo->controller) || !fo->mac_learning) { return false; + } + + if (mac_learning_learn(fo->mac_learning, flow->dl_src, 0, in_port)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + VLOG_DBG_RL(&rl, "learned that "ETH_ADDR_FMT" is on port %"PRIu16, + ETH_ADDR_ARGS(flow->dl_src), in_port); + } + + out_port = (eth_addr_is_reserved(flow->dl_src) ? DROP + : mac_learning_lookup(fo->mac_learning, flow->dl_dst, 0)); + memset(&action, 0, sizeof action); + action.output.type = htons(OFPAT_OUTPUT); + action.output.len = htons(sizeof action); + if (in_port == out_port || out_port == DROP) { + /* Set up a flow to drop packets. */ + ofproto_setup_exact_flow(ofproto, flow, NULL, 0, NULL); + } else if (out_port != FLOOD) { + /* The output port is known, so add a new flow. */ + action.output.port = htons(out_port); + ofproto_setup_exact_flow(ofproto, flow, &action, 1, payload); } else { - lswitch_process_packet(fail_open->lswitch, fail_open->local_rconn, - r->halves[HALF_LOCAL].rxbuf); - rconn_run(fail_open->local_rconn); - return true; + /* We don't know that MAC. Send along the packet without setting up a + * flow. 
*/ + action.output.port = htons(OFPP_FLOOD); + ofproto_send_packet(ofproto, flow, &action, 1, payload); } + return true; } static void -fail_open_status_cb(struct status_reply *sr, void *fail_open_) +fail_open_status_cb(struct status_reply *sr, void *fo_) { - struct fail_open_data *fail_open = fail_open_; - const struct settings *s = fail_open->s; - int trigger_duration = s->probe_interval * 3; - int cur_duration = rconn_failure_duration(fail_open->remote_rconn); + struct fail_open *fo = fo_; + int cur_duration = rconn_failure_duration(fo->controller); - status_reply_put(sr, "trigger-duration=%d", trigger_duration); + status_reply_put(sr, "trigger-duration=%d", fo->trigger_duration); status_reply_put(sr, "current-duration=%d", cur_duration); status_reply_put(sr, "triggered=%s", - cur_duration >= trigger_duration ? "true" : "false"); - status_reply_put(sr, "max-idle=%d", s->max_idle); + cur_duration >= fo->trigger_duration ? "true" : "false"); } -static const struct hook_class fail_open_hook_class = { - fail_open_local_packet_cb, /* local_packet_cb */ - NULL, /* remote_packet_cb */ - fail_open_periodic_cb, /* periodic_cb */ - fail_open_wait_cb, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -fail_open_start(struct secchan *secchan, const struct settings *s, - struct switch_status *ss, - struct rconn *local_rconn, struct rconn *remote_rconn) +struct fail_open * +fail_open_create(int trigger_duration, struct switch_status *switch_status, + struct rconn *controller) { - struct fail_open_data *fail_open = xmalloc(sizeof *fail_open); - fail_open->s = s; - fail_open->local_rconn = local_rconn; - fail_open->remote_rconn = remote_rconn; - fail_open->lswitch = NULL; - fail_open->boot_deadline = time_now() + s->probe_interval * 3; - if (s->enable_stp) { - fail_open->boot_deadline += STP_EXTRA_BOOT_TIME; - } - switch_status_register_category(ss, "fail-open", - fail_open_status_cb, fail_open); - add_hook(secchan, &fail_open_hook_class, 
fail_open); + struct fail_open *fo = xmalloc(sizeof *fo); + fo->controller = controller; + fo->trigger_duration = trigger_duration; + fo->last_disconn_secs = 0; + fo->mac_learning = NULL; + switch_status_register_category(switch_status, "fail-open", + fail_open_status_cb, fo); + return fo; } diff --git a/secchan/fail-open.h b/secchan/fail-open.h index 69a3b310..f8ac32c5 100644 --- a/secchan/fail-open.h +++ b/secchan/fail-open.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -34,13 +34,22 @@ #ifndef FAIL_OPEN_H #define FAIL_OPEN_H 1 +#include +#include +#include "flow.h" + +struct fail_open; +struct ofproto; struct rconn; -struct secchan; -struct settings; struct switch_status; -void fail_open_start(struct secchan *, const struct settings *, - struct switch_status *, - struct rconn *local, struct rconn *remote); +struct fail_open *fail_open_create(int trigger_duration, + struct switch_status *, + struct rconn *controller); +void fail_open_wait(struct fail_open *); +void fail_open_run(struct fail_open *); +bool fail_open_handle_flow_miss(struct fail_open *, struct ofproto *, + uint16_t in_port, const flow_t *, + const struct ofpbuf *payload); #endif /* fail-open.h */ diff --git a/secchan/flow-end.c b/secchan/flow-end.c deleted file mode 100644 index d77cbda7..00000000 --- a/secchan/flow-end.c +++ /dev/null @@ -1,379 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "openflow/nicira-ext.h" -#include "openflow/openflow.h" -#include "cfg.h" -#include "flow-end.h" -#include "netflow.h" -#include "ofpbuf.h" -#include "rconn.h" -#include "secchan.h" -#include "socket-util.h" -#include "svec.h" -#include "vconn.h" -#include "xtoxll.h" - -#define THIS_MODULE VLM_flow_end -#include "vlog.h" - - -#define MAX_COLLECTORS 8 - -struct flow_end_data { - const struct settings *s; - - struct rconn *remote_rconn; - struct rconn *local_rconn; - - bool send_ofp_exp; /* Send OpenFlow 'flow expired' messages? */ - - int netflow_fds[MAX_COLLECTORS]; /* Sockets for NetFlow collectors. */ - uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */ -}; - -static int -udp_open(char *dst) -{ - char *save_ptr; - const char *host_name; - const char *port_string; - struct sockaddr_in sin; - int retval; - int fd; - - /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that - * can cause segfaults here: - * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. - * Using "::" instead of the obvious ":" works around it. 
*/ - host_name = strtok_r(dst, "::", &save_ptr); - port_string = strtok_r(NULL, "::", &save_ptr); - if (!host_name) { - ofp_error(0, "%s: bad peer name format", dst); - return -EAFNOSUPPORT; - } - if (!port_string) { - ofp_error(0, "%s: bad port format", dst); - return -EAFNOSUPPORT; - } - - memset(&sin, 0, sizeof sin); - sin.sin_family = AF_INET; - if (lookup_ip(host_name, &sin.sin_addr)) { - return -ENOENT; - } - sin.sin_port = htons(atoi(port_string)); - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - VLOG_ERR("%s: socket: %s", dst, strerror(errno)); - return -errno; - } - - retval = set_nonblocking(fd); - if (retval) { - close(fd); - return -retval; - } - - retval = connect(fd, (struct sockaddr *) &sin, sizeof sin); - if (retval < 0) { - int error = errno; - VLOG_ERR("%s: connect: %s", dst, strerror(error)); - close(fd); - return -error; - } - - return fd; -} - -static void -send_netflow_msg(const struct nx_flow_end *nfe, struct flow_end_data *fe) -{ - struct netflow_v5_header *nf_hdr; - struct netflow_v5_record *nf_rec; - uint8_t buf[sizeof(*nf_hdr) + sizeof(*nf_rec)]; - uint8_t *p = buf; - struct timeval now; - int i; - - /* We only send NetFlow messages for fully specified IP flows; any - * entry with a wildcard is ignored. 
*/ - if ((nfe->match.wildcards != 0) - || (nfe->match.dl_type != htons(ETH_TYPE_IP))) { - return; - } - - memset(&buf, 0, sizeof(buf)); - gettimeofday(&now, NULL); - - nf_hdr = (struct netflow_v5_header *)p; - p += sizeof(*nf_hdr); - nf_rec = (struct netflow_v5_record *)p; - - nf_hdr->version = htons(NETFLOW_V5_VERSION); - nf_hdr->count = htons(1); - nf_hdr->sysuptime = htonl((uint32_t)ntohll(nfe->end_time)); - nf_hdr->unix_secs = htonl(now.tv_sec); - nf_hdr->unix_nsecs = htonl(now.tv_usec * 1000); - nf_hdr->flow_seq = htonl(fe->netflow_cnt); - nf_hdr->engine_type = 0; - nf_hdr->engine_id = 0; - nf_hdr->sampling_interval = htons(0); - - nf_rec->src_addr = nfe->match.nw_src; - nf_rec->dst_addr = nfe->match.nw_dst; - nf_rec->nexthop = htons(0); - nf_rec->input = nfe->match.in_port; - nf_rec->output = htons(0); - nf_rec->packet_count = htonl((uint32_t)ntohll(nfe->packet_count)); - nf_rec->byte_count = htonl((uint32_t)ntohll(nfe->byte_count)); - nf_rec->init_time = htonl((uint32_t)ntohll(nfe->init_time)); - nf_rec->used_time = htonl((uint32_t)ntohll(nfe->used_time)); - - if (nfe->match.nw_proto == IP_TYPE_ICMP) { - /* In NetFlow, the ICMP type and code are concatenated and - * placed in the 'dst_port' field. 
*/ - uint8_t type = (uint8_t)ntohs(nfe->match.tp_src); - uint8_t code = (uint8_t)ntohs(nfe->match.tp_dst); - nf_rec->src_port = htons(0); - nf_rec->dst_port = htons((type << 8) | code); - } else { - nf_rec->src_port = nfe->match.tp_src; - nf_rec->dst_port = nfe->match.tp_dst; - } - - nf_rec->tcp_flags = nfe->tcp_flags; - nf_rec->ip_proto = nfe->match.nw_proto; - nf_rec->ip_tos = nfe->ip_tos; - - nf_rec->src_as = htons(0); - nf_rec->dst_as = htons(0); - nf_rec->src_mask = 0; - nf_rec->dst_mask = 0; - - for (i=0; inetflow_fds[i] == -1) { - break; - } - send(fe->netflow_fds[i], buf, sizeof(buf), 0); - } - fe->netflow_cnt++; -} - -static void -send_ofp_expired(const struct nx_flow_end *nfe, const struct flow_end_data *fe) -{ - struct ofp_flow_expired *ofe; - struct ofpbuf *b; - - if ((nfe->reason != NXFER_IDLE_TIMEOUT) - && (nfe->reason != NXFER_HARD_TIMEOUT)) { - return; - } - - ofe = make_openflow(sizeof(*ofe), OFPT_FLOW_EXPIRED, &b); - ofe->match = nfe->match; - ofe->priority = nfe->priority; - if (nfe->reason == NXFER_IDLE_TIMEOUT) { - ofe->reason = OFPER_IDLE_TIMEOUT; - } else { - ofe->reason = OFPER_HARD_TIMEOUT; - } - /* 'duration' is in seconds, but we keeping track of milliseconds. 
*/ - ofe->duration = htonl((ntohll(nfe->end_time)-ntohll(nfe->init_time))/1000); - ofe->packet_count = nfe->packet_count; - ofe->byte_count = nfe->byte_count; - - rconn_send(fe->remote_rconn, b, NULL); -} - -static void -send_nx_flow_end_config(const struct flow_end_data *fe) -{ - struct nx_flow_end_config *nfec; - struct ofpbuf *b; - - nfec = make_openflow(sizeof(*nfec), OFPT_VENDOR, &b); - nfec->header.vendor = htonl(NX_VENDOR_ID); - nfec->header.subtype = htonl(NXT_FLOW_END_CONFIG); - if ((fe->send_ofp_exp == false) && (fe->netflow_fds[0] < 0)) { - nfec->enable = 0; - } else { - nfec->enable = 1; - } - - rconn_send(fe->local_rconn, b, NULL); -} - -static bool -flow_end_local_packet_cb(struct relay *r, void *flow_end_) -{ - struct flow_end_data *fe = flow_end_; - struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf; - struct nicira_header *request = msg->data; - struct nx_flow_end *nfe = msg->data; - - - if (msg->size < sizeof(*nfe)) { - return false; - } - request = msg->data; - if (request->header.type != OFPT_VENDOR - || request->vendor != htonl(NX_VENDOR_ID) - || request->subtype != htonl(NXT_FLOW_END)) { - return false; - } - - if (fe->netflow_fds[0] >= 0) { - send_netflow_msg(nfe, fe); - } - - if (fe->send_ofp_exp) { - send_ofp_expired(nfe, fe); - } - - /* We always consume these Flow End messages. */ - return true; -} - -static bool -flow_end_remote_packet_cb(struct relay *r, void *flow_end_) -{ - struct flow_end_data *fe = flow_end_; - struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; - struct ofp_switch_config *osc = msg->data; - - /* Check for OFPT_SET_CONFIG messages to see if the controller wants - * to receive 'flow expired' messages. If so, we need to intercept - * the datapath's 'flow end' meta-messages and convert. 
*/ - - if ((msg->size < sizeof(*osc)) - || (osc->header.type != OFPT_SET_CONFIG)) { - return false; - } - - if (osc->flags & htons(OFPC_SEND_FLOW_EXP)) { - fe->send_ofp_exp = true; - } else { - fe->send_ofp_exp = false; - } - - send_nx_flow_end_config(fe); - - return false; -} - -static void -flow_end_reconfigure_cb(void *flow_end_) -{ - int i, nf_idx=0; - struct flow_end_data *fe = flow_end_; - struct svec collectors; - - /* Configure NetFlow collectors. */ - for (i=0; inetflow_fds[i] >= 0) { - close(fe->netflow_fds[i]); - fe->netflow_fds[i] = -1; - } - } - - svec_init(&collectors); - cfg_get_all_keys(&collectors, "netflow.%s.host", fe->s->br_name); - svec_sort(&collectors); - if (!svec_is_unique(&collectors)) { - VLOG_WARN("%s specified twice as netflow collector", - svec_get_duplicate(&collectors)); - svec_unique(&collectors); - } - - for (i=0; i= MAX_COLLECTORS) { - VLOG_WARN("too many netflow collectors specified, ignoring %s\n", - collectors.names[i]); - continue; - } - - fe->netflow_fds[nf_idx] = udp_open(collectors.names[i]); - if (fe->netflow_fds[nf_idx] < 0) { - VLOG_WARN("couldn't open connection to collector, ignoring %s\n", - collectors.names[i]); - } else { - nf_idx++; - } - } - - if (nf_idx > 0) { - send_nx_flow_end_config(fe); - } -} - -static const struct hook_class flow_end_hook_class = { - flow_end_local_packet_cb, /* local_packet_cb */ - flow_end_remote_packet_cb, /* remote_packet_cb */ - NULL, /* periodic_cb */ - NULL, /* wait_cb */ - NULL, /* closing_cb */ - flow_end_reconfigure_cb, /* reconfigure_cb */ -}; - -void -flow_end_start(struct secchan *secchan, const struct settings *settings, - struct rconn *local, struct rconn *remote) -{ - int i; - struct flow_end_data *fe; - - fe = xcalloc(1, sizeof *fe); - - fe->s = settings; - fe->remote_rconn = remote; - fe->local_rconn = local; - - for (i=0; inetflow_fds[i] = -1; - } - fe->send_ofp_exp = false; - - add_hook(secchan, &flow_end_hook_class, fe); -} diff --git a/secchan/flow-end.h 
b/secchan/flow-end.h deleted file mode 100644 index 4ffad59a..00000000 --- a/secchan/flow-end.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#ifndef FLOW_END_H -#define FLOW_END_H 1 - - -struct secchan; -struct settings; -struct rconn; - -void flow_end_start(struct secchan *, const struct settings *, - struct rconn *, struct rconn *); - -#endif /* flow-end.h */ diff --git a/secchan/in-band.c b/secchan/in-band.c index 5ea06e07..bf5e904c 100644 --- a/secchan/in-band.c +++ b/secchan/in-band.c @@ -37,15 +37,16 @@ #include #include #include +#include "dpif.h" #include "flow.h" #include "mac-learning.h" #include "netdev.h" +#include "ofp-print.h" +#include "ofproto.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "packets.h" -#include "port-watcher.h" #include "rconn.h" -#include "secchan.h" #include "status.h" #include "timeval.h" #include "vconn.h" @@ -53,24 +54,16 @@ #define THIS_MODULE VLM_in_band #include "vlog.h" -struct in_band_data { - const struct settings *s; - struct mac_learning *ml; - struct netdev *of_device; +struct in_band { + struct mac_learning *mac_learning; + struct netdev *netdev; struct rconn *controller; - int n_queued; }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); -static void -queue_tx(struct rconn *rc, struct in_band_data *in_band, struct ofpbuf *b) -{ - rconn_send_with_limit(rc, b, &in_band->n_queued, 10); -} - static const uint8_t * -get_controller_mac(struct in_band_data *in_band) +get_controller_mac(struct in_band *in_band) { static uint32_t ip, last_nonzero_ip; static uint8_t mac[ETH_ADDR_LEN], last_nonzero_mac[ETH_ADDR_LEN]; @@ -86,8 +79,8 @@ get_controller_mac(struct in_band_data *in_band) /* Look up MAC address. 
*/ memset(mac, 0, sizeof mac); - if (ip && in_band->of_device) { - int retval = netdev_arp_lookup(in_band->of_device, ip, mac); + if (ip) { + int retval = netdev_arp_lookup(in_band->netdev, ip, mac); if (retval) { VLOG_DBG_RL(&rl, "cannot look up controller hw address " "("IP_FMT"): %s", IP_ARGS(&ip), strerror(retval)); @@ -120,110 +113,91 @@ get_controller_mac(struct in_band_data *in_band) static bool is_controller_mac(const uint8_t dl_addr[ETH_ADDR_LEN], - struct in_band_data *in_band) + struct in_band *in_band) { const uint8_t *mac = get_controller_mac(in_band); return mac && eth_addr_equals(mac, dl_addr); } static void -in_band_learn_mac(struct in_band_data *in_band, +in_band_learn_mac(struct in_band *in_band, uint16_t in_port, const uint8_t src_mac[ETH_ADDR_LEN]) { - if (mac_learning_learn(in_band->ml, src_mac, 0, in_port)) { + if (mac_learning_learn(in_band->mac_learning, src_mac, 0, in_port)) { VLOG_DBG_RL(&rl, "learned that "ETH_ADDR_FMT" is on port %"PRIu16, ETH_ADDR_ARGS(src_mac), in_port); } } -static bool -in_band_local_packet_cb(struct relay *r, void *in_band_) +bool +in_band_handle_flow_miss(struct in_band *in_band, struct ofproto *ofproto, + uint16_t in_port, const flow_t *flow, + const struct ofpbuf *payload) { - struct in_band_data *in_band = in_band_; - struct rconn *rc = r->halves[HALF_LOCAL].rconn; - struct ofp_packet_in *opi; - struct eth_header *eth; - struct ofpbuf payload; - struct flow flow; - uint16_t in_port; + /* -1 (FLOOD) is coincidentally the value returned by mac_learning_lookup() + * when it doesn't have a entry for that address. */ + enum { FLOOD = -1, DROP = -2 }; + union ofp_action action; int out_port; - if (!get_ofp_packet_eth_header(r, &opi, ð) || !in_band->of_device) { - return false; - } - in_port = ntohs(opi->in_port); - get_ofp_packet_payload(opi, &payload); - flow_extract(&payload, in_port, &flow); - /* Deal with local stuff. */ - if (in_port == OFPP_LOCAL) { + if (in_port == ODPP_LOCAL) { /* Sent by secure channel. 
*/ - out_port = mac_learning_lookup(in_band->ml, eth->eth_dst, 0); - } else if (eth_addr_equals(eth->eth_dst, - netdev_get_etheraddr(in_band->of_device))) { + out_port = mac_learning_lookup(in_band->mac_learning, flow->dl_dst, 0); + } else if (eth_addr_equals(flow->dl_dst, + netdev_get_etheraddr(in_band->netdev))) { /* Sent to secure channel. */ - out_port = OFPP_LOCAL; - in_band_learn_mac(in_band, in_port, eth->eth_src); - } else if (eth->eth_type == htons(ETH_TYPE_ARP) - && eth_addr_is_broadcast(eth->eth_dst) - && is_controller_mac(eth->eth_src, in_band)) { + out_port = ODPP_LOCAL; + in_band_learn_mac(in_band, in_port, flow->dl_src); + } else if (flow->dl_type == htons(ETH_TYPE_ARP) + && eth_addr_is_broadcast(flow->dl_dst) + && is_controller_mac(flow->dl_src, in_band)) { /* ARP sent by controller. */ - out_port = OFPP_FLOOD; - } else if ((is_controller_mac(eth->eth_dst, in_band) - || is_controller_mac(eth->eth_src, in_band)) - && flow.dl_type == htons(ETH_TYPE_IP) - && flow.nw_proto == IP_TYPE_TCP - && (flow.tp_src == htons(OFP_TCP_PORT) - || flow.tp_src == htons(OFP_SSL_PORT) - || flow.tp_dst == htons(OFP_TCP_PORT) - || flow.tp_dst == htons(OFP_SSL_PORT))) { + out_port = FLOOD; + } else if ((is_controller_mac(flow->dl_dst, in_band) || + is_controller_mac(flow->dl_src, in_band)) + && flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_TCP + && (flow->tp_src == htons(OFP_TCP_PORT) || + flow->tp_src == htons(OFP_SSL_PORT) || + flow->tp_dst == htons(OFP_TCP_PORT) || + flow->tp_dst == htons(OFP_SSL_PORT))) { /* Traffic to or from controller. Switch it by hand. 
*/ - in_band_learn_mac(in_band, in_port, eth->eth_src); - out_port = mac_learning_lookup(in_band->ml, eth->eth_dst, 0); + in_band_learn_mac(in_band, in_port, flow->dl_src); + out_port = mac_learning_lookup(in_band->mac_learning, flow->dl_dst, 0); } else { - const uint8_t *controller_mac; - controller_mac = get_controller_mac(in_band); - if (eth->eth_type == htons(ETH_TYPE_ARP) - && eth_addr_is_broadcast(eth->eth_dst) - && is_controller_mac(eth->eth_src, in_band)) { + const uint8_t *controller_mac = get_controller_mac(in_band); + if (flow->dl_type == htons(ETH_TYPE_ARP) + && eth_addr_is_broadcast(flow->dl_dst) + && is_controller_mac(flow->dl_src, in_band)) { /* ARP sent by controller. */ - out_port = OFPP_FLOOD; - } else if (is_controller_mac(eth->eth_dst, in_band) - && in_port == mac_learning_lookup(in_band->ml, + out_port = FLOOD; + } else if (is_controller_mac(flow->dl_dst, in_band) + && in_port == mac_learning_lookup(in_band->mac_learning, controller_mac, 0)) { /* Drop controller traffic that arrives on the controller port. */ - out_port = -1; + out_port = DROP; } else { return false; } } - if (in_port == out_port) { - /* The input and output port match. Set up a flow to drop packets. */ - queue_tx(rc, in_band, make_add_flow(&flow, ntohl(opi->buffer_id), - in_band->s->max_idle, 0)); - } else if (out_port != OFPP_FLOOD) { + memset(&action, 0, sizeof action); + action.output.type = htons(OFPAT_OUTPUT); + action.output.len = htons(sizeof action); + if (in_port == out_port || out_port == DROP) { + /* Set up a flow to drop packets. */ + ofproto_setup_exact_flow(ofproto, flow, NULL, 0, NULL); + } else if (out_port != FLOOD) { /* The output port is known, so add a new flow. */ - queue_tx(rc, in_band, - make_add_simple_flow(&flow, ntohl(opi->buffer_id), - out_port, in_band->s->max_idle)); - - /* If the switch didn't buffer the packet, we need to send a copy. 
*/ - if (ntohl(opi->buffer_id) == UINT32_MAX) { - queue_tx(rc, in_band, - make_unbuffered_packet_out(&payload, in_port, out_port)); - } + action.output.port = htons(out_port); + ofproto_setup_exact_flow(ofproto, flow, &action, 1, payload); } else { /* We don't know that MAC. Send along the packet without setting up a * flow. */ - struct ofpbuf *b; - if (ntohl(opi->buffer_id) == UINT32_MAX) { - b = make_unbuffered_packet_out(&payload, in_port, out_port); - } else { - b = make_buffered_packet_out(ntohl(opi->buffer_id), - in_port, out_port); - } - queue_tx(rc, in_band, b); + action.type = htons(OFPAT_OUTPUT); + action.output.port = htons(OFPP_FLOOD); + ofproto_send_packet(ofproto, flow, &action, 1, payload); } return true; } @@ -231,102 +205,66 @@ in_band_local_packet_cb(struct relay *r, void *in_band_) static void in_band_status_cb(struct status_reply *sr, void *in_band_) { - struct in_band_data *in_band = in_band_; + struct in_band *in_band = in_band_; struct in_addr local_ip; uint32_t controller_ip; const uint8_t *controller_mac; + const uint8_t *mac; - if (in_band->of_device) { - const uint8_t *mac = netdev_get_etheraddr(in_band->of_device); - if (netdev_get_in4(in_band->of_device, &local_ip)) { - status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr)); - } - status_reply_put(sr, "local-mac="ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); + mac = netdev_get_etheraddr(in_band->netdev); + if (netdev_get_in4(in_band->netdev, &local_ip)) { + status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr)); + } + status_reply_put(sr, "local-mac="ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); - controller_ip = rconn_get_ip(in_band->controller); - if (controller_ip) { - status_reply_put(sr, "controller-ip="IP_FMT, - IP_ARGS(&controller_ip)); - } - controller_mac = get_controller_mac(in_band); - if (controller_mac) { - status_reply_put(sr, "controller-mac="ETH_ADDR_FMT, - ETH_ADDR_ARGS(controller_mac)); - } + controller_ip = rconn_get_ip(in_band->controller); + if (controller_ip) { + 
status_reply_put(sr, "controller-ip="IP_FMT, + IP_ARGS(&controller_ip)); + } + controller_mac = get_controller_mac(in_band); + if (controller_mac) { + status_reply_put(sr, "controller-mac="ETH_ADDR_FMT, + ETH_ADDR_ARGS(controller_mac)); } } void -get_ofp_packet_payload(struct ofp_packet_in *opi, struct ofpbuf *payload) +in_band_run(struct in_band *in_band) { - payload->data = opi->data; - payload->size = ntohs(opi->header.length) - offsetof(struct ofp_packet_in, - data); + mac_learning_run(in_band->mac_learning, NULL); } -static void -in_band_local_port_cb(const struct ofp_phy_port *port, void *in_band_) -{ - struct in_band_data *in_band = in_band_; - if (port) { - char name[sizeof port->name + 1]; - get_port_name(port, name, sizeof name); - - if (!in_band->of_device - || strcmp(netdev_get_name(in_band->of_device), name)) - { - int error; - netdev_close(in_band->of_device); - error = netdev_open(name, NETDEV_ETH_TYPE_NONE, - &in_band->of_device); - if (error) { - VLOG_ERR("failed to open in-band control network device " - "\"%s\": %s", name, strerror(errno)); - } - } - } else { - netdev_close(in_band->of_device); - in_band->of_device = NULL; - } -} - -static void -in_band_periodic_cb(void *in_band_) +void +in_band_wait(struct in_band *in_band) { - struct in_band_data *in_band = in_band_; - mac_learning_run(in_band->ml, NULL); + mac_learning_wait(in_band->mac_learning); } -static void -in_band_wait_cb(void *in_band_) +struct in_band * +in_band_create(struct dpif *dpif, struct switch_status *ss, + struct rconn *controller) { - struct in_band_data *in_band = in_band_; - mac_learning_wait(in_band->ml); -} + struct in_band *in_band; + struct netdev *netdev; + struct odp_port port; + int error; -static const struct hook_class in_band_hook_class = { - in_band_local_packet_cb, /* local_packet_cb */ - NULL, /* remote_packet_cb */ - in_band_periodic_cb, /* periodic_cb */ - in_band_wait_cb, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; + error = 
dpif_port_query_by_number(dpif, ODPP_LOCAL, &port); + if (error) { + ofp_fatal(error, "failed to query datapath local port"); + } -void -in_band_start(struct secchan *secchan, - const struct settings *s, struct switch_status *ss, - struct port_watcher *pw, struct rconn *remote) -{ - struct in_band_data *in_band; + error = netdev_open(port.devname, NETDEV_ETH_TYPE_NONE, &netdev); + if (error) { + ofp_fatal(error, "failed to open %s network device", port.devname); + } in_band = xcalloc(1, sizeof *in_band); - in_band->s = s; - in_band->ml = mac_learning_create(); - in_band->of_device = NULL; - in_band->controller = remote; + in_band->mac_learning = mac_learning_create(); + in_band->netdev = netdev; + in_band->controller = controller; switch_status_register_category(ss, "in-band", in_band_status_cb, in_band); - port_watcher_register_local_port_callback(pw, in_band_local_port_cb, - in_band); - add_hook(secchan, &in_band_hook_class, in_band); + + return in_band; } diff --git a/secchan/in-band.h b/secchan/in-band.h index b4d21ab9..26636e2c 100644 --- a/secchan/in-band.h +++ b/secchan/in-band.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -34,14 +34,21 @@ #ifndef IN_BAND_H #define IN_BAND_H 1 -struct port_watcher; +#include "flow.h" + +struct dpif; +struct ofproto; struct rconn; struct secchan; struct settings; struct switch_status; -void in_band_start(struct secchan *, const struct settings *, - struct switch_status *, struct port_watcher *, - struct rconn *remote); +struct in_band *in_band_create(struct dpif *, struct switch_status *, + struct rconn *controller); +void in_band_run(struct in_band *); +void in_band_wait(struct in_band *); +bool in_band_handle_flow_miss(struct in_band *, struct ofproto *, + uint16_t in_port, const flow_t *, + const struct ofpbuf 
*payload); #endif /* in-band.h */ diff --git a/secchan/netflow.c b/secchan/netflow.c new file mode 100644 index 00000000..193109c4 --- /dev/null +++ b/secchan/netflow.c @@ -0,0 +1,313 @@ +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#include +#include "netflow.h" +#include +#include +#include +#include +#include "cfg.h" +#include "flow.h" +#include "netflow.h" +#include "packets.h" +#include "secchan.h" +#include "socket-util.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "xtoxll.h" + +#define THIS_MODULE VLM_netflow +#include "vlog.h" + +#define NETFLOW_V5_VERSION 5 + +/* Every NetFlow v5 message contains the header that follows. This is + * followed by up to thirty records that describe a terminating flow. + * We only send a single record per NetFlow message. + */ +struct netflow_v5_header { + uint16_t version; /* NetFlow version is 5. */ + uint16_t count; /* Number of records in this message. */ + uint32_t sysuptime; /* System uptime in milliseconds. */ + uint32_t unix_secs; /* Number of seconds since Unix epoch. */ + uint32_t unix_nsecs; /* Number of residual nanoseconds + after epoch seconds. */ + uint32_t flow_seq; /* Number of flows since sending + messages began. */ + uint8_t engine_type; /* Set to zero. */ + uint8_t engine_id; /* Set to zero. */ + uint16_t sampling_interval; /* Set to zero. */ +}; +BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24); + +/* A NetFlow v5 description of a terminating flow. It is preceded by a + * NetFlow v5 header. + */ +struct netflow_v5_record { + uint32_t src_addr; /* Source IP address. */ + uint32_t dst_addr; /* Destination IP address. */ + uint32_t nexthop; /* IP address of next hop. Set to 0. */ + uint16_t input; /* Input interface index. */ + uint16_t output; /* Output interface index. */ + uint32_t packet_count; /* Number of packets. */ + uint32_t byte_count; /* Number of bytes. */ + uint32_t init_time; /* Value of sysuptime on first packet. */ + uint32_t used_time; /* Value of sysuptime on last packet. */ + + /* The 'src_port' and 'dst_port' identify the source and destination + * port, respectively, for TCP and UDP. 
For ICMP, the high-order + * byte identifies the type and low-order byte identifies the code + * in the 'dst_port' field. */ + uint16_t src_port; + uint16_t dst_port; + + uint8_t pad1; + uint8_t tcp_flags; /* Union of seen TCP flags. */ + uint8_t ip_proto; /* IP protocol. */ + uint8_t ip_tos; /* IP TOS value. */ + uint16_t src_as; /* Source AS ID. Set to 0. */ + uint16_t dst_as; /* Destination AS ID. Set to 0. */ + uint8_t src_mask; /* Source mask bits. Set to 0. */ + uint8_t dst_mask; /* Destination mask bits. Set to 0. */ + uint8_t pad[2]; +}; +BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48); + +#define MAX_COLLECTORS 8 + +struct netflow { + const char *br_name; /* Bridge name, for reading config file. */ + long long int boot_time; /* Time when netflow_create() was called. */ + int netflow_fds[MAX_COLLECTORS]; /* Sockets for NetFlow collectors. */ + uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */ +}; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +static int +udp_open(char *dst) +{ + char *save_ptr; + const char *host_name; + const char *port_string; + struct sockaddr_in sin; + int retval; + int fd; + + /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that + * can cause segfaults here: + * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. + * Using "::" instead of the obvious ":" works around it. 
*/ + host_name = strtok_r(dst, "::", &save_ptr); + port_string = strtok_r(NULL, "::", &save_ptr); + if (!host_name) { + ofp_error(0, "%s: bad peer name format", dst); + return -EAFNOSUPPORT; + } + if (!port_string) { + ofp_error(0, "%s: bad port format", dst); + return -EAFNOSUPPORT; + } + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + if (lookup_ip(host_name, &sin.sin_addr)) { + return -ENOENT; + } + sin.sin_port = htons(atoi(port_string)); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + VLOG_ERR("%s: socket: %s", dst, strerror(errno)); + return -errno; + } + + retval = set_nonblocking(fd); + if (retval) { + close(fd); + return -retval; + } + + retval = connect(fd, (struct sockaddr *) &sin, sizeof sin); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: connect: %s", dst, strerror(error)); + close(fd); + return -error; + } + + return fd; +} + +void +netflow_expire(struct netflow *nf, const flow_t *flow, + const struct odp_flow_stats *stats, + long long int created) +{ + struct netflow_v5_header nf_hdr; + struct netflow_v5_record nf_rec; + struct timeval now; + long long int used; + int i; + + time_timeval(&now); + + memset(&nf_hdr, 0, sizeof nf_hdr); + nf_hdr.version = htons(NETFLOW_V5_VERSION); + nf_hdr.count = htons(1); + nf_hdr.sysuptime = htonl(time_msec() - nf->boot_time); + nf_hdr.unix_secs = htonl(now.tv_sec); + nf_hdr.unix_nsecs = htonl(now.tv_usec * 1000); + nf_hdr.flow_seq = htonl(nf->netflow_cnt); + nf_hdr.engine_type = 0; + nf_hdr.engine_id = 0; + nf_hdr.sampling_interval = htons(0); + + memset(&nf_rec, 0, sizeof nf_rec); + nf_rec.src_addr = flow->nw_src; + nf_rec.dst_addr = flow->nw_dst; + nf_rec.nexthop = htons(0); + nf_rec.input = htons(flow->in_port); + nf_rec.output = htons(0); + nf_rec.packet_count = htonl(stats->n_packets); + nf_rec.byte_count = htonl(stats->n_bytes); + nf_rec.init_time = htonl(created - nf->boot_time); + used = stats->used_sec * 1000 + stats->used_nsec / 1000000; + nf_rec.used_time = htonl((used > 
created ? used : created) + - nf->boot_time); + + if (flow->nw_proto == IP_TYPE_ICMP) { + /* In NetFlow, the ICMP type and code are concatenated and + * placed in the 'dst_port' field. */ + uint8_t type = ntohs(flow->tp_src); + uint8_t code = ntohs(flow->tp_dst); + nf_rec.src_port = htons(0); + nf_rec.dst_port = htons((type << 8) | code); + } else { + nf_rec.src_port = flow->tp_src; + nf_rec.dst_port = flow->tp_dst; + } + + nf_rec.tcp_flags = stats->tcp_flags; + nf_rec.ip_proto = flow->nw_proto; + nf_rec.ip_tos = stats->ip_tos; + + nf_rec.src_as = htons(0); + nf_rec.dst_as = htons(0); + nf_rec.src_mask = 0; + nf_rec.dst_mask = 0; + + for (i=0; inetflow_fds[i] == -1) { + break; + } + + iov[0].iov_base = &nf_hdr; + iov[0].iov_len = sizeof nf_hdr; + iov[1].iov_base = &nf_rec; + iov[1].iov_len = sizeof nf_rec; + msghdr.msg_name = NULL; + msghdr.msg_namelen = 0; + msghdr.msg_iov = iov; + msghdr.msg_iovlen = 2; + msghdr.msg_control = NULL; + msghdr.msg_controllen = 0; + msghdr.msg_flags = 0; + if (sendmsg(nf->netflow_fds[i], &msghdr, 0) < 0) { + VLOG_WARN_RL(&rl, "netflow message send failed: %s", + strerror(errno)); + } + } + nf->netflow_cnt++; +} + +void +netflow_reconfigure(struct netflow *nf) +{ + struct svec collectors; + int i, nf_idx=0; + + /* Configure NetFlow collectors. 
*/ + for (i=0; inetflow_fds[i] >= 0) { + close(nf->netflow_fds[i]); + nf->netflow_fds[i] = -1; + } + } + + svec_init(&collectors); + cfg_get_all_keys(&collectors, "netflow.%s.host", nf->br_name); + svec_sort(&collectors); + if (!svec_is_unique(&collectors)) { + VLOG_WARN("%s specified twice as netflow collector", + svec_get_duplicate(&collectors)); + svec_unique(&collectors); + } + + for (i=0; i= MAX_COLLECTORS) { + VLOG_WARN("too many netflow collectors specified, ignoring %s\n", + collectors.names[i]); + continue; + } + + nf->netflow_fds[nf_idx] = udp_open(collectors.names[i]); + if (nf->netflow_fds[nf_idx] < 0) { + VLOG_WARN("couldn't open connection to collector, ignoring %s\n", + collectors.names[i]); + } else { + nf_idx++; + } + } +} + +struct netflow * +netflow_create(const char *br_name) +{ + struct netflow *nf; + int i; + + nf = xmalloc(sizeof *nf); + nf->br_name = br_name; + nf->boot_time = time_msec(); + for (i = 0; i < MAX_COLLECTORS; i++) { + nf->netflow_fds[i] = -1; + } + nf->netflow_cnt = 0; + return nf; +} diff --git a/secchan/netflow.h b/secchan/netflow.h index 365625a3..98dd7bcb 100644 --- a/secchan/netflow.h +++ b/secchan/netflow.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -34,61 +34,14 @@ #ifndef NETFLOW_H #define NETFLOW_H 1 -#include +#include "flow.h" +struct odp_flow_stats; -#define NETFLOW_V5_VERSION 5 - -/* Every NetFlow v5 message contains the header that follows. This is - * followed by up to thirty records that describe a terminating flow. - * We only send a single record per NetFlow message. - */ -struct netflow_v5_header { - uint16_t version; /* NetFlow version is 5. */ - uint16_t count; /* Number of records in this message. */ - uint32_t sysuptime; /* System uptime in milliseconds. 
*/ - uint32_t unix_secs; /* Number of seconds since Unix epoch. */ - uint32_t unix_nsecs; /* Number of residual nanoseconds - after epoch seconds. */ - uint32_t flow_seq; /* Number of flows since sending - messages began. */ - uint8_t engine_type; /* Set to zero. */ - uint8_t engine_id; /* Set to zero. */ - uint16_t sampling_interval; /* Set to zero. */ -}; -BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24); - -/* A NetFlow v5 description of a terminating flow. It is preceded by a - * NetFlow v5 header. - */ -struct netflow_v5_record { - uint32_t src_addr; /* Source IP address. */ - uint32_t dst_addr; /* Destination IP address. */ - uint32_t nexthop; /* IP address of next hop. Set to 0. */ - uint16_t input; /* Input interface index. */ - uint16_t output; /* Output interface index. */ - uint32_t packet_count; /* Number of packets. */ - uint32_t byte_count; /* Number of bytes. */ - uint32_t init_time; /* Value of sysuptime on first packet. */ - uint32_t used_time; /* Value of sysuptime on last packet. */ - - /* The 'src_port' and 'dst_port' identify the source and destination - * port, respectively, for TCP and UDP. For ICMP, the high-order - * byte identifies the type and low-order byte identifies the code - * in the 'dst_port' field. */ - uint16_t src_port; - uint16_t dst_port; - - uint8_t pad1; - uint8_t tcp_flags; /* Union of seen TCP flags. */ - uint8_t ip_proto; /* IP protocol. */ - uint8_t ip_tos; /* IP TOS value. */ - uint16_t src_as; /* Source AS ID. Set to 0. */ - uint16_t dst_as; /* Destination AS ID. Set to 0. */ - uint8_t src_mask; /* Source mask bits. Set to 0. */ - uint8_t dst_mask; /* Destination mask bits. Set to 0. 
*/ - uint8_t pad[2]; -}; -BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48); +struct netflow *netflow_create(const char *br_name); +void netflow_reconfigure(struct netflow *); +void netflow_expire(struct netflow *, const flow_t *, + const struct odp_flow_stats *, + long long int created); #endif /* netflow.h */ diff --git a/secchan/ofproto.c b/secchan/ofproto.c new file mode 100644 index 00000000..f1ac717f --- /dev/null +++ b/secchan/ofproto.c @@ -0,0 +1,2418 @@ +/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "ofproto.h" +#include +#include +#include +#include +#include +#include "classifier.h" +#include "discovery.h" +#include "dpif.h" +#include "executer.h" +#include "fail-open.h" +#include "in-band.h" +#include "netdev.h" +#include "netflow.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "openflow/datapath-protocol.h" +#include "openflow/nicira-ext.h" +#include "openflow/openflow.h" +#include "packets.h" +#include "pinsched.h" +#include "pktbuf.h" +#include "poll-loop.h" +#include "port-array.h" +#include "rconn.h" +#include "secchan.h" +#include "shash.h" +#include "status.h" +#include "svec.h" +#include "timeval.h" +#include "vconn.h" +#include "xtoxll.h" + +#define THIS_MODULE VLM_ofproto +#include "vlog.h" + +enum { + DP_GROUP_FLOOD = 0, + DP_GROUP_ALL = 1 +}; + +enum { + TABLEID_HASH = 0, + TABLEID_CLASSIFIER = 1 +}; + +struct ofport { + struct netdev *netdev; + struct ofp_phy_port opp; /* In host byte order. 
*/ +}; + +static void hton_ofp_phy_port(struct ofp_phy_port *opp); + +struct odp_actions { + union odp_action *actions; + size_t n_actions, allocated_actions; +}; + +static void init_actions(struct odp_actions *); +static void free_actions(struct odp_actions *); +static void ofp_actions_to_odp_actions(uint16_t ofp_in_port, + const struct ofp_action_header *in_, + size_t n_in, struct odp_actions *out); + +#define UNKNOWN_SUPER ((struct rule *)-1) +struct rule { + struct cls_rule cr; + + uint16_t idle_timeout; + uint16_t hard_timeout; + long long int used; + long long int created; + uint64_t packet_count; /* Packets from *expired* subrules. */ + uint64_t byte_count; /* Bytes from *expired* subrules. */ + uint8_t tcp_flags; + uint8_t ip_tos; + + struct rule *super; + struct list list; + + /* A subrule has no actions (it uses the super-rule's actions). */ + int n_actions; + union ofp_action actions[]; +}; + +static void rule_destroy(struct rule *); +static inline size_t rule_size(int n_actions); +static struct rule *rule_from_cls_rule(const struct cls_rule *); +static void rule_make_actions(const struct rule *, struct odp_actions *); + +struct ofconn { + struct list node; + struct rconn *rconn; + struct pktbuf *pktbuf; + bool send_flow_exp; + int miss_send_len; +}; + +static struct ofconn *ofconn_create(struct ofproto *, struct rconn *); +static void ofconn_destroy(struct ofconn *, struct ofproto *); +static void ofconn_run(struct ofconn *, struct ofproto *); +static void ofconn_wait(struct ofconn *); +static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn); + +struct ofproto { + /* Settings. */ + uint64_t datapath_id; /* Datapath ID. */ + const char *mfr_desc; /* Manufacturer. */ + const char *hw_desc; /* Hardware. */ + const char *sw_desc; /* Software version. */ + const char *serial_desc; /* Serial number. */ + + /* Datapath. */ + struct dpif dpif; + struct dpifmon *dpifmon; + struct port_array ports; + struct shash port_by_name; + + /* Configuration. 
*/ + struct switch_status *switch_status; + struct in_band *in_band; + struct discovery *discovery; + struct fail_open *fail_open; + struct pinsched *miss_sched, *action_sched; + struct executer *executer; + struct netflow *netflow; + + /* Flow table. */ + struct classifier cls; + bool need_revalidate; + long long int next_expiration; + + /* OpenFlow connections. */ + struct list all_conns; + struct ofconn *controller; + struct pvconn *listeners[MAX_MGMT]; + size_t n_listeners; +}; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +static uint64_t pick_datapath_id(struct dpif *); +static void send_packet_in_miss(struct ofpbuf *, void *ofproto); +static void send_packet_in_action(struct ofpbuf *, void *ofproto); +static void update_used(struct ofproto *); +static void expire_rule(struct cls_rule *, void *ofproto); +static void revalidate_subrule(struct cls_rule *, void *ofproto); + +static void handle_odp_msg(struct ofproto *, struct ofpbuf *); + +static void handle_openflow(struct ofconn *, struct ofproto *, + struct ofpbuf *); + +static void refresh_port_group(struct ofproto *, unsigned int group); +static void update_port(struct ofproto *, const char *devname); +static void init_ports(struct ofproto *); +static void reinit_ports(struct ofproto *); + +static uint16_t odp_port_to_ofp_port(uint16_t odp_port); +static uint16_t ofp_port_to_odp_port(uint16_t ofp_port); + +struct ofproto * +ofproto_create(const struct settings *settings) +{ + struct dpifmon *dpifmon; + struct rconn *controller; + struct ofproto *p; + struct dpif dpif; + int error; + size_t i; + + /* Connect to datapath and start listening for messages. 
*/
+    error = dpif_open(settings->dp_name, &dpif);
+    if (error) {
+        ofp_fatal(error, "Failed to open datapath %s", settings->dp_name);
+    }
+    error = dpif_set_listen_mask(&dpif, ODPL_MISS | ODPL_ACTION);
+    if (error) {
+        ofp_fatal(error, "failed to listen on dpif %d", dpif.minor);
+    }
+
+    /* Start monitoring datapath ports for status changes. */
+    error = dpifmon_create(&dpif, &dpifmon);
+    if (error) {
+        ofp_fatal(error, "failed to start monitoring dpif %d", dpif.minor);
+    }
+
+    /* Create controller connection.  Only EAFNOSUPPORT from
+     * rconn_connect() is treated as fatal here; any other result is
+     * ignored. */
+    controller = rconn_create(settings->probe_interval, settings->max_backoff);
+    if (settings->controller_name) {
+        error = rconn_connect(controller, settings->controller_name);
+        if (error == EAFNOSUPPORT) {
+            ofp_fatal(0, "No support for %s vconn", settings->controller_name);
+        }
+    }
+
+    /* Initialize settings.  A configured datapath ID of 0 means "pick one
+     * from the datapath". */
+    p = xcalloc(1, sizeof *p);
+    p->datapath_id = settings->datapath_id;
+    if (!p->datapath_id) {
+        p->datapath_id = pick_datapath_id(&dpif);
+    }
+    /* Log the ID actually in use, not the configured one, which is 0
+     * whenever the ID had to be picked above. */
+    VLOG_INFO("using datapath ID %012"PRIx64, p->datapath_id);
+    p->mfr_desc = settings->mfr_desc;
+    p->hw_desc = settings->hw_desc;
+    p->sw_desc = settings->sw_desc;
+    p->serial_desc = settings->serial_desc;
+
+    /* Initialize datapath information.  'p->dpif' is a copy of the local
+     * 'dpif'. */
+    p->dpif = dpif;
+    p->dpifmon = dpifmon;
+    port_array_init(&p->ports);
+    shash_init(&p->port_by_name);
+
+    /* Initialize submodules.
*/
+    p->switch_status = switch_status_create(settings);
+    switch_status_register_category(p->switch_status, "remote",
+                                    rconn_status_cb, controller);
+    if (settings->in_band) {
+        /* Pass the dpif stored in 'p', as discovery_create() below does,
+         * rather than the local 'dpif', whose storage goes away when this
+         * function returns. */
+        p->in_band = in_band_create(&p->dpif, p->switch_status, controller);
+    }
+    if (settings->discovery) {
+        p->discovery = discovery_create(settings->accept_controller_re,
+                                        settings->update_resolv_conf, &p->dpif,
+                                        p->switch_status);
+    }
+    if (settings->fail_mode == FAIL_OPEN) {
+        p->fail_open = fail_open_create(settings->probe_interval * 3,
+                                        p->switch_status, controller);
+    }
+    if (settings->rate_limit) {
+        /* Two schedulers share the same limits: one for flow-miss
+         * packet-ins, one for packet-ins caused by actions. */
+        p->miss_sched = pinsched_create(settings->rate_limit,
+                                        settings->burst_limit,
+                                        p->switch_status);
+        p->action_sched = pinsched_create(settings->rate_limit,
+                                          settings->burst_limit, NULL);
+    }
+    if (settings->command_acl[0]) {
+        p->executer = executer_create(settings->command_acl,
+                                      settings->command_dir);
+    }
+    if (settings->br_name) {
+        p->netflow = netflow_create(settings->br_name);
+    }
+
+    /* Initialize flow table.  The first expiration pass is scheduled one
+     * second from now. */
+    classifier_init(&p->cls);
+    p->need_revalidate = false;
+    p->next_expiration = time_msec() + 1000;
+
+    /* Initialize OpenFlow connections.  The controller connection gets a
+     * packet buffer and the default miss_send_len up front. */
+    list_init(&p->all_conns);
+    p->controller = ofconn_create(p, controller);
+    p->controller->pktbuf = pktbuf_create();
+    p->controller->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+    p->n_listeners = 0;
+    for (i = 0; i < settings->n_listeners; i++) {
+        const char *name = settings->listener_names[i];
+        struct pvconn *pvconn;
+
+        /* NOTE(review): nothing here checks 'settings->n_listeners'
+         * against MAX_MGMT, the size of 'p->listeners' -- confirm that
+         * the settings parser enforces it. */
+        error = pvconn_open(name, &pvconn);
+        if (error && error != EAGAIN) {
+            ofp_fatal(error, "opening %s", name);
+        }
+        p->listeners[p->n_listeners++] = pvconn;
+    }
+
+    /* Retrieve initial port status.
*/ + init_ports(p); + + return p; +} + +void +ofproto_reconfigure(struct ofproto *p) +{ + if (p->netflow) { + netflow_reconfigure(p->netflow); + } +} + +void +ofproto_run(struct ofproto *p) +{ + struct ofconn *ofconn, *next_ofconn; + char *devname; + int error; + int i; + + for (i = 0; i < 50; i++) { + struct ofpbuf *buf; + int error; + + error = dpif_recv(&p->dpif, &buf); + if (error) { + break; + } + + handle_odp_msg(p, buf); + } + + while ((error = dpifmon_poll(p->dpifmon, &devname)) != EAGAIN) { + if (error == ENOBUFS) { + reinit_ports(p); + } else if (!error) { + update_port(p, devname); + free(devname); + } + } + + if (p->in_band) { + in_band_run(p->in_band); + } + if (p->discovery) { + char *controller_name; + if (rconn_is_connectivity_questionable(p->controller->rconn)) { + discovery_question_connectivity(p->discovery); + } + if (discovery_run(p->discovery, &controller_name)) { + if (controller_name) { + rconn_connect(p->controller->rconn, controller_name); + } else { + rconn_disconnect(p->controller->rconn); + } + } + } + if (p->fail_open) { + fail_open_run(p->fail_open); + } + pinsched_run(p->miss_sched, send_packet_in_miss, p); + pinsched_run(p->action_sched, send_packet_in_action, p); + if (p->executer) { + executer_run(p->executer); + } + + LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node, + &p->all_conns) { + ofconn_run(ofconn, p); + } + + for (i = 0; i < p->n_listeners; i++) { + struct vconn *vconn; + int retval; + + retval = pvconn_accept(p->listeners[i], OFP_VERSION, &vconn); + if (!retval) { + ofconn_create(p, rconn_new_from_vconn("passive", vconn)); + } else if (retval != EAGAIN) { + VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval)); + } + } + + if (time_msec() >= p->next_expiration) { + p->next_expiration = time_msec() + 1000; + update_used(p); + classifier_for_each(&p->cls, expire_rule, p); + } + + if (p->need_revalidate) { + classifier_for_each_with_wildcards(&p->cls, 0, revalidate_subrule, p); + p->need_revalidate = false; 
+ } +} + +void +ofproto_wait(struct ofproto *p) +{ + struct ofconn *ofconn; + size_t i; + + dpif_recv_wait(&p->dpif); + dpifmon_wait(p->dpifmon); + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + ofconn_wait(ofconn); + } + if (p->in_band) { + in_band_wait(p->in_band); + } + if (p->discovery) { + discovery_wait(p->discovery); + } + if (p->fail_open) { + fail_open_wait(p->fail_open); + } + pinsched_wait(p->miss_sched); + pinsched_wait(p->action_sched); + if (p->executer) { + executer_wait(p->executer); + } + if (p->need_revalidate) { + /* Shouldn't happen, but if it does just go around again. */ + VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()"); + poll_immediate_wake(); + } else if (p->next_expiration != LLONG_MAX) { + poll_timer_wait(p->next_expiration - time_msec()); + } + for (i = 0; i < p->n_listeners; i++) { + pvconn_wait(p->listeners[i]); + } +} + +bool +ofproto_is_alive(const struct ofproto *p) +{ + return p->discovery || rconn_is_alive(p->controller->rconn); +} + +int +ofproto_send_packet(struct ofproto *p, const flow_t *flow, + const union ofp_action *actions, size_t n_actions, + const struct ofpbuf *packet) +{ + struct odp_actions odp_actions; + int error; + + ofp_actions_to_odp_actions(odp_port_to_ofp_port(flow->in_port), + (const struct ofp_action_header *) actions, + n_actions, &odp_actions); + error = dpif_execute(&p->dpif, flow->in_port, odp_actions.actions, + odp_actions.n_actions, packet); + free_actions(&odp_actions); + return error; +} + +void +ofproto_setup_exact_flow(struct ofproto *p, const flow_t *flow, + const union ofp_action *actions, size_t n_actions, + const struct ofpbuf *packet) +{ + struct rule *rule, *displaced_rule; + struct odp_actions odp_actions; + struct odp_flow odp_flow; + + rule = xmalloc(rule_size(n_actions)); + cls_rule_from_flow(&rule->cr, flow, 0, UINT16_MAX); + rule->idle_timeout = 5; /* XXX */ + rule->hard_timeout = 0; /* XXX */ + rule->used = rule->created = time_msec(); + rule->packet_count = 0; 
+ rule->byte_count = 0; + rule->tcp_flags = 0; + rule->ip_tos = 0; + rule->super = NULL; /* XXX */ + list_init(&rule->list); + rule->n_actions = n_actions; + memcpy(rule->actions, actions, n_actions * sizeof *rule->actions); + + displaced_rule = rule_from_cls_rule(classifier_insert(&p->cls, &rule->cr)); + if (displaced_rule) { + /* XXX */ + rule_destroy(displaced_rule); + } + + rule_make_actions(rule, &odp_actions); + if (packet) { + if (!ofproto_send_packet(p, flow, actions, n_actions, packet)) { + rule->byte_count = packet->size; + rule->packet_count++; + } + } + + memset(&odp_flow.stats, 0, sizeof odp_flow.stats); + odp_flow.key = *flow; + odp_flow.actions = odp_actions.actions; + odp_flow.n_actions = odp_actions.n_actions; + dpif_flow_add(&p->dpif, &odp_flow); + free_actions(&odp_actions); +} + +static void +reinit_ports(struct ofproto *p) +{ + struct svec devnames; + struct ofport *ofport; + unsigned int port_no; + struct odp_port *odp_ports; + size_t n_odp_ports; + size_t i; + + svec_init(&devnames); + PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) { + svec_add (&devnames, (char *) ofport->opp.name); + } + dpif_port_list(&p->dpif, &odp_ports, &n_odp_ports); + for (i = 0; i < n_odp_ports; i++) { + svec_add (&devnames, odp_ports[i].devname); + } + free(odp_ports); + + svec_sort_unique(&devnames); + for (i = 0; i < devnames.n; i++) { + update_port(p, devnames.names[i]); + } + svec_destroy(&devnames); +} + +static void +refresh_port_group(struct ofproto *p, unsigned int group) +{ + uint16_t *ports; + size_t n_ports; + struct ofport *port; + unsigned int port_no; + + assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD); + + ports = xmalloc(port_array_count(&p->ports) * sizeof *ports); + n_ports = 0; + PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { + if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { + ports[n_ports++] = port_no; + } + } + dpif_port_group_set(&p->dpif, group, ports, n_ports); + free(ports); +} + +static void 
+refresh_port_groups(struct ofproto *p)
+{
+    refresh_port_group(p, DP_GROUP_FLOOD);
+    refresh_port_group(p, DP_GROUP_ALL);
+}
+
+/* Builds a new ofport describing datapath port 'odp_port': opens the
+ * underlying network device and fills in the OpenFlow physical port
+ * description from it.  Returns the new ofport, or a null pointer (after
+ * logging a warning) if the network device cannot be opened. */
+static struct ofport *
+make_ofport(const struct odp_port *odp_port)
+{
+    /* Start from "no flags" so that config/state below are well defined
+     * even if netdev_get_flags() does not fill 'flags' in. */
+    enum netdev_flags flags = 0;
+    struct ofport *ofport;
+    struct netdev *netdev;
+    int error;
+
+    error = netdev_open(odp_port->devname, NETDEV_ETH_TYPE_NONE, &netdev);
+    if (error) {
+        VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s "
+                     "cannot be opened (%s)",
+                     odp_port->devname, odp_port->port,
+                     odp_port->devname, strerror(error));
+        return NULL;
+    }
+
+    /* Zero the whole structure: 'opp' is later copied verbatim into
+     * OpenFlow messages, so name padding and any feature word that
+     * netdev_get_features() leaves untouched must not be indeterminate. */
+    ofport = xcalloc(1, sizeof *ofport);
+    ofport->netdev = netdev;
+    ofport->opp.port_no = odp_port_to_ofp_port(odp_port->port);
+    memcpy(ofport->opp.hw_addr, netdev_get_etheraddr(netdev), ETH_ALEN);
+    memcpy(ofport->opp.name, odp_port->devname,
+           MIN(sizeof ofport->opp.name, sizeof odp_port->devname));
+    ofport->opp.name[sizeof ofport->opp.name - 1] = '\0';
+
+    netdev_get_flags(netdev, &flags);
+    ofport->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN;
+    ofport->opp.state = flags & NETDEV_CARRIER ? 0 : OFPPS_LINK_DOWN;
+
+    netdev_get_features(netdev,
+                        &ofport->opp.curr, &ofport->opp.advertised,
+                        &ofport->opp.supported, &ofport->opp.peer);
+    return ofport;
+}
+
+/* Returns true, after logging a warning, if 'p' already has a port with
+ * the same port number or the same device name as 'odp_port'; otherwise
+ * returns false. */
+static bool
+ofport_conflicts(const struct ofproto *p, const struct odp_port *odp_port)
+{
+    if (port_array_get(&p->ports, odp_port->port)) {
+        VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
+                     odp_port->port);
+        return true;
+    } else if (shash_find(&p->port_by_name, odp_port->devname)) {
+        VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath",
+                     odp_port->devname);
+        return true;
+    } else {
+        return false;
+    }
+}
+
+static int
+ofport_equal(const struct ofport *a_, const struct ofport *b_)
+{
+    const struct ofp_phy_port *a = &a_->opp;
+    const struct ofp_phy_port *b = &b_->opp;
+
+    BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes.
*/ + return (a->port_no == b->port_no + && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) + && !strcmp((char *) a->name, (char *) b->name) + && a->state == b->state + && a->config == b->config + && a->curr == b->curr + && a->advertised == b->advertised + && a->supported == b->supported + && a->peer == b->peer); +} + +static void +send_port_status(struct ofproto *p, const struct ofport *ofport, + uint8_t reason) +{ + struct ofconn *ofconn; + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + struct ofp_port_status *ops; + struct ofpbuf *b; + + ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &b); + ops->reason = reason; + ops->desc = ofport->opp; + hton_ofp_phy_port(&ops->desc); + queue_tx(b, ofconn); + } +} + +static void +ofport_install(struct ofproto *p, struct ofport *ofport) +{ + port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no), + ofport); + shash_add(&p->port_by_name, (char *) ofport->opp.name, ofport); +} + +static void +ofport_remove(struct ofproto *p, struct ofport *ofport) +{ + port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no), NULL); + shash_delete(&p->port_by_name, + shash_find(&p->port_by_name, (char *) ofport->opp.name)); +} + +static void +ofport_free(struct ofport *ofport) +{ + if (ofport) { + netdev_close(ofport->netdev); + free(ofport); + } +} + +static void +update_port(struct ofproto *p, const char *devname) +{ + struct odp_port odp_port; + struct ofport *ofport; + int error; + + ofport = shash_find_data(&p->port_by_name, devname); + error = dpif_port_query_by_name(&p->dpif, devname, &odp_port); + if (!error) { + if (!ofport) { + /* New port. */ + if (!ofport_conflicts(p, &odp_port)) { + ofport = make_ofport(&odp_port); + if (ofport) { + ofport_install(p, ofport); + send_port_status(p, ofport, OFPPR_ADD); + } + } + } else { + /* Modified port. 
*/ + struct ofport *new_ofport = make_ofport(&odp_port); + if (!new_ofport) { + return; + } + + new_ofport->opp.config &= OFPPC_PORT_DOWN; + new_ofport->opp.config |= ofport->opp.config & ~OFPPC_PORT_DOWN; + if (ofport_equal(ofport, new_ofport)) { + /* False alarm--no change. */ + ofport_free(new_ofport); + } else { + ofport_remove(p, ofport); + ofport_install(p, new_ofport); + ofport_free(ofport); + send_port_status(p, new_ofport, OFPPR_MODIFY); + } + } + } else if (error == ENOENT || error == ENODEV) { + /* Deleted port. */ + if (ofport) { + send_port_status(p, ofport, OFPPR_DELETE); + ofport_remove(p, ofport); + ofport_free(ofport); + } + } else { + VLOG_WARN_RL(&rl, "dpif_port_query_by_name returned unexpected error " + "%s", strerror(error)); + return; + } + refresh_port_groups(p); +} + +static void +init_ports(struct ofproto *p) +{ + struct odp_port *ports; + size_t n_ports; + size_t i; + int error; + + error = dpif_port_list(&p->dpif, &ports, &n_ports); + if (error) { + ofp_fatal(error, "failed to list datapath ports"); + } + + for (i = 0; i < n_ports; i++) { + const struct odp_port *odp_port = &ports[i]; + if (!ofport_conflicts(p, odp_port)) { + struct ofport *ofport = make_ofport(odp_port); + if (ofport) { + ofport_install(p, ofport); + } + } + } + free(ports); + refresh_port_groups(p); +} + +static struct ofconn * +ofconn_create(struct ofproto *p, struct rconn *rconn) +{ + struct ofconn *ofconn = xmalloc(sizeof *ofconn); + list_push_back(&p->all_conns, &ofconn->node); + ofconn->rconn = rconn; + ofconn->pktbuf = NULL; + ofconn->send_flow_exp = false; + ofconn->miss_send_len = 0; + return ofconn; +} + +static void +ofconn_destroy(struct ofconn *ofconn, struct ofproto *p) +{ + if (p->executer) { + executer_rconn_closing(p->executer, ofconn->rconn); + } + + list_remove(&ofconn->node); + rconn_destroy(ofconn->rconn); + pktbuf_destroy(ofconn->pktbuf); + free(ofconn); +} + +static void +ofconn_run(struct ofconn *ofconn, struct ofproto *p) +{ + int iteration; + + 
rconn_run(ofconn->rconn); + + /* Limit the number of iterations to prevent other tasks from starving. */ + for (iteration = 0; iteration < 50; iteration++) { + struct ofpbuf *of_msg = rconn_recv(ofconn->rconn); + if (!of_msg) { + break; + } + handle_openflow(ofconn, p, of_msg); + ofpbuf_delete(of_msg); + } + + if (ofconn != p->controller && !rconn_is_alive(ofconn->rconn)) { + ofconn_destroy(ofconn, p); + } +} + +static void +ofconn_wait(struct ofconn *ofconn) +{ + rconn_run_wait(ofconn->rconn); + rconn_recv_wait(ofconn->rconn); +} + +static inline size_t +rule_size(int n_actions) +{ + return (offsetof(struct rule, actions) + + sizeof(union ofp_action) * n_actions); +} + +static struct rule * +rule_from_cls_rule(const struct cls_rule *cls_rule) +{ + return cls_rule ? CONTAINER_OF(cls_rule, struct rule, cr) : NULL; +} + +static void +rule_destroy(struct rule *rule) +{ + if (!rule->super) { + struct rule *subrule; + LIST_FOR_EACH (subrule, struct rule, list, &rule->list) { + subrule->super = UNKNOWN_SUPER; + } + } else if (rule->super != UNKNOWN_SUPER) { + list_remove(&rule->list); + } + free(rule); +} + +static bool +rule_has_out_port(const struct rule *rule, uint16_t out_port) +{ + const union ofp_action *oa; + struct actions_iterator i; + + if (out_port == htons(OFPP_NONE)) { + return true; + } + for (oa = actions_first(&i, rule->actions, rule->n_actions); oa; + oa = actions_next(&i)) { + if (oa->type == htons(OFPAT_OUTPUT) && oa->output.port == out_port) { + return true; + } + } + return false; +} + +static void +rule_make_actions(const struct rule *rule, struct odp_actions *actions) +{ + const struct rule *super = rule->super ? 
rule->super : rule; + assert(!rule->cr.wc.wildcards); + ofp_actions_to_odp_actions(odp_port_to_ofp_port(rule->cr.flow.in_port), + (const struct ofp_action_header *) super->actions, + super->n_actions, actions); +} + +static void +queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn) +{ + update_openflow_length(msg); + if (rconn_send(ofconn->rconn, msg, NULL)) { /* XXX */ + ofpbuf_delete(msg); + } +} + +static void +send_error(const struct ofconn *ofconn, const struct ofp_header *oh, + int error, const void *data, size_t len) +{ + struct ofpbuf *buf; + struct ofp_error_msg *oem; + + if (!(error >> 16)) { + VLOG_WARN_RL(&rl, "not sending bad error code %d to controller", + error); + return; + } + + oem = make_openflow_xid(len + sizeof *oem, OFPT_ERROR, + oh ? oh->xid : 0, &buf); + oem->type = htons((unsigned int) error >> 16); + oem->code = htons(error & 0xffff); + memcpy(oem->data, data, len); + queue_tx(buf, ofconn); +} + +static void +send_error_oh(const struct ofconn *ofconn, const struct ofp_header *oh, + int error) +{ + size_t oh_length = ntohs(oh->length); + send_error(ofconn, oh, error, oh, MIN(oh_length, 64)); +} + +static void +hton_ofp_phy_port(struct ofp_phy_port *opp) +{ + opp->port_no = htons(opp->port_no); + opp->config = htonl(opp->config); + opp->state = htonl(opp->state); + opp->curr = htonl(opp->curr); + opp->advertised = htonl(opp->advertised); + opp->supported = htonl(opp->supported); + opp->peer = htonl(opp->peer); +} + +static int +handle_features_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) +{ + struct ofp_switch_features *osf; + struct ofpbuf *buf; + unsigned int port_no; + struct ofport *port; + + osf = make_openflow_xid(sizeof *osf, OFPT_FEATURES_REPLY, oh->xid, &buf); + osf->datapath_id = htonll(p->datapath_id); + osf->n_buffers = htonl(pktbuf_capacity()); + osf->n_tables = 2; + osf->capabilities = htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS | + OFPC_PORT_STATS | OFPC_MULTI_PHY_TX); + osf->actions = htonl((1u 
<< OFPAT_OUTPUT) |
+                         (1u << OFPAT_SET_VLAN_VID) |
+                         (1u << OFPAT_SET_VLAN_PCP) |
+                         (1u << OFPAT_STRIP_VLAN) |
+                         (1u << OFPAT_SET_DL_SRC) |
+                         (1u << OFPAT_SET_DL_DST) |
+                         (1u << OFPAT_SET_NW_SRC) |
+                         (1u << OFPAT_SET_NW_DST) |
+                         (1u << OFPAT_SET_TP_SRC) |
+                         (1u << OFPAT_SET_TP_DST));
+
+    /* Append one port description per known port, converted to network
+     * byte order in place inside the buffer. */
+    PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
+        hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
+    }
+
+    queue_tx(buf, ofconn);
+    return 0;
+}
+
+/* Answers the OFPT_GET_CONFIG_REQUEST in 'oh' on 'ofconn' with the
+ * datapath's fragment handling mode, this connection's flow-expiration
+ * preference, and its miss_send_len.  Always returns 0. */
+static int
+handle_get_config_request(struct ofproto *p, struct ofconn *ofconn,
+                          struct ofp_header *oh)
+{
+    struct ofpbuf *buf;
+    struct ofp_switch_config *osc;
+    uint16_t flags;
+    /* Default to "don't drop" so 'flags' is well defined even if the
+     * query below leaves 'drop_frags' unset. */
+    bool drop_frags = false;
+
+    /* Figure out flags. */
+    dpif_get_drop_frags(&p->dpif, &drop_frags);
+    flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
+    if (ofconn->send_flow_exp) {
+        flags |= OFPC_SEND_FLOW_EXP;
+    }
+
+    /* Send reply. */
+    osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf);
+    osc->flags = htons(flags);
+    osc->miss_send_len = htons(ofconn->miss_send_len);
+    queue_tx(buf, ofconn);
+
+    return 0;
+}
+
+/* Applies the OFPT_SET_CONFIG message 'osc' received on 'ofconn': records
+ * whether the connection wants flow expirations, sets the datapath's
+ * fragment handling, and creates or destroys the connection's packet
+ * buffer when miss_send_len changes between zero and nonzero.
+ *
+ * Returns 0 on success, otherwise the error from check_ofp_message(). */
+static int
+handle_set_config(struct ofproto *p, struct ofconn *ofconn,
+                  struct ofp_switch_config *osc)
+{
+    uint16_t flags;
+    int error;
+
+    error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc);
+    if (error) {
+        return error;
+    }
+    flags = ntohs(osc->flags);
+
+    ofconn->send_flow_exp = (flags & OFPC_SEND_FLOW_EXP) != 0;
+
+    switch (flags & OFPC_FRAG_MASK) {
+    case OFPC_FRAG_NORMAL:
+        dpif_set_drop_frags(&p->dpif, false);
+        break;
+    case OFPC_FRAG_DROP:
+        dpif_set_drop_frags(&p->dpif, true);
+        break;
+    default:
+        /* Log the host-order value that was actually examined. */
+        VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
+                     flags);
+        break;
+    }
+
+    if ((ntohs(osc->miss_send_len) != 0) != (ofconn->miss_send_len != 0)) {
+        if (ntohs(osc->miss_send_len) != 0) {
+            ofconn->pktbuf = pktbuf_create();
+        } else {
+            /* Clear the pointer after destroying the buffer: it is
+             * destroyed again in ofconn_destroy() and must not dangle in
+             * the meantime. */
+            pktbuf_destroy(ofconn->pktbuf);
+            ofconn->pktbuf = NULL;
+        }
+    }
+
+    ofconn->miss_send_len = ntohs(osc->miss_send_len);
+
+    return 0;
+}
+
+static
uint16_t +ofp_port_to_odp_port(uint16_t ofp_port) +{ + switch (ofp_port) { + case OFPP_LOCAL: + return ODPP_LOCAL; + case OFPP_NONE: + return ODPP_NONE; + default: + return ofp_port; + } +} + +static uint16_t +odp_port_to_ofp_port(uint16_t odp_port) +{ + switch (odp_port) { + case ODPP_LOCAL: + return OFPP_LOCAL; + case ODPP_NONE: + return OFPP_NONE; + default: + return odp_port; + } +} + +static void +init_actions(struct odp_actions *actions) +{ + actions->actions = NULL; + actions->n_actions = 0; + actions->allocated_actions = 0; +} + +static void +free_actions(struct odp_actions *actions) +{ + free(actions->actions); +} + +static union odp_action * +add_action(struct odp_actions *actions, uint16_t type) +{ + union odp_action *a; + if (actions->n_actions >= actions->allocated_actions) { + actions->actions = x2nrealloc(actions->actions, + &actions->allocated_actions, + sizeof *actions->actions); + } + a = &actions->actions[actions->n_actions++]; + memset(a, 0, sizeof *a); + a->type = type; + return a; +} + +static void +add_output_action(struct odp_actions *actions, uint16_t port) +{ + add_action(actions, ODPAT_OUTPUT)->output.port = port; +} + +static void +add_output_group_action(struct odp_actions *actions, uint16_t group) +{ + add_action(actions, ODPAT_OUTPUT_GROUP)->output_group.group = group; +} + +static void +add_controller_action(struct odp_actions *actions, + const struct ofp_action_output *oao) +{ + union odp_action *a = add_action(actions, ODPAT_CONTROLLER); + a->controller.arg = oao->max_len ? 
ntohs(oao->max_len) : UINT32_MAX; +} + +static int +ofp_to_odp_action_output(struct odp_actions *actions, uint16_t ofp_in_port, + const struct ofp_action_output *oao) +{ + switch (ntohs(oao->port)) { + case OFPP_IN_PORT: + add_output_action(actions, ofp_port_to_odp_port(ofp_in_port)); + break; + case OFPP_TABLE: + /* XXX not implemented */ + break; + case OFPP_NORMAL: + add_output_group_action(actions, DP_GROUP_FLOOD); /* XXX */ + break; + case OFPP_FLOOD: + add_output_group_action(actions, DP_GROUP_FLOOD); + break; + case OFPP_ALL: + add_output_group_action(actions, DP_GROUP_ALL); + break; + case OFPP_CONTROLLER: + add_controller_action(actions, oao); + break; + case OFPP_LOCAL: + add_output_action(actions, ODPP_LOCAL); + break; + default: + if (ntohs(oao->port) != ofp_in_port) { + add_output_action(actions, ofp_port_to_odp_port(ntohs(oao->port))); + } + break; + } + return 0; +} + +static void +ofp_actions_to_odp_actions(uint16_t ofp_in_port, + const struct ofp_action_header *in_, size_t n_in, + struct odp_actions *out) +{ + union ofp_action *in = (union ofp_action *) in_; + struct actions_iterator iter; + const union ofp_action *a; + + init_actions(out); + for (a = actions_first(&iter, in, n_in); a; a = actions_next(&iter)) { + uint16_t type = ntohs(a->type); + union odp_action *oa; + + switch (type) { + case OFPAT_OUTPUT: + ofp_to_odp_action_output(out, ofp_in_port, &a->output); + break; + + case OFPAT_SET_VLAN_VID: + oa = add_action(out, ODPAT_SET_VLAN_VID); + oa->vlan_vid.vlan_vid = a->vlan_vid.vlan_vid; + break; + + case OFPAT_SET_VLAN_PCP: + oa = add_action(out, ODPAT_SET_VLAN_PCP); + oa->vlan_pcp.vlan_pcp = a->vlan_pcp.vlan_pcp; + break; + + case OFPAT_STRIP_VLAN: + add_action(out, ODPAT_STRIP_VLAN); + break; + + case OFPAT_SET_DL_SRC: + oa = add_action(out, ODPAT_SET_DL_SRC); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_DL_DST: + oa = add_action(out, ODPAT_SET_DL_DST); + 
memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_NW_SRC: + oa = add_action(out, ODPAT_SET_NW_SRC); + oa->nw_addr.nw_addr = a->nw_addr.nw_addr; + break; + + case OFPAT_SET_TP_SRC: + oa = add_action(out, ODPAT_SET_TP_SRC); + oa->tp_port.tp_port = a->tp_port.tp_port; + break; + + case OFPAT_VENDOR: + if (a->vendor.vendor == htonl(NX_VENDOR_ID)) { + const struct nx_action_snat *nas = + (const struct nx_action_snat *) a; + if (nas->subtype == htons(NXAST_SNAT)) { + oa = add_action(out, ODPAT_SNAT); + oa->snat.port = ntohs(nas->port); + } + } + break; + + default: + VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type); + break; + } + } +} + +static int +handle_packet_out(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) +{ + struct ofp_packet_out *opo; + struct ofpbuf payload, *buffer; + struct odp_actions actions; + int n_actions; + uint16_t in_port; + int error; + + error = check_ofp_packet_out(oh, &payload, &n_actions); + if (error) { + return error; + } + opo = (struct ofp_packet_out *) oh; + + if (opo->buffer_id != htonl(UINT32_MAX)) { + error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id), + &buffer, &in_port); + if (error) { + return error; + } + payload = *buffer; + } else { + buffer = NULL; + } + + in_port = ofp_port_to_odp_port(ntohs(opo->in_port)); + ofp_actions_to_odp_actions(ntohs(opo->in_port), opo->actions, + n_actions, &actions); + dpif_execute(&p->dpif, in_port, actions.actions, actions.n_actions, + &payload); + free_actions(&actions); + ofpbuf_delete(buffer); + + return 0; +} + +static void +update_port_config(struct ofproto *p, struct ofport *port, + uint32_t config, uint32_t mask) +{ + mask &= config ^ port->opp.config; + if (mask & OFPPC_PORT_DOWN) { + if (config & OFPPC_PORT_DOWN) { + netdev_turn_flags_off(port->netdev, NETDEV_UP, true); + } else { + netdev_turn_flags_on(port->netdev, NETDEV_UP, true); + } + } + if (mask & OFPPC_NO_STP) { + /* XXX */ + } + 
if (mask & OFPPC_NO_RECV) { + /* XXX */ + } + if (mask & OFPPC_NO_RECV_STP) { + /* XXX */ + } + if (mask & OFPPC_NO_FLOOD) { + port->opp.config ^= OFPPC_NO_FLOOD; + refresh_port_group(p, DP_GROUP_FLOOD); + } + if (mask & OFPPC_NO_FWD) { + /* XXX */ + } + if (mask & OFPPC_NO_PACKET_IN) { + port->opp.config ^= OFPPC_NO_PACKET_IN; + } +} + +static int +handle_port_mod(struct ofproto *p, struct ofp_header *oh) +{ + const struct ofp_port_mod *opm; + struct ofport *port; + int error; + + error = check_ofp_message(oh, OFPT_PORT_MOD, sizeof *opm); + if (error) { + return error; + } + opm = (struct ofp_port_mod *) oh; + + port = port_array_get(&p->ports, + ofp_port_to_odp_port(ntohs(opm->port_no))); + if (!port) { + return ofp_mkerr(OFPET_PORT_MOD_FAILED, OFPPMFC_BAD_PORT); + } else if (memcmp(port->opp.hw_addr, opm->hw_addr, OFP_ETH_ALEN)) { + return ofp_mkerr(OFPET_PORT_MOD_FAILED, OFPPMFC_BAD_HW_ADDR); + } else { + update_port_config(p, port, ntohl(opm->config), ntohl(opm->mask)); + if (opm->advertise) { + netdev_set_advertisements(port->netdev, ntohl(opm->advertise)); + } + } + return 0; +} + +static struct ofpbuf * +make_stats_reply(uint32_t xid, uint16_t type, size_t body_len) +{ + struct ofp_stats_reply *osr; + struct ofpbuf *msg; + + msg = ofpbuf_new(MIN(sizeof *osr + body_len, UINT16_MAX)); + osr = put_openflow_xid(sizeof *osr, OFPT_STATS_REPLY, xid, msg); + osr->type = type; + osr->flags = htons(0); + return msg; +} + +static struct ofpbuf * +start_stats_reply(const struct ofp_stats_request *request, size_t body_len) +{ + return make_stats_reply(request->header.xid, request->type, body_len); +} + +static void * +append_stats_reply(size_t nbytes, struct ofconn *ofconn, struct ofpbuf **msgp) +{ + struct ofpbuf *msg = *msgp; + assert(nbytes <= UINT16_MAX - sizeof(struct ofp_stats_reply)); + if (nbytes + msg->size > UINT16_MAX) { + struct ofp_stats_reply *reply = msg->data; + reply->flags = htons(OFPSF_REPLY_MORE); + *msgp = make_stats_reply(reply->header.xid, 
reply->type, nbytes); + queue_tx(msg, ofconn); + } + return ofpbuf_put_uninit(*msgp, nbytes); +} + +static int +handle_desc_stats_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_stats_request *request) +{ + struct ofp_desc_stats *ods; + struct ofpbuf *msg; + + msg = start_stats_reply(request, sizeof *ods); + ods = append_stats_reply(sizeof *ods, ofconn, &msg); + strncpy(ods->mfr_desc, p->mfr_desc, sizeof ods->mfr_desc); + strncpy(ods->hw_desc, p->hw_desc, sizeof ods->hw_desc); + strncpy(ods->sw_desc, p->sw_desc, sizeof ods->sw_desc); + strncpy(ods->serial_num, p->serial_desc, sizeof ods->serial_num); + queue_tx(msg, ofconn); + + return 0; +} + +static void +count_subrules(struct cls_rule *cls_rule, void *n_subrules_) +{ + struct rule *rule = rule_from_cls_rule(cls_rule); + int *n_subrules = n_subrules_; + + if (rule->super) { + (*n_subrules)++; + } +} + +static int +handle_table_stats_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_stats_request *request) +{ + struct ofp_table_stats *ots; + struct ofpbuf *msg; + struct odp_stats dpstats; + int n_exact, n_subrules, n_wild; + + msg = start_stats_reply(request, sizeof *ots * 2); + + /* Count rules of various kinds. */ + n_subrules = 0; + classifier_for_each_with_wildcards(&p->cls, 0, + count_subrules, &n_subrules); + n_exact = classifier_count_exact(&p->cls) - n_subrules; + n_wild = classifier_count(&p->cls) - classifier_count_exact(&p->cls); + + /* Hash table. */ + dpif_get_dp_stats(&p->dpif, &dpstats); + ots = append_stats_reply(sizeof *ots, ofconn, &msg); + memset(ots, 0, sizeof *ots); + ots->table_id = TABLEID_HASH; + strcpy(ots->name, "hash"); + ots->wildcards = htonl(0); + ots->max_entries = htonl(dpstats.max_capacity); + ots->active_count = htonl(n_exact); + ots->lookup_count = htonll(dpstats.n_frags + dpstats.n_hit + + dpstats.n_missed); + ots->matched_count = htonll(dpstats.n_hit); /* XXX */ + + /* Classifier table. 
*/ + ots = append_stats_reply(sizeof *ots, ofconn, &msg); + memset(ots, 0, sizeof *ots); + ots->table_id = TABLEID_CLASSIFIER; + strcpy(ots->name, "classifier"); + ots->wildcards = htonl(OFPFW_ALL); + ots->max_entries = htonl(65536); + ots->active_count = htonl(n_wild); + ots->lookup_count = htonll(0); /* XXX */ + ots->matched_count = htonll(0); /* XXX */ + + queue_tx(msg, ofconn); + return 0; +} + +static int +handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_stats_request *request) +{ + struct ofp_port_stats *ops; + struct ofpbuf *msg; + struct ofport *port; + unsigned int port_no; + + msg = start_stats_reply(request, sizeof *ops * 16); + PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { + struct netdev_stats stats; + + /* Intentionally ignore return value, since errors will set 'stats' to + * all-1s, which is correct for OpenFlow, and netdev_get_stats() will + * log errors. */ + netdev_get_stats(port->netdev, &stats); + + ops = append_stats_reply(sizeof *ops, ofconn, &msg); + ops->port_no = htons(odp_port_to_ofp_port(port_no)); + memset(ops->pad, 0, sizeof ops->pad); + ops->rx_packets = htonll(stats.rx_packets); + ops->tx_packets = htonll(stats.tx_packets); + ops->rx_bytes = htonll(stats.rx_bytes); + ops->tx_bytes = htonll(stats.tx_bytes); + ops->rx_dropped = htonll(stats.rx_dropped); + ops->tx_dropped = htonll(stats.tx_dropped); + ops->rx_errors = htonll(stats.rx_errors); + ops->tx_errors = htonll(stats.tx_errors); + ops->rx_frame_err = htonll(stats.rx_frame_errors); + ops->rx_over_err = htonll(stats.rx_over_errors); + ops->rx_crc_err = htonll(stats.rx_crc_errors); + ops->collisions = htonll(stats.collisions); + } + + queue_tx(msg, ofconn); + return 0; +} + +struct flow_stats_cbdata { + struct ofproto *ofproto; + struct ofconn *ofconn; + uint16_t out_port; + struct ofpbuf *msg; +}; + +static void +query_stats(struct ofproto *p, struct rule *rule, + uint64_t *packet_countp, uint64_t *byte_countp) +{ + uint64_t packet_count, 
byte_count; + struct rule *subrule; + struct odp_flow *odp_flows; + size_t n_odp_flows; + + n_odp_flows = rule->cr.wc.wildcards ? list_size(&rule->list) : 1; + odp_flows = xcalloc(1, n_odp_flows * sizeof *odp_flows); + if (rule->cr.wc.wildcards) { + size_t i = 0; + LIST_FOR_EACH (subrule, struct rule, list, &rule->list) { + odp_flows[i++].key = subrule->cr.flow; + } + } else { + odp_flows[0].key = rule->cr.flow; + } + + packet_count = rule->packet_count; + byte_count = rule->byte_count; + if (!dpif_flow_query_multiple(&p->dpif, odp_flows, n_odp_flows)) { + size_t i; + for (i = 0; i < n_odp_flows; i++) { + struct odp_flow *odp_flow = &odp_flows[i]; + packet_count += odp_flow->stats.n_packets; + byte_count += odp_flow->stats.n_bytes; + } + } + free(odp_flows); + + *packet_countp = packet_count; + *byte_countp = byte_count; +} + +static void +flow_stats_cb(struct cls_rule *rule_, void *cbdata_) +{ + struct rule *rule = rule_from_cls_rule(rule_); + struct flow_stats_cbdata *cbdata = cbdata_; + struct ofp_flow_stats *ofs; + uint64_t packet_count, byte_count; + size_t act_len, len; + + if (rule->super || !rule_has_out_port(rule, cbdata->out_port)) { + return; + } + + act_len = sizeof *rule->actions * rule->n_actions; + len = offsetof(struct ofp_flow_stats, actions) + act_len; + + query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); + + ofs = append_stats_reply(len, cbdata->ofconn, &cbdata->msg); + ofs->length = htons(len); + ofs->table_id = rule->cr.wc.wildcards ? 
TABLEID_CLASSIFIER : TABLEID_HASH; + ofs->pad = 0; + flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &ofs->match); + ofs->duration = htonl((time_msec() - rule->created) / 1000); + ofs->priority = htons(rule->cr.priority); + ofs->idle_timeout = htons(rule->idle_timeout); + ofs->hard_timeout = htons(rule->hard_timeout); + memset(ofs->pad2, 0, sizeof ofs->pad2); + ofs->packet_count = htonll(packet_count); + ofs->byte_count = htonll(byte_count); + memcpy(ofs->actions, rule->actions, act_len); +} + +static int +table_id_to_include(uint8_t table_id) +{ + return (table_id == TABLEID_HASH ? CLS_INC_EXACT + : table_id == TABLEID_CLASSIFIER ? CLS_INC_WILD + : CLS_INC_EXACT | CLS_INC_WILD); +} + +static int +handle_flow_stats_request(struct ofproto *p, struct ofconn *ofconn, + const struct ofp_stats_request *osr, + size_t arg_size) +{ + struct ofp_flow_stats_request *fsr; + struct flow_stats_cbdata cbdata; + struct cls_rule target; + + if (arg_size != sizeof *fsr) { + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); + } + fsr = (struct ofp_flow_stats_request *) osr->body; + + cbdata.ofproto = p; + cbdata.ofconn = ofconn; + cbdata.out_port = fsr->out_port; + cbdata.msg = start_stats_reply(osr, 1024); + cls_rule_from_match(&target, &fsr->match, 0); + classifier_for_each_match(&p->cls, &target, + table_id_to_include(fsr->table_id), + flow_stats_cb, &cbdata); + queue_tx(cbdata.msg, ofconn); + return 0; +} + +struct aggregate_stats_cbdata { + struct ofproto *ofproto; + uint16_t out_port; + uint64_t packet_count; + uint64_t byte_count; + uint32_t n_flows; +}; + +static void +aggregate_stats_cb(struct cls_rule *rule_, void *cbdata_) +{ + struct rule *rule = rule_from_cls_rule(rule_); + struct aggregate_stats_cbdata *cbdata = cbdata_; + uint64_t packet_count, byte_count; + + if (rule->super || !rule_has_out_port(rule, cbdata->out_port)) { + return; + } + + query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); + + cbdata->packet_count += packet_count; + 
cbdata->byte_count += byte_count; + cbdata->n_flows++; +} + +static int +handle_aggregate_stats_request(struct ofproto *p, struct ofconn *ofconn, + const struct ofp_stats_request *osr, + size_t arg_size) +{ + struct ofp_aggregate_stats_request *asr; + struct ofp_aggregate_stats_reply *reply; + struct aggregate_stats_cbdata cbdata; + struct cls_rule target; + struct ofpbuf *msg; + + if (arg_size != sizeof *asr) { + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); + } + asr = (struct ofp_aggregate_stats_request *) osr->body; + + cbdata.ofproto = p; + cbdata.out_port = asr->out_port; + cbdata.packet_count = 0; + cbdata.byte_count = 0; + cbdata.n_flows = 0; + cls_rule_from_match(&target, &asr->match, 0); + classifier_for_each_match(&p->cls, &target, + table_id_to_include(asr->table_id), + aggregate_stats_cb, &cbdata); + + msg = start_stats_reply(osr, sizeof *reply); + reply = append_stats_reply(sizeof *reply, ofconn, &msg); + reply->flow_count = htonl(cbdata.n_flows); + reply->packet_count = htonll(cbdata.packet_count); + reply->byte_count = htonll(cbdata.byte_count); + queue_tx(msg, ofconn); + return 0; +} + +static int +handle_stats_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) +{ + struct ofp_stats_request *osr; + size_t arg_size; + int error; + + error = check_ofp_message_array(oh, OFPT_STATS_REQUEST, sizeof *osr, + 1, &arg_size); + if (error) { + return error; + } + osr = (struct ofp_stats_request *) oh; + + switch (ntohs(osr->type)) { + case OFPST_DESC: + return handle_desc_stats_request(p, ofconn, osr); + + case OFPST_FLOW: + return handle_flow_stats_request(p, ofconn, osr, arg_size); + + case OFPST_AGGREGATE: + return handle_aggregate_stats_request(p, ofconn, osr, arg_size); + + case OFPST_TABLE: + return handle_table_stats_request(p, ofconn, osr); + + case OFPST_PORT: + return handle_port_stats_request(p, ofconn, osr); + + case OFPST_VENDOR: + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR); + + default: + return 
ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_STAT);
+    }
+}
+
+/* Converts a timestamp expressed as 'sec' seconds plus 'nsec' nanoseconds
+ * into milliseconds.  A zero 'sec' yields 0 regardless of 'nsec'. */
+static long long int
+msec_from_nsec(uint64_t sec, uint32_t nsec)
+{
+    return !sec ? 0 : sec * 1000 + nsec / 1000000;
+}
+
+/* Advances 'rule->used' to the last-used time reported in 'stats', if that
+ * time is later than the one already recorded. */
+static void
+update_time(struct rule *rule, const struct odp_flow_stats *stats)
+{
+    long long int used = msec_from_nsec(stats->used_sec, stats->used_nsec);
+    if (used > rule->used) {
+        rule->used = used;
+    }
+}
+
+/* Folds 'stats' into 'rule': refreshes the last-used time, adds the packet
+ * and byte counts, ORs in the TCP flags, and overwrites the IP ToS. */
+static void
+update_stats(struct rule *rule, const struct odp_flow_stats *stats)
+{
+    update_time(rule, stats);
+    rule->packet_count += stats->n_packets;
+    rule->byte_count += stats->n_bytes;
+    rule->tcp_flags |= stats->tcp_flags;
+    rule->ip_tos = stats->ip_tos;
+}
+
+/* If 'ofm' names a buffered packet (its buffer_id is not UINT32_MAX),
+ * retrieves that packet from 'ofconn''s packet buffer and executes the
+ * 'n_actions' actions in 'ofm' on it through the datapath.
+ *
+ * '*byte_count' is set to the packet's size if dpif_execute() returned 0,
+ * otherwise to 0.
+ *
+ * Returns 0 if no buffer was named or the packet was retrieved, otherwise an
+ * OpenFlow error code (from ofp_mkerr() or pktbuf_retrieve()). */
+static int
+send_buffered(struct ofproto *p, struct ofconn *ofconn,
+              struct ofp_flow_mod *ofm, size_t n_actions,
+              int *byte_count)
+{
+    struct odp_actions actions;
+    struct ofpbuf *packet;
+    uint16_t in_port;
+    int error;
+
+    *byte_count = 0;
+    if (ofm->buffer_id == htonl(UINT32_MAX)) {
+        return 0;
+    } else if (!ofconn->pktbuf) {
+        VLOG_WARN_RL(&rl, "attempt to send buffered packet via connection "
+                     "without buffers");
+        return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+    }
+
+    error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id),
+                            &packet, &in_port);
+    if (error) {
+        return error;
+    }
+
+    ofp_actions_to_odp_actions(in_port, ofm->actions, n_actions, &actions);
+    if (!dpif_execute(&p->dpif, ofp_port_to_odp_port(in_port),
+                      actions.actions, actions.n_actions, packet)) {
+        *byte_count = packet->size;
+    }
+    free_actions(&actions);
+    ofpbuf_delete(packet);
+
+    return 0;
+}
+
+/* Handles OFPFC_ADD: sends any buffered packet named by 'ofm', then builds a
+ * new rule from 'ofm' and its 'n_actions' actions and inserts it into the
+ * classifier, replacing any rule that it displaces.
+ *
+ * For a wildcarded rule, subrules of a displaced rule are re-parented to the
+ * new rule and their datapath flows are re-added with the new actions.  For
+ * an exact-match rule, the flow is added to the datapath directly.
+ *
+ * Returns the result of send_buffered(); the rule is installed regardless. */
+static int
+add_flow(struct ofproto *p, struct ofconn *ofconn,
+         struct ofp_flow_mod *ofm, size_t n_actions)
+{
+    struct rule *rule, *displaced_rule;
+    int byte_count;
+    int buffer_error = 0;
+
+    buffer_error = send_buffered(p, ofconn, ofm, n_actions, &byte_count);
+
+    rule = xmalloc(rule_size(n_actions));
+    cls_rule_from_match(&rule->cr, &ofm->match, ntohs(ofm->priority));
+    rule->idle_timeout = ntohs(ofm->idle_timeout);
+    rule->hard_timeout = ntohs(ofm->hard_timeout);
+    rule->used = rule->created = time_msec();
+    /* A successfully sent buffered packet counts as the rule's first hit. */
+    rule->packet_count = byte_count > 0;
+    rule->byte_count = byte_count;
+    rule->tcp_flags = 0;
+    rule->ip_tos = 0;
+    rule->super = NULL;
+    list_init(&rule->list);
+    rule->n_actions = n_actions;
+    memcpy(rule->actions, ofm->actions, n_actions * sizeof *rule->actions);
+
+    displaced_rule = rule_from_cls_rule(classifier_insert(&p->cls, &rule->cr));
+    if (rule->cr.wc.wildcards) {
+        if (displaced_rule) {
+            /* The displaced rule matches exactly the same packets as the new
+             * rule, and it has exactly the same priority, so we can transfer
+             * all displaced_rule's subrules to the new rule. The subrule
+             * actions might have changed, so we have to update the datapath
+             * flows, which also has the convenient side effect of zeroing the
+             * counters for those flows. */
+            struct rule *subrule;
+
+            list_splice(&rule->list, displaced_rule->list.next,
+                        &displaced_rule->list);
+            LIST_FOR_EACH (subrule, struct rule, list, &rule->list) {
+                struct odp_actions actions;
+                struct odp_flow odp_flow;
+
+                subrule->super = rule;
+                ofp_actions_to_odp_actions(
+                    odp_port_to_ofp_port(subrule->cr.flow.in_port),
+                    (const struct ofp_action_header *) rule->actions,
+                    rule->n_actions, &actions);
+                odp_flow.key = subrule->cr.flow;
+                odp_flow.actions = actions.actions;
+                odp_flow.n_actions = actions.n_actions;
+                dpif_flow_add(&p->dpif, &odp_flow);
+                free_actions(&actions);
+            }
+            rule_destroy(displaced_rule);
+        }
+    } else {
+        struct odp_flow odp_flow;
+        struct odp_actions actions;
+
+        ofp_actions_to_odp_actions(ntohs(ofm->match.in_port),
+                                   ofm->actions, n_actions, &actions);
+
+        odp_flow.key = rule->cr.flow;
+        odp_flow.actions = actions.actions;
+        odp_flow.n_actions = actions.n_actions;
+        dpif_flow_add(&p->dpif, &odp_flow);
+        /* Release the translated actions, as the wildcard branch above and
+         * send_buffered() do; only 'odp_flow.stats' is needed from here on. */
+        free_actions(&actions);
+        if (displaced_rule) {
+            if (displaced_rule->super &&
+                displaced_rule->super != UNKNOWN_SUPER) {
+                update_stats(displaced_rule->super, &odp_flow.stats);
+            }
+            rule_destroy(displaced_rule);
+        }
+    }
+    return buffer_error;
+}
+
+/* Applies 'command' (OFPFC_DELETE or OFPFC_MODIFY) from flow-mod 'ofm' to
+ * 'rule'.  Subrules are skipped, since the controller cannot see them.
+ *
+ * On delete, an exact-match rule's datapath flow is deleted, then the rule is
+ * removed from the classifier and destroyed.  On modify, an exact-match
+ * rule's datapath flow is re-added with the new actions, then the rule is
+ * resized to hold the 'n_actions' actions from 'ofm'.
+ *
+ * Changing a wildcarded rule sets 'p->need_revalidate'.  Always returns 0. */
+static int
+modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm,
+            size_t n_actions, uint16_t command, struct rule *rule)
+{
+    bool wildcarded;
+
+    if (rule->super) {
+        /* Subrules are invisible to the controller. */
+        return 0;
+    }
+
+    /* Sample this before touching 'rule': the delete path hands 'rule' to
+     * rule_destroy() (presumably freeing it), so it must not be read again
+     * afterward. */
+    wildcarded = rule->cr.wc.wildcards != 0;
+
+    if (command == OFPFC_DELETE) {
+        if (!wildcarded) {
+            struct odp_flow odp_flow;
+
+            /* Key the datapath delete on the rule's own flow.  The match in
+             * 'ofm' may be wildcarded when this is reached through a loose
+             * delete, so it need not identify this flow. */
+            odp_flow.key = rule->cr.flow;
+            dpif_flow_del(&p->dpif, &odp_flow);
+        }
+        classifier_remove(&p->cls, &rule->cr);
+        rule_destroy(rule);
+    } else {
+        struct rule *old_rule = rule;
+        if (!wildcarded) {
+            struct odp_flow odp_flow;
+            struct odp_actions actions;
+
+            /* 'cr.flow.in_port' is converted with odp_port_to_ofp_port()
+             * before translation, as add_flow() does for its subrules. */
+            ofp_actions_to_odp_actions(
+                odp_port_to_ofp_port(rule->cr.flow.in_port),
+                ofm->actions, n_actions, &actions);
+            odp_flow.key = rule->cr.flow;
+            odp_flow.actions = actions.actions;
+            odp_flow.n_actions = actions.n_actions;
+            dpif_flow_add(&p->dpif, &odp_flow);
+            free_actions(&actions);
+
+            update_stats(rule, &odp_flow.stats);
+        }
+        rule = xrealloc(rule, rule_size(n_actions));
+        /* Keep the stored count in step with the resized action array. */
+        rule->n_actions = n_actions;
+        memcpy(rule->actions, ofm->actions, n_actions * sizeof *rule->actions);
+        cls_rule_moved(&p->cls, &old_rule->cr, &rule->cr);
+    }
+
+    if (wildcarded) {
+        p->need_revalidate = true;
+    }
+    return 0;
+}
+
+/* Handles the strict variants of modify and delete: looks up the one rule
+ * whose flow, wildcards, and priority exactly equal those in 'ofm' and, if it
+ * exists, applies 'command' to it with modify_flow().
+ *
+ * A delete whose 'out_port' is not OFPP_NONE is skipped unless
+ * rule_has_out_port() accepts the rule.  Always returns 0. */
+static int
+modify_flows_strict(struct ofproto *p, const struct ofp_flow_mod *ofm,
+                    size_t n_actions, uint16_t command)
+{
+    struct rule *rule;
+    uint32_t wildcards;
+    flow_t flow;
+
+    flow_from_match(&flow, &wildcards, &ofm->match);
+    rule = rule_from_cls_rule(classifier_find_rule_exactly(
+                                  &p->cls, &flow, wildcards,
+                                  ntohs(ofm->priority)));
+
+    if (rule) {
+        if (command == OFPFC_DELETE
+            && ofm->out_port != htons(OFPP_NONE)
+            && !rule_has_out_port(rule, ofm->out_port)) {
+            return 0;
+        }
+
+        modify_flow(p, ofm, n_actions, command, rule);
+    }
+    return 0;
+}
+
+/* Arguments passed through the classifier iteration to modify_flows_cb(). */
+struct modify_flows_cbdata {
+    struct ofproto *ofproto;
+    const struct ofp_flow_mod *ofm;
+    uint16_t out_port;
+    size_t n_actions;
+    uint16_t command;
+};
+
+static void
+modify_flows_cb(struct cls_rule *rule_, void *cbdata_)
+{
+
struct rule *rule = rule_from_cls_rule(rule_); + struct modify_flows_cbdata *cbdata = cbdata_; + + modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions, + cbdata->command, rule); +} + +static int +modify_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm, + size_t n_actions, uint16_t command) +{ + struct modify_flows_cbdata cbdata; + struct cls_rule target; + + cbdata.ofproto = p; + cbdata.ofm = ofm; + cbdata.out_port = (command == OFPFC_DELETE ? ofm->out_port + : htons(OFPP_NONE)); + cbdata.n_actions = n_actions; + cbdata.command = command; + + cls_rule_from_match(&target, &ofm->match, 0); + classifier_for_each_match(&p->cls, &target, CLS_INC_WILD | CLS_INC_EXACT, + modify_flows_cb, &cbdata); + return 0; +} + +static int +handle_flow_mod(struct ofproto *p, struct ofconn *ofconn, + struct ofp_flow_mod *ofm) +{ + size_t n_actions; + int error; + + error = check_ofp_message_array(&ofm->header, OFPT_FLOW_MOD, sizeof *ofm, + sizeof *ofm->actions, &n_actions); + if (error) { + return error; + } + + normalize_match(&ofm->match); + if (!ofm->match.wildcards) { + ofm->priority = htons(UINT16_MAX); + } + + error = validate_actions((const union ofp_action *) ofm->actions, + n_actions); + if (error) { + return error; + } + + switch (ntohs(ofm->command)) { + case OFPFC_ADD: + return add_flow(p, ofconn, ofm, n_actions); + + case OFPFC_MODIFY: + return modify_flows_loose(p, ofm, n_actions, OFPFC_MODIFY); + + case OFPFC_MODIFY_STRICT: + return modify_flows_strict(p, ofm, n_actions, OFPFC_MODIFY); + + case OFPFC_DELETE: + return modify_flows_loose(p, ofm, n_actions, OFPFC_DELETE); + + case OFPFC_DELETE_STRICT: + return modify_flows_strict(p, ofm, n_actions, OFPFC_DELETE); + + default: + return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_BAD_COMMAND); + } +} + +static int +handle_vendor(struct ofproto *p, struct ofconn *ofconn, void *msg) +{ + struct ofp_vendor_header *ovh = msg; + struct nicira_header *nh; + + if (ntohs(ovh->header.length) < sizeof(struct 
ofp_vendor_header)) { + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); + } + if (ovh->vendor != htonl(NX_VENDOR_ID)) { + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR); + } + if (ntohs(ovh->header.length) < sizeof(struct nicira_header)) { + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); + } + + nh = msg; + switch (ntohl(nh->subtype)) { + case NXT_STATUS_REQUEST: + return switch_status_handle_request(p->switch_status, ofconn->rconn, + msg); + + case NXT_ACT_SET_CONFIG: + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); /* XXX */ + + case NXT_ACT_GET_CONFIG: + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); /* XXX */ + + case NXT_COMMAND_REQUEST: + if (p->executer) { + return executer_handle_request(p->executer, ofconn->rconn, msg); + } + break; + } + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); +} + +static void +handle_openflow(struct ofconn *ofconn, struct ofproto *p, + struct ofpbuf *ofp_msg) +{ + struct ofp_header *oh = ofp_msg->data; + int error; + + switch (oh->type) { + case OFPT_ECHO_REPLY: + error = 0; + break; + + case OFPT_FEATURES_REQUEST: + error = handle_features_request(p, ofconn, oh); + break; + + case OFPT_GET_CONFIG_REQUEST: + error = handle_get_config_request(p, ofconn, oh); + break; + + case OFPT_SET_CONFIG: + error = handle_set_config(p, ofconn, ofp_msg->data); + break; + + case OFPT_PACKET_OUT: + error = handle_packet_out(p, ofconn, ofp_msg->data); + break; + + case OFPT_PORT_MOD: + error = handle_port_mod(p, oh); + break; + + case OFPT_FLOW_MOD: + error = handle_flow_mod(p, ofconn, ofp_msg->data); + break; + + case OFPT_STATS_REQUEST: + error = handle_stats_request(p, ofconn, oh); + break; + + case OFPT_VENDOR: + error = handle_vendor(p, ofconn, ofp_msg->data); + break; + + default: + if (VLOG_IS_WARN_ENABLED()) { + char *s = ofp_to_string(oh, ntohs(oh->length), 2); + VLOG_DBG_RL(&rl, "OpenFlow message ignored: %s", s); + free(s); + } + error = ofp_mkerr(OFPET_BAD_REQUEST, 
OFPBRC_BAD_TYPE); + break; + } + + if (error) { + send_error_oh(ofconn, ofp_msg->data, error); + } +} + +static void +handle_odp_msg(struct ofproto *p, struct ofpbuf *packet) +{ + struct odp_msg *msg = packet->data; + uint16_t in_port = odp_port_to_ofp_port(msg->port); + struct rule *rule, *subrule; + struct odp_actions actions; + struct ofpbuf payload; + flow_t flow; + + /* Handle controller actions. */ + if (msg->type == _ODPL_ACTION_NR) { + pinsched_send(p->action_sched, in_port, packet, + send_packet_in_action, p); + return; + } + + payload.data = msg + 1; + payload.size = msg->length - sizeof *msg; + flow_extract(&payload, msg->port, &flow); + + rule = rule_from_cls_rule(classifier_lookup(&p->cls, &flow)); + if (!rule) { + struct ofport *port; + + if (p->in_band && in_band_handle_flow_miss(p->in_band, p, msg->port, + &flow, &payload)) { + ofpbuf_delete(packet); + return; + } + + if (p->fail_open && fail_open_handle_flow_miss(p->fail_open, p, + msg->port, &flow, + &payload)) { + ofpbuf_delete(packet); + return; + } + + /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. 
*/ + port = port_array_get(&p->ports, msg->port); + if (port) { + if (port->opp.config & OFPPC_NO_PACKET_IN) { + /* XXX install 'drop' flow entry */ + ofpbuf_delete(packet); + return; + } + } else { + VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, msg->port); + } + + pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p); + return; + } + + if (rule->cr.wc.wildcards) { + struct rule *old_sr; + struct odp_flow odp_flow; + + subrule = xmalloc(sizeof *subrule); + cls_rule_from_flow(&subrule->cr, &flow, 0, 0); + subrule->idle_timeout = rule->idle_timeout; + subrule->hard_timeout = rule->hard_timeout; + subrule->used = subrule->created = time_msec(); + subrule->packet_count = subrule->byte_count = 0; + subrule->tcp_flags = 0; + subrule->ip_tos = 0; + subrule->super = rule; + subrule->n_actions = 0; + + old_sr = rule_from_cls_rule(classifier_insert(&p->cls, &subrule->cr)); + if (old_sr) { + if (!old_sr->super) { + /* Put old_sr back. */ + cls_rule_replace(&p->cls, &subrule->cr, &old_sr->cr); + free(subrule); + + /* Execute old_sr on packet. */ + rule_make_actions(old_sr, &actions); + dpif_execute(&p->dpif, msg->port, + actions.actions, actions.n_actions, &payload); + free_actions(&actions); + ofpbuf_delete(packet); + return; + } else { + subrule->packet_count += old_sr->packet_count; + subrule->byte_count += old_sr->byte_count; + rule_destroy(old_sr); + } + } + list_push_back(&rule->list, &subrule->list); + rule->used = time_msec(); + + /* Install flow entry into datapath. */ + rule_make_actions(subrule, &actions); + odp_flow.key = flow; + odp_flow.actions = actions.actions; + odp_flow.n_actions = actions.n_actions; + dpif_flow_add(&p->dpif, &odp_flow); + } else { + /* XXX This should happen only if a flow got dropped--perhaps a hash + * collision? Oh, it could also indicate that the packet was buffered + * before we processed another packet from the same flow. 
*/
+        subrule = rule;
+        rule_make_actions(subrule, &actions);
+    }
+
+    /* Execute subrule on packet. */
+    dpif_execute(&p->dpif, msg->port, actions.actions, actions.n_actions,
+                 &payload);
+    free_actions(&actions);
+    ofpbuf_delete(packet);
+}
+
+/* Classifier callback that re-checks which rule a subrule belongs to.  'p_'
+ * is the struct ofproto.  Rules that are not subrules are ignored.
+ *
+ * If the classifier lookup for the subrule's flow no longer returns its
+ * recorded super-rule, then either:
+ *
+ *   - nothing matches: the datapath flow is deleted and the subrule is
+ *     destroyed; or
+ *
+ *   - a different rule matches: the subrule adopts that rule as its super,
+ *     copies its timeouts and creation time, resets 'used' to 0, and has its
+ *     datapath actions replaced. */
+static void
+revalidate_subrule(struct cls_rule *sub_, void *p_)
+{
+    struct rule *sub = rule_from_cls_rule(sub_);
+    struct ofproto *p = p_;
+    struct rule *super;
+
+    if (!sub->super) {
+        /* Not a subrule. */
+        return;
+    }
+
+    super = rule_from_cls_rule(classifier_lookup(&p->cls, &sub->cr.flow));
+    if (super != sub->super) {
+        if (!super) {
+            struct odp_flow odp_flow;
+            odp_flow.key = sub->cr.flow;
+            dpif_flow_del(&p->dpif, &odp_flow);
+            rule_destroy(sub);
+        } else {
+            struct odp_actions actions;
+
+            sub->super = super;
+            sub->hard_timeout = super->hard_timeout;
+            sub->idle_timeout = super->idle_timeout;
+            sub->created = super->created;
+            sub->used = 0;
+
+            rule_make_actions(sub, &actions);
+            dpif_flow_set_actions(&p->dpif, &sub->cr.flow, actions.actions,
+                                  actions.n_actions);
+            free_actions(&actions);
+        }
+    }
+}
+
+/* Composes and returns an OFPT_FLOW_EXPIRED message describing 'rule', which
+ * expired at time 'now' (in milliseconds) for the given 'reason'.
+ *
+ * Multi-byte fields are stored in network byte order, as is done for the
+ * same counters in flow_stats_cb(). */
+static struct ofpbuf *
+compose_flow_exp(const struct rule *rule, long long int now, uint8_t reason)
+{
+    struct ofp_flow_expired *ofe;
+    struct ofpbuf *buf;
+
+    ofe = make_openflow(sizeof *ofe, OFPT_FLOW_EXPIRED, &buf);
+    flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &ofe->match);
+    ofe->priority = htons(rule->cr.priority);
+    ofe->reason = reason;
+    /* Duration is reported in whole seconds. */
+    ofe->duration = htonl((now - rule->created) / 1000);
+    ofe->packet_count = htonll(rule->packet_count);
+    ofe->byte_count = htonll(rule->byte_count);
+
+    return buf;
+}
+
+static void
+send_flow_exp(struct ofproto *p, struct rule *rule,
+              long long int now, uint8_t reason)
+{
+    struct ofconn *ofconn;
+    struct ofconn *prev;
+    struct ofpbuf *buf;
+
+    prev = NULL;
+    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+        if (ofconn->send_flow_exp) {
+            if (prev) {
+                queue_tx(ofpbuf_clone(buf), prev);
+            } else {
+                buf = compose_flow_exp(rule, now, reason);
+            }
+            prev = ofconn;
+        }
+
} + if (prev) { + queue_tx(buf, prev); + } + + if (p->netflow && !rule->cr.wc.wildcards) { + /* XXX this interface isn't so great */ + struct odp_flow_stats stats; + stats.n_packets = rule->packet_count; + stats.n_bytes = rule->byte_count; + stats.used_sec = rule->used / 1000; + stats.used_nsec = rule->used % 1000 * 1000000; + stats.tcp_flags = rule->tcp_flags; + stats.ip_tos = rule->ip_tos; + stats.reserved = 0; + netflow_expire(p->netflow, &rule->cr.flow, &stats, rule->created); + } +} + +static void +expire_rule(struct cls_rule *cls_rule, void *p_) +{ + struct ofproto *p = p_; + struct rule *rule = rule_from_cls_rule(cls_rule); + long long int hard_expiration, idle_expiration, expiration, now; + + hard_expiration = (rule->hard_timeout + ? rule->created + rule->hard_timeout * 1000 + : LLONG_MAX); + idle_expiration = (rule->idle_timeout + && (rule->super || list_is_empty(&rule->list)) + ? rule->used + rule->idle_timeout * 1000 + : LLONG_MAX); + expiration = MIN(hard_expiration, idle_expiration); + + if (expiration == LLONG_MAX) { + return; + } + + now = time_msec(); + if (now < expiration) { + return; + } + + if (!rule->super) { + if (rule->cr.wc.wildcards) { + /* Update stats. (This code will be a no-op if the rule expired + * due to an idle timeout, because in that case the rule has no + * subrules left.) */ + struct rule *subrule, *next_subrule; + LIST_FOR_EACH_SAFE (subrule, next_subrule, + struct rule, list, &rule->list) { + struct odp_flow odp_flow; + odp_flow.key = subrule->cr.flow; + if (!dpif_flow_del(&p->dpif, &odp_flow)) { + update_stats(rule, &odp_flow.stats); + } + rule_destroy(subrule); + } + } else { + struct odp_flow odp_flow; + memset(&odp_flow, 0, sizeof odp_flow); + odp_flow.key = rule->cr.flow; + dpif_flow_del(&p->dpif, &odp_flow); + } + } + +#if 0 + printf("expiring rule (%s): ", + (now >= hard_expiration ? 
"hard" : "idle")); + flow_print(stdout, &rule->cr.flow); + printf("\n"); +#endif + + send_flow_exp(p, rule, now, + (now >= hard_expiration + ? OFPER_HARD_TIMEOUT : OFPER_IDLE_TIMEOUT)); + classifier_remove(&p->cls, &rule->cr); + rule_destroy(rule); +} + +static void +update_used(struct ofproto *p) +{ + struct odp_flow *flows; + size_t n_flows; + size_t i; + int error; + + error = dpif_flow_list_all(&p->dpif, &flows, &n_flows); + if (error) { + return; + } + + for (i = 0; i < n_flows; i++) { + struct odp_flow *f = &flows[i]; + struct rule *rule; + + rule = rule_from_cls_rule( + classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX)); + if (!rule) { + printf("deleting mysterious rule from datapath\n"); + flow_print(stdout, &f->key); + printf("\n"); + dpif_flow_del(&p->dpif, f); + continue; + } + + update_time(rule, &f->stats); + /* XXX update p->next_expiration */ + } + free(flows); +} + +static void +do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id, + const struct ofpbuf *packet, int send_len) +{ + struct ofp_packet_in *opi; + struct ofpbuf payload, *buf; + struct odp_msg *msg; + + msg = packet->data; + payload.data = msg + 1; + payload.size = msg->length - sizeof *msg; + + send_len = MIN(send_len, payload.size); + buf = ofpbuf_new(sizeof *opi + send_len); + opi = put_openflow_xid(offsetof(struct ofp_packet_in, data), + OFPT_PACKET_IN, 0, buf); + opi->buffer_id = htonl(buffer_id); + opi->total_len = htons(payload.size); + opi->in_port = htons(odp_port_to_ofp_port(msg->port)); + opi->reason = msg->type == _ODPL_ACTION_NR ? 
OFPR_ACTION : OFPR_NO_MATCH; + ofpbuf_put(buf, payload.data, MIN(send_len, payload.size)); + queue_tx(buf, ofconn); +} + +static void +send_packet_in_action(struct ofpbuf *packet, void *p_) +{ + struct ofproto *p = p_; + struct ofconn *ofconn; + struct odp_msg *msg; + + msg = packet->data; + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + if (ofconn == p->controller || ofconn->miss_send_len) { + do_send_packet_in(ofconn, UINT32_MAX, packet, msg->arg); + } + } + ofpbuf_delete(packet); +} + +static void +send_packet_in_miss(struct ofpbuf *packet, void *p_) +{ + struct ofproto *p = p_; + struct ofconn *ofconn; + struct ofpbuf payload; + struct odp_msg *msg; + + msg = packet->data; + payload.data = msg + 1; + payload.size = msg->length - sizeof *msg; + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + if (ofconn->miss_send_len) { + uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload, + msg->port); + int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len + : UINT32_MAX); + do_send_packet_in(ofconn, buffer_id, packet, send_len); + } + } + ofpbuf_delete(packet); +} + +static uint64_t +pick_datapath_id(struct dpif *dpif) +{ + struct odp_port port; + uint8_t ea[ETH_ADDR_LEN]; + int error; + + error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &port); + if (!error) { + error = netdev_nodev_get_etheraddr(port.devname, ea); + if (!error) { + return eth_addr_to_uint64(ea); + } + VLOG_WARN("could not get MAC address for %s (%s)", + port.devname, strerror(error)); + } else { + VLOG_WARN("datapath local port query failed (%s)", strerror(error)); + } + + eth_addr_random(ea); + ea[0] = 0x00; /* Set Nicira OUI. 
*/ + ea[1] = 0x23; + ea[2] = 0x20; + return eth_addr_to_uint64(ea); +} + diff --git a/secchan/ratelimit.h b/secchan/ofproto.h similarity index 68% rename from secchan/ratelimit.h rename to secchan/ofproto.h index 25ab9777..df0acd8b 100644 --- a/secchan/ratelimit.h +++ b/secchan/ofproto.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -31,15 +31,27 @@ * derivatives without specific, written prior permission. */ -#ifndef RATELIMIT_H -#define RATELIMIT_H 1 +#ifndef OFPROTO_H +#define OFPROTO_H 1 -struct rconn; -struct secchan; +#include "flow.h" + +struct dpif; +struct ofproto; struct settings; struct switch_status; -void rate_limit_start(struct secchan *, const struct settings *, - struct switch_status *, struct rconn *remote); +struct ofproto *ofproto_create(const struct settings *); +void ofproto_reconfigure(struct ofproto *); +void ofproto_run(struct ofproto *); +void ofproto_wait(struct ofproto *); +bool ofproto_is_alive(const struct ofproto *p); + +int ofproto_send_packet(struct ofproto *p, const flow_t *, + const union ofp_action *, size_t n_actions, + const struct ofpbuf *); +void ofproto_setup_exact_flow(struct ofproto *, const flow_t *, + const union ofp_action *, size_t n_actions, + const struct ofpbuf *); -#endif /* ratelimit.h */ +#endif /* ofproto.h */ diff --git a/secchan/ratelimit.c b/secchan/pinsched.c similarity index 52% rename from secchan/ratelimit.c rename to secchan/pinsched.c index 769889c0..fd877e43 100644 --- a/secchan/ratelimit.c +++ b/secchan/pinsched.c @@ -32,27 +32,30 @@ */ #include -#include "ratelimit.h" +#include "pinsched.h" #include #include #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" +#include "port-array.h" #include "queue.h" +#include "random.h" #include "rconn.h" #include "secchan.h" #include 
"status.h" #include "timeval.h" #include "vconn.h" -struct rate_limiter { - const struct settings *s; - struct rconn *remote_rconn; +struct pinsched { + /* Client-supplied parameters. */ + int rate_limit; /* Packets added to bucket per second. */ + int burst_limit; /* Maximum token bucket size, in packets. */ /* One queue per physical port. */ - struct ofp_queue queues[OFPP_MAX]; + struct port_array queues; /* Array of "struct ofp_queue *". */ int n_queued; /* Sum over queues[*].n. */ - int next_tx_port; /* Next port to check in round-robin. */ + unsigned int last_tx_port; /* Last port checked in round-robin. */ /* Token bucket. * @@ -73,17 +76,33 @@ struct rate_limiter { unsigned long long n_tx_dropped; /* # dropped due to tx overflow. */ }; -/* Drop a packet from the longest queue in 'rl'. */ +static struct ofpbuf * +dequeue_packet(struct pinsched *ps, struct ofp_queue *q, + unsigned int port_no) +{ + struct ofpbuf *packet = queue_pop_head(q); + if (!q->n) { + free(q); + port_array_set(&ps->queues, port_no, NULL); + } + ps->n_queued--; + return packet; +} + +/* Drop a packet from the longest queue in 'ps'. */ static void -drop_packet(struct rate_limiter *rl) +drop_packet(struct pinsched *ps) { struct ofp_queue *longest; /* Queue currently selected as longest. */ int n_longest; /* # of queues of same length as 'longest'. */ + unsigned int longest_port_no; + unsigned int port_no; struct ofp_queue *q; - longest = &rl->queues[0]; + longest = port_array_first(&ps->queues, &port_no); + longest_port_no = port_no; n_longest = 1; - for (q = &rl->queues[0]; q < &rl->queues[OFPP_MAX]; q++) { + while ((q = port_array_next(&ps->queues, &port_no)) != NULL) { if (longest->n < q->n) { longest = q; n_longest = 1; @@ -94,136 +113,113 @@ drop_packet(struct rate_limiter *rl) * distribution (Knuth algorithm 3.4.2R). */ if (!random_range(n_longest)) { longest = q; + longest_port_no = port_no; } } } /* FIXME: do we want to pop the tail instead? 
*/ - ofpbuf_delete(queue_pop_head(longest)); - rl->n_queued--; + ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no)); } /* Remove and return the next packet to transmit (in round-robin order). */ static struct ofpbuf * -dequeue_packet(struct rate_limiter *rl) +get_tx_packet(struct pinsched *ps) { - unsigned int i; - - for (i = 0; i < OFPP_MAX; i++) { - unsigned int port = (rl->next_tx_port + i) % OFPP_MAX; - struct ofp_queue *q = &rl->queues[port]; - if (q->n) { - rl->next_tx_port = (port + 1) % OFPP_MAX; - rl->n_queued--; - return queue_pop_head(q); - } + struct ofp_queue *q = port_array_next(&ps->queues, &ps->last_tx_port); + if (!q) { + q = port_array_first(&ps->queues, &ps->last_tx_port); } - NOT_REACHED(); + return dequeue_packet(ps, q, ps->last_tx_port); } /* Add tokens to the bucket based on elapsed time. */ static void -refill_bucket(struct rate_limiter *rl) +refill_bucket(struct pinsched *ps) { - const struct settings *s = rl->s; long long int now = time_msec(); - long long int tokens = (now - rl->last_fill) * s->rate_limit + rl->tokens; + long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; if (tokens >= 1000) { - rl->last_fill = now; - rl->tokens = MIN(tokens, s->burst_limit * 1000); + ps->last_fill = now; + ps->tokens = MIN(tokens, ps->burst_limit * 1000); } } -/* Attempts to remove enough tokens from 'rl' to transmit a packet. Returns +/* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns * true if successful, false otherwise. (In the latter case no tokens are * removed.) 
*/ static bool -get_token(struct rate_limiter *rl) +get_token(struct pinsched *ps) { - if (rl->tokens >= 1000) { - rl->tokens -= 1000; + if (ps->tokens >= 1000) { + ps->tokens -= 1000; return true; } else { return false; } } -static bool -rate_limit_local_packet_cb(struct relay *r, void *rl_) +void +pinsched_send(struct pinsched *ps, uint16_t port_no, + struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) { - struct rate_limiter *rl = rl_; - const struct settings *s = rl->s; - struct ofp_packet_in *opi; - - opi = get_ofp_packet_in(r); - if (!opi) { - return false; - } - - if (opi->reason == OFPR_ACTION) { - /* Don't rate-limit 'ofp-packet_in's generated by flows that the - * controller set up. XXX we should really just rate-limit them - * *separately* so that no one can flood the controller this way. */ - return false; - } - - if (!rl->n_queued && get_token(rl)) { + if (!ps) { + cb(packet, aux); + } else if (!ps->n_queued && get_token(ps)) { /* In the common case where we are not constrained by the rate limit, * let the packet take the normal path. */ - rl->n_normal++; - return false; + ps->n_normal++; + cb(packet, aux); } else { /* Otherwise queue it up for the periodic callback to drain out. 
*/ - struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf; - int port = ntohs(opi->in_port) % OFPP_MAX; - if (rl->n_queued >= s->burst_limit) { - drop_packet(rl); + struct ofp_queue *q; + + if (ps->n_queued >= ps->burst_limit) { + drop_packet(ps); } - queue_push_tail(&rl->queues[port], ofpbuf_clone(msg)); - rl->n_queued++; - rl->n_limited++; - return true; + q = port_array_get(&ps->queues, port_no); + if (!q) { + q = xmalloc(sizeof *q); + queue_init(q); + port_array_set(&ps->queues, port_no, q); + } + queue_push_tail(q, packet); + ps->n_queued++; + ps->n_limited++; } } static void -rate_limit_status_cb(struct status_reply *sr, void *rl_) +pinsched_status_cb(struct status_reply *sr, void *ps_) { - struct rate_limiter *rl = rl_; + struct pinsched *ps = ps_; - status_reply_put(sr, "normal=%llu", rl->n_normal); - status_reply_put(sr, "limited=%llu", rl->n_limited); - status_reply_put(sr, "queue-dropped=%llu", rl->n_queue_dropped); - status_reply_put(sr, "tx-dropped=%llu", rl->n_tx_dropped); + status_reply_put(sr, "normal=%llu", ps->n_normal); + status_reply_put(sr, "limited=%llu", ps->n_limited); + status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); + status_reply_put(sr, "tx-dropped=%llu", ps->n_tx_dropped); } -static void -rate_limit_periodic_cb(void *rl_) +void +pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) { - struct rate_limiter *rl = rl_; - int i; + if (ps) { + int i; - /* Drain some packets out of the bucket if possible, but limit the number - * of iterations to allow other code to get work done too. */ - refill_bucket(rl); - for (i = 0; rl->n_queued && get_token(rl) && i < 50; i++) { - /* Use a small, arbitrary limit for the amount of queuing to do here, - * because the TCP connection is responsible for buffering and there is - * no point in trying to transmit faster than the TCP connection can - * handle. 
*/ - struct ofpbuf *b = dequeue_packet(rl); - if (rconn_send_with_limit(rl->remote_rconn, b, &rl->n_txq, 10)) { - rl->n_tx_dropped++; + /* Drain some packets out of the bucket if possible, but limit the + * number of iterations to allow other code to get work done too. */ + refill_bucket(ps); + for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { + cb(get_tx_packet(ps), aux); } } } -static void -rate_limit_wait_cb(void *rl_) +void +pinsched_wait(struct pinsched *ps) { - struct rate_limiter *rl = rl_; - if (rl->n_queued) { - if (rl->tokens >= 1000) { + if (ps && ps->n_queued) { + if (ps->tokens >= 1000) { /* We can transmit more packets as soon as we're called again. */ poll_immediate_wake(); } else { @@ -234,31 +230,31 @@ rate_limit_wait_cb(void *rl_) } } -static const struct hook_class rate_limit_hook_class = { - rate_limit_local_packet_cb, /* local_packet_cb */ - NULL, /* remote_packet_cb */ - rate_limit_periodic_cb, /* periodic_cb */ - rate_limit_wait_cb, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -rate_limit_start(struct secchan *secchan, const struct settings *s, - struct switch_status *ss, struct rconn *remote) +/* Creates and returns a scheduler for sending packet-in messages. 
*/ +struct pinsched * +pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) { - struct rate_limiter *rl; - size_t i; + struct pinsched *ps; - rl = xcalloc(1, sizeof *rl); - rl->s = s; - rl->remote_rconn = remote; - for (i = 0; i < ARRAY_SIZE(rl->queues); i++) { - queue_init(&rl->queues[i]); + ps = xcalloc(1, sizeof *ps); + ps->rate_limit = rate_limit; + ps->burst_limit = burst_limit; + port_array_init(&ps->queues); + ps->n_queued = 0; + ps->last_tx_port = PORT_ARRAY_SIZE; + ps->last_fill = time_msec(); + ps->tokens = rate_limit * 100; + ps->n_txq = 0; + ps->n_normal = 0; + ps->n_limited = 0; + ps->n_queue_dropped = 0; + ps->n_tx_dropped = 0; + + if (ss) { + switch_status_register_category(ss, "rate-limit", + pinsched_status_cb, ps); } - rl->last_fill = time_msec(); - rl->tokens = s->rate_limit * 100; - switch_status_register_category(ss, "rate-limit", - rate_limit_status_cb, rl); - add_hook(secchan, &rate_limit_hook_class, rl); + + return ps; } + diff --git a/udatapath/dp_act.h b/secchan/pinsched.h similarity index 75% rename from udatapath/dp_act.h rename to secchan/pinsched.h index daa8b270..76e9b0c4 100644 --- a/udatapath/dp_act.h +++ b/secchan/pinsched.h @@ -1,6 +1,6 @@ /* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University - * + * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute @@ -13,10 +13,10 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
- + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -25,25 +25,26 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * + * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. */ -#ifndef DP_ACT_H -#define DP_ACT_H 1 +#ifndef PINSCHED_H +#define PINSCHED_H 1 -#include "openflow/openflow.h" -#include "switch-flow.h" -#include "datapath.h" +#include <stdint.h> -#define ACT_VALIDATION_OK ((uint16_t)-1) +struct ofpbuf; +struct switch_status; -uint16_t dp_validate_actions(struct datapath *, const struct sw_flow_key *, - const struct ofp_action_header *, size_t); -void execute_actions(struct datapath *, struct ofpbuf *, - struct sw_flow_key *, const struct ofp_action_header *, - size_t action_len, int ignore_no_fwd); +typedef void pinsched_tx_cb(struct ofpbuf *, void *aux); +struct pinsched *pinsched_create(int rate_limit, int burst_limit, + struct switch_status *); +void pinsched_send(struct pinsched *, uint16_t port_no, struct ofpbuf *, + pinsched_tx_cb *, void *aux); +void pinsched_run(struct pinsched *, pinsched_tx_cb *, void *aux); +void pinsched_wait(struct pinsched *); -#endif /* dp_act.h */ +#endif /* pinsched.h */ diff --git a/secchan/pktbuf.c b/secchan/pktbuf.c new file mode 100644 index 00000000..9fb0017f --- /dev/null +++ b/secchan/pktbuf.c @@ -0,0 +1,157 @@ +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those
enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include <config.h> +#include "pktbuf.h" +#include <inttypes.h> +#include <stdlib.h> +#include "ofpbuf.h" +#include "timeval.h" +#include "util.h" +#include "vconn.h" + +#define THIS_MODULE VLM_pktbuf +#include "vlog.h" + +/* Buffers are identified by a 32-bit opaque ID. We divide the ID + * into a buffer number (low bits) and a cookie (high bits). The buffer number + * is an index into an array of buffers. The cookie distinguishes between + * different packets that have occupied a single buffer. Thus, the more + * buffers we have, the lower-quality the cookie...
*/ +#define PKTBUF_BITS 8 +#define PKTBUF_MASK (PKTBUF_CNT - 1) +#define PKTBUF_CNT (1u << PKTBUF_BITS) + +#define COOKIE_BITS (32 - PKTBUF_BITS) +#define COOKIE_MAX ((1u << COOKIE_BITS) - 1) + +#define OVERWRITE_MSECS 5000 + +struct packet { + struct ofpbuf *buffer; + uint32_t cookie; + long long int timeout; + uint16_t in_port; +}; + +struct pktbuf { + struct packet packets[PKTBUF_CNT]; + unsigned int buffer_idx; +}; + +int +pktbuf_capacity(void) +{ + return PKTBUF_CNT; +} + +struct pktbuf * +pktbuf_create(void) +{ + return xcalloc(1, sizeof *pktbuf_create()); +} + +void +pktbuf_destroy(struct pktbuf *pb) +{ + if (pb) { + size_t i; + + for (i = 0; i < PKTBUF_CNT; i++) { + ofpbuf_delete(pb->packets[i].buffer); + } + free(pb); + } +} + +uint32_t +pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port) +{ + struct packet *p = &pb->packets[pb->buffer_idx]; + pb->buffer_idx = (pb->buffer_idx + 1) & PKTBUF_MASK; + if (p->buffer) { + if (time_msec() < p->timeout) { + return UINT32_MAX; + } + ofpbuf_delete(p->buffer); + } + + /* Don't use maximum cookie value since all-1-bits ID is special. 
*/ + if (++p->cookie >= COOKIE_MAX) { + p->cookie = 0; + } + p->buffer = ofpbuf_clone(buffer); + p->timeout = time_msec() + OVERWRITE_MSECS; + p->in_port = in_port; + return (p - pb->packets) | (p->cookie << PKTBUF_BITS); +} + +int +pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp, + uint16_t *in_port) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 20); + struct packet *p; + int error; + + p = &pb->packets[id & PKTBUF_MASK]; + if (p->cookie == id >> PKTBUF_BITS) { + struct ofpbuf *buffer = p->buffer; + if (buffer) { + *bufferp = buffer; + *in_port = p->in_port; + p->buffer = NULL; + return 0; + } else { + VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id); + error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY); + } + } else { + VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32, + id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS)); + error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE); + } + *bufferp = NULL; + *in_port = -1; + return error; +} + +void +pktbuf_discard(struct pktbuf *pb, uint32_t id) +{ + struct packet *p = &pb->packets[id & PKTBUF_MASK]; + if (p->cookie == id >> PKTBUF_BITS) { + ofpbuf_delete(p->buffer); + p->buffer = NULL; + } +} diff --git a/secchan/stp-secchan.h b/secchan/pktbuf.h similarity index 79% rename from secchan/stp-secchan.h rename to secchan/pktbuf.h index 2d1105f7..734c4939 100644 --- a/secchan/stp-secchan.h +++ b/secchan/pktbuf.h @@ -31,18 +31,21 @@ * derivatives without specific, written prior permission. */ -#ifndef STP_SECCHAN_H -#define STP_SECCHAN_H 1 +#ifndef PKTBUF_H +#define PKTBUF_H 1 -/* Extra time, in seconds, at boot before going into fail-open, to give the - * spanning tree protocol time to figure out the network layout. 
*/ -#define STP_EXTRA_BOOT_TIME 30 +#include <stdint.h> -struct port_watcher; -struct rconn; -struct secchan; +struct pktbuf; +struct ofpbuf; -void stp_start(struct secchan *, struct port_watcher *, - struct rconn *local, struct rconn *remote); +int pktbuf_capacity(void); -#endif /* stp-secchan.h */ +struct pktbuf *pktbuf_create(void); +void pktbuf_destroy(struct pktbuf *); +uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port); +int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp, + uint16_t *in_port); +void pktbuf_discard(struct pktbuf *, uint32_t id); + +#endif /* pktbuf.h */ diff --git a/secchan/port-watcher.c b/secchan/port-watcher.c deleted file mode 100644 index d5b19cce..00000000 --- a/secchan/port-watcher.c +++ /dev/null @@ -1,621 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "port-watcher.h" -#include -#include -#include -#include -#include "dynamic-string.h" -#include "netdev.h" -#include "ofpbuf.h" -#include "openflow/openflow.h" -#include "poll-loop.h" -#include "port-array.h" -#include "rconn.h" -#include "shash.h" -#include "svec.h" -#include "timeval.h" -#include "vconn.h" -#include "xtoxll.h" - -#define THIS_MODULE VLM_port_watcher -#include "vlog.h" - -struct port_watcher_cb { - port_changed_cb_func *port_changed; - void *aux; -}; - -struct port_watcher_local_cb { - local_port_changed_cb_func *local_port_changed; - void *aux; -}; - -struct port_watcher { - struct rconn *local_rconn; - struct rconn *remote_rconn; - struct port_array ports; - time_t last_feature_request; - bool got_feature_reply; - uint64_t datapath_id; - int n_txq; - struct port_watcher_cb cbs[2]; - int n_cbs; - struct port_watcher_local_cb local_cbs[4]; - int n_local_cbs; - char local_port_name[OFP_MAX_PORT_NAME_LEN + 1]; - struct netdev_monitor *mon; - struct shash port_by_name; -}; - -/* Returns the number of fields that differ from 'a' to 'b'. */ -static int -opp_differs(const struct ofp_phy_port *a, const struct ofp_phy_port *b) -{ - BUILD_ASSERT_DECL(sizeof *a == 48); /* Trips when we add or remove fields. 
*/ - return ((a->port_no != b->port_no) - + (memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) != 0) - + (memcmp(a->name, b->name, sizeof a->name) != 0) - + (a->config != b->config) - + (a->state != b->state) - + (a->curr != b->curr) - + (a->advertised != b->advertised) - + (a->supported != b->supported) - + (a->peer != b->peer)); -} - -static void -sanitize_opp(struct ofp_phy_port *opp) -{ - size_t i; - - for (i = 0; i < sizeof opp->name; i++) { - char c = opp->name[i]; - if (c && (c < 0x20 || c > 0x7e)) { - opp->name[i] = '.'; - } - } - opp->name[sizeof opp->name - 1] = '\0'; -} - -static void -call_port_changed_callbacks(struct port_watcher *pw, int port_no, - const struct ofp_phy_port *old, - const struct ofp_phy_port *new) -{ - int i; - for (i = 0; i < pw->n_cbs; i++) { - port_changed_cb_func *port_changed = pw->cbs[i].port_changed; - (port_changed)(port_no, old, new, pw->cbs[i].aux); - } -} - -void -get_port_name(const struct ofp_phy_port *port, char *name, size_t name_size) -{ - char *p; - - memcpy(name, port->name, MIN(name_size, sizeof port->name)); - name[name_size - 1] = '\0'; - for (p = name; *p != '\0'; p++) { - if (*p < 32 || *p > 126) { - *p = '.'; - } - } -} - -static struct ofp_phy_port * -lookup_port(const struct port_watcher *pw, uint16_t port_no) -{ - return port_array_get(&pw->ports, port_no); -} - -static void -call_local_port_changed_callbacks(struct port_watcher *pw) -{ - char name[OFP_MAX_PORT_NAME_LEN + 1]; - const struct ofp_phy_port *port; - int i; - - /* Pass the local port to the callbacks, if it exists. - Pass a null pointer if there is no local port. */ - port = lookup_port(pw, OFPP_LOCAL); - - /* Log the name of the local port. 
*/ - if (port) { - get_port_name(port, name, sizeof name); - } else { - name[0] = '\0'; - } - if (strcmp(pw->local_port_name, name)) { - if (name[0]) { - VLOG_INFO("Identified data path local port as \"%s\".", name); - } else { - VLOG_WARN("Data path has no local port."); - } - strcpy(pw->local_port_name, name); - } - - /* Invoke callbacks. */ - for (i = 0; i < pw->n_local_cbs; i++) { - local_port_changed_cb_func *cb = pw->local_cbs[i].local_port_changed; - (cb)(port, pw->local_cbs[i].aux); - } -} - -static void -update_phy_port(struct port_watcher *pw, struct ofp_phy_port *opp, - uint8_t reason) -{ - struct ofp_phy_port *old; - uint16_t port_no; - - port_no = ntohs(opp->port_no); - old = lookup_port(pw, port_no); - - if (reason == OFPPR_DELETE && old) { - call_port_changed_callbacks(pw, port_no, old, NULL); - free(old); - port_array_set(&pw->ports, port_no, NULL); - } else if (reason == OFPPR_MODIFY || reason == OFPPR_ADD) { - if (old) { - uint32_t s_mask = htonl(OFPPS_STP_MASK); - opp->state = (opp->state & ~s_mask) | (old->state & s_mask); - } - if (!old || opp_differs(opp, old)) { - struct ofp_phy_port new = *opp; - sanitize_opp(&new); - call_port_changed_callbacks(pw, port_no, old, &new); - if (old) { - *old = new; - } else { - port_array_set(&pw->ports, port_no, xmemdup(&new, sizeof new)); - } - } - } -} - -static void -update_netdev_monitor_devices(struct port_watcher *pw) -{ - struct ofp_phy_port *p; - struct svec netdevs; - unsigned int port_no; - - svec_init(&netdevs); - shash_clear(&pw->port_by_name); - for (p = port_array_first(&pw->ports, &port_no); p; - p = port_array_next(&pw->ports, &port_no)) { - const char *name = (const char *) p->name; - svec_add(&netdevs, name); - shash_add(&pw->port_by_name, name, p); - } - netdev_monitor_set_devices(pw->mon, netdevs.names, netdevs.n); - svec_destroy(&netdevs); -} - -static bool -port_watcher_local_packet_cb(struct relay *r, void *pw_) -{ - struct port_watcher *pw = pw_; - struct ofpbuf *msg = 
r->halves[HALF_LOCAL].rxbuf; - struct ofp_header *oh = msg->data; - - if (oh->type == OFPT_FEATURES_REPLY - && msg->size >= offsetof(struct ofp_switch_features, ports)) { - struct ofp_switch_features *osf = msg->data; - bool seen[PORT_ARRAY_SIZE]; - struct ofp_phy_port *p; - unsigned int port_no; - size_t n_ports; - size_t i; - - pw->got_feature_reply = true; - if (pw->datapath_id != osf->datapath_id) { - pw->datapath_id = osf->datapath_id; - VLOG_INFO("Datapath id is %012"PRIx64, ntohll(pw->datapath_id)); - } - - /* Update each port included in the message. */ - memset(seen, false, sizeof seen); - n_ports = ((msg->size - offsetof(struct ofp_switch_features, ports)) - / sizeof *osf->ports); - for (i = 0; i < n_ports; i++) { - struct ofp_phy_port *opp = &osf->ports[i]; - update_phy_port(pw, opp, OFPPR_MODIFY); - seen[ntohs(opp->port_no)] = true; - } - - /* Delete all the ports not included in the message. */ - for (p = port_array_first(&pw->ports, &port_no); p; - p = port_array_next(&pw->ports, &port_no)) { - if (!seen[port_no]) { - update_phy_port(pw, p, OFPPR_DELETE); - } - } - - update_netdev_monitor_devices(pw); - - call_local_port_changed_callbacks(pw); - } else if (oh->type == OFPT_PORT_STATUS - && msg->size >= sizeof(struct ofp_port_status)) { - struct ofp_port_status *ops = msg->data; - update_phy_port(pw, &ops->desc, ops->reason); - if (ops->desc.port_no == htons(OFPP_LOCAL)) { - call_local_port_changed_callbacks(pw); - } - if (ops->reason == OFPPR_ADD || OFPPR_DELETE) { - update_netdev_monitor_devices(pw); - } - } - return false; -} - -static void -bring_netdev_up_or_down(const char *name, bool down) -{ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - struct netdev *netdev; - int retval; - - retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev); - if (!retval) { - if (down) { - retval = netdev_turn_flags_off(netdev, NETDEV_UP, true); - } else { - retval = netdev_turn_flags_on(netdev, NETDEV_UP, true); - } - if (retval) { - 
VLOG_WARN_RL(&rl, "failed to bring network device %s %s: %s", - name, down ? "down" : "up", strerror(retval)); - } - netdev_close(netdev); - } else { - VLOG_WARN_RL(&rl, "failed to open network device %s: %s", - name, strerror(retval)); - } -} - -static bool -port_watcher_remote_packet_cb(struct relay *r, void *pw_) -{ - struct port_watcher *pw = pw_; - struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; - struct ofp_header *oh = msg->data; - - if (oh->type == OFPT_PORT_MOD - && msg->size >= sizeof(struct ofp_port_mod)) { - struct ofp_port_mod *opm = msg->data; - uint16_t port_no = ntohs(opm->port_no); - struct ofp_phy_port *pw_opp = lookup_port(pw, port_no); - if (pw_opp->port_no != htons(OFPP_NONE)) { - struct ofp_phy_port old = *pw_opp; - pw_opp->config = ((pw_opp->config & ~opm->mask) - | (opm->config & opm->mask)); - call_port_changed_callbacks(pw, port_no, &old, pw_opp); - if (pw_opp->port_no == htons(OFPP_LOCAL)) { - call_local_port_changed_callbacks(pw); - } - - if (opm->mask & htonl(OFPPC_PORT_DOWN)) { - bring_netdev_up_or_down((const char *) pw_opp->name, - opm->config & htonl(OFPPC_PORT_DOWN)); - } - } - } - return false; -} - -/* Sets 'bit' in '*word' to 0 or 1 according to 'value'. 
*/ -static void -set_bit(uint32_t bit, bool value, uint32_t *word) -{ - if (value) { - *word |= bit; - } else { - *word &= ~bit; - } -} - -static void -port_watcher_periodic_cb(void *pw_) -{ - struct port_watcher *pw = pw_; - const char *name; - - if (!pw->got_feature_reply - && time_now() >= pw->last_feature_request + 5 - && rconn_is_connected(pw->local_rconn)) { - struct ofpbuf *b; - make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &b); - rconn_send_with_limit(pw->local_rconn, b, &pw->n_txq, 1); - pw->last_feature_request = time_now(); - } - - netdev_monitor_run(pw->mon); - while ((name = netdev_monitor_poll(pw->mon)) != NULL) { - struct ofp_phy_port *opp; - struct ofp_phy_port new_opp; - enum netdev_flags flags; - int retval; - - opp = shash_find_data(&pw->port_by_name, name); - if (!opp) { - continue; - } - - retval = netdev_nodev_get_flags(name, &flags); - if (retval) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "could not get flags for %s", name); - continue; - } - - new_opp = *opp; - set_bit(htonl(OFPPC_PORT_DOWN), flags & NETDEV_UP, &new_opp.config); - set_bit(htonl(OFPPS_LINK_DOWN), flags & NETDEV_CARRIER, - &new_opp.state); - if (opp->config != new_opp.config || opp->state != new_opp.state) { - struct ofp_port_status *ops; - struct ofpbuf *b; - - /* Notify other secchan modules. */ - update_phy_port(pw, &new_opp, OFPPR_MODIFY); - if (new_opp.port_no == htons(OFPP_LOCAL)) { - call_local_port_changed_callbacks(pw); - } - - /* Notify the controller that the flags changed. 
*/ - ops = make_openflow(sizeof *ops, OFPT_PORT_STATUS, &b); - ops->reason = OFPPR_MODIFY; - ops->desc = new_opp; - rconn_send(pw->remote_rconn, b, NULL); - } - } -} - -static void -port_watcher_wait_cb(void *pw_) -{ - struct port_watcher *pw = pw_; - if (!pw->got_feature_reply && rconn_is_connected(pw->local_rconn)) { - if (pw->last_feature_request != TIME_MIN) { - poll_timer_wait(pw->last_feature_request + 5 - time_now()); - } else { - poll_immediate_wake(); - } - } - netdev_monitor_wait(pw->mon); -} - -static void -put_duplexes(struct ds *ds, const char *name, uint32_t features, - uint32_t hd_bit, uint32_t fd_bit) -{ - if (features & (hd_bit | fd_bit)) { - ds_put_format(ds, " %s", name); - if (features & hd_bit) { - ds_put_cstr(ds, "(HD)"); - } - if (features & fd_bit) { - ds_put_cstr(ds, "(FD)"); - } - } -} - -static void -put_features(struct ds *ds, const char *name, uint32_t features) -{ - if (features & (OFPPF_10MB_HD | OFPPF_10MB_FD - | OFPPF_100MB_HD | OFPPF_100MB_FD - | OFPPF_1GB_HD | OFPPF_1GB_FD | OFPPF_10GB_FD)) { - ds_put_cstr(ds, name); - put_duplexes(ds, "10M", features, OFPPF_10MB_HD, OFPPF_10MB_FD); - put_duplexes(ds, "100M", features, - OFPPF_100MB_HD, OFPPF_100MB_FD); - put_duplexes(ds, "1G", features, OFPPF_1GB_HD, OFPPF_1GB_FD); - if (features & OFPPF_10GB_FD) { - ds_put_cstr(ds, " 10G"); - } - if (features & OFPPF_AUTONEG) { - ds_put_cstr(ds, " AUTO_NEG"); - } - if (features & OFPPF_PAUSE) { - ds_put_cstr(ds, " PAUSE"); - } - if (features & OFPPF_PAUSE_ASYM) { - ds_put_cstr(ds, " PAUSE_ASYM"); - } - } -} - -static void -log_port_status(uint16_t port_no, - const struct ofp_phy_port *old, - const struct ofp_phy_port *new, - void *aux UNUSED) -{ - if (VLOG_IS_DBG_ENABLED()) { - if (old && new && (opp_differs(old, new) - == ((old->config != new->config) - + (old->state != new->state)))) - { - /* Don't care if only state or config changed. 
*/ - } else if (!new) { - if (old) { - VLOG_DBG("Port %d deleted", port_no); - } - } else { - struct ds ds = DS_EMPTY_INITIALIZER; - uint32_t curr = ntohl(new->curr); - uint32_t supported = ntohl(new->supported); - ds_put_format(&ds, "\"%s\", "ETH_ADDR_FMT, new->name, - ETH_ADDR_ARGS(new->hw_addr)); - if (curr) { - put_features(&ds, ", current", curr); - } - if (supported) { - put_features(&ds, ", supports", supported); - } - VLOG_DBG("Port %d %s: %s", - port_no, old ? "changed" : "added", ds_cstr(&ds)); - ds_destroy(&ds); - } - } -} - -void -port_watcher_register_callback(struct port_watcher *pw, - port_changed_cb_func *port_changed, - void *aux) -{ - assert(pw->n_cbs < ARRAY_SIZE(pw->cbs)); - pw->cbs[pw->n_cbs].port_changed = port_changed; - pw->cbs[pw->n_cbs].aux = aux; - pw->n_cbs++; -} - -void -port_watcher_register_local_port_callback(struct port_watcher *pw, - local_port_changed_cb_func *cb, - void *aux) -{ - assert(pw->n_local_cbs < ARRAY_SIZE(pw->local_cbs)); - pw->local_cbs[pw->n_local_cbs].local_port_changed = cb; - pw->local_cbs[pw->n_local_cbs].aux = aux; - pw->n_local_cbs++; -} - -uint32_t -port_watcher_get_config(const struct port_watcher *pw, uint16_t port_no) -{ - struct ofp_phy_port *p = lookup_port(pw, port_no); - return p ? ntohl(p->config) : 0; -} - -const char * -port_watcher_get_name(const struct port_watcher *pw, uint16_t port_no) -{ - struct ofp_phy_port *p = lookup_port(pw, port_no); - return p ? (const char *) p->name : NULL; -} - -const uint8_t * -port_watcher_get_hwaddr(const struct port_watcher *pw, uint16_t port_no) -{ - struct ofp_phy_port *p = lookup_port(pw, port_no); - return p ? 
p->hw_addr : NULL; -} - -void -port_watcher_set_flags(struct port_watcher *pw, uint16_t port_no, - uint32_t config, uint32_t c_mask, - uint32_t state, uint32_t s_mask) -{ - struct ofp_phy_port old; - struct ofp_phy_port *p; - struct ofp_port_mod *opm; - struct ofp_port_status *ops; - struct ofpbuf *b; - - p = lookup_port(pw, port_no); - if (!p) { - return; - } - - if (!((ntohl(p->state) ^ state) & s_mask) - && (!((ntohl(p->config) ^ config) & c_mask))) { - return; - } - old = *p; - - /* Update our idea of the flags. */ - p->config = htonl((ntohl(p->config) & ~c_mask) | (config & c_mask)); - p->state = htonl((ntohl(p->state) & ~s_mask) | (state & s_mask)); - call_port_changed_callbacks(pw, port_no, &old, p); - - /* Change the flags in the datapath. */ - opm = make_openflow(sizeof *opm, OFPT_PORT_MOD, &b); - opm->port_no = p->port_no; - memcpy(opm->hw_addr, p->hw_addr, OFP_ETH_ALEN); - opm->config = p->config; - opm->mask = htonl(c_mask); - opm->advertise = htonl(0); - rconn_send(pw->local_rconn, b, NULL); - - /* Notify the controller that the flags changed. 
*/ - ops = make_openflow(sizeof *ops, OFPT_PORT_STATUS, &b); - ops->reason = OFPPR_MODIFY; - ops->desc = *p; - rconn_send(pw->remote_rconn, b, NULL); -} - -bool -port_watcher_is_ready(const struct port_watcher *pw) -{ - return pw->got_feature_reply; -} - -static const struct hook_class port_watcher_hook_class = { - port_watcher_local_packet_cb, /* local_packet_cb */ - port_watcher_remote_packet_cb, /* remote_packet_cb */ - port_watcher_periodic_cb, /* periodic_cb */ - port_watcher_wait_cb, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -port_watcher_start(struct secchan *secchan, - struct rconn *local_rconn, struct rconn *remote_rconn, - struct port_watcher **pwp) -{ - struct port_watcher *pw; - int retval; - - pw = *pwp = xcalloc(1, sizeof *pw); - pw->local_rconn = local_rconn; - pw->remote_rconn = remote_rconn; - pw->last_feature_request = TIME_MIN; - port_array_init(&pw->ports); - pw->local_port_name[0] = '\0'; - retval = netdev_monitor_create(&pw->mon); - if (retval) { - ofp_fatal(retval, "failed to start network device monitoring"); - } - shash_init(&pw->port_by_name); - port_watcher_register_callback(pw, log_port_status, NULL); - add_hook(secchan, &port_watcher_hook_class, pw); -} diff --git a/secchan/port-watcher.h b/secchan/port-watcher.h deleted file mode 100644 index 904e545a..00000000 --- a/secchan/port-watcher.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#ifndef PORT_WATCHER_H -#define PORT_WATCHER_H 1 - -#include -#include "compiler.h" -#include "secchan.h" - -struct ofp_phy_port; -struct port_watcher; -struct secchan; - -void port_watcher_start(struct secchan *, - struct rconn *local, struct rconn *remote, - struct port_watcher **); -bool port_watcher_is_ready(const struct port_watcher *); -uint32_t port_watcher_get_config(const struct port_watcher *, - uint16_t port_no); -const char *port_watcher_get_name(const struct port_watcher *, - uint16_t port_no) UNUSED; -const uint8_t *port_watcher_get_hwaddr(const struct port_watcher *, - uint16_t port_no); -void port_watcher_set_flags(struct port_watcher *, uint16_t port_no, - uint32_t config, uint32_t c_mask, - uint32_t state, uint32_t s_mask); - -typedef void port_changed_cb_func(uint16_t port_no, - const struct ofp_phy_port *old, - const struct ofp_phy_port *new, - void *aux); - -void port_watcher_register_callback(struct port_watcher *, - port_changed_cb_func *port_changed, - void *aux); - -typedef void local_port_changed_cb_func(const struct ofp_phy_port *new, - void *aux); - -void port_watcher_register_local_port_callback(struct port_watcher *pw, - local_port_changed_cb_func *cb, - void *aux); - -void get_port_name(const struct ofp_phy_port *, char *name, size_t name_size); - -#endif /* port-watcher.h */ diff --git a/secchan/secchan.8.in b/secchan/secchan.8.in index 8921a42a..de8fb70e 100644 --- a/secchan/secchan.8.in +++ b/secchan/secchan.8.in @@ -348,30 +348,6 @@ Listens for TCP connections on \fIport\fR (default: 6633). Listens for connections on Unix domain server socket named \fIfile\fR. .RE -.TP -\fB-m\fR, \fB--monitor=\fImethod\fR -Configures the switch to additionally listen for incoming OpenFlow -connections for switch monitoring with \fBdpctl\fR's \fBmonitor\fR -command. The \fImethod\fR must be given as one of the passive -OpenFlow connection methods listed above as acceptable for -\fB--listen\fR. 
- -When \fBdpctl monitor\fR makes a monitoring connection, \fBsecchan\fR -sends it a copy of every OpenFlow message sent to or received from the -kernel in the normal course of its operations. It does not send a -copy of any messages sent to or from the OpenFlow connection to the -controller. Most of these messages will be seen anyhow, however, -because \fBsecchan\fR mainly acts as a relay between the controller -and the kernel. \fBsecchan\fR also does not send a copy of any -messages sent to or from the OpenFlow connection to the controller. -Such messages will typically \fBnot\fR be seen, because \fBsecchan\fR -maintains a separate connection to the kernel for each management -connection. - -Messages are copied to the monitoring connections on a best-effort -basis. In particular, if the socket buffer of the monitoring -connection fills up, some messages will be lost. - .TP \fB--in-band\fR, \fB--out-of-band\fR Configures \fBsecchan\fR to operate in in-band or out-of-band control diff --git a/secchan/secchan.c b/secchan/secchan.c index a7fd4638..3411e4ea 100644 --- a/secchan/secchan.c +++ b/secchan/secchan.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -47,25 +48,20 @@ #include "daemon.h" #include "dirs.h" #include "discovery.h" -#include "executer.h" +#include "dpif.h" #include "fail-open.h" #include "fault.h" #include "in-band.h" #include "leak-checker.h" #include "list.h" +#include "netdev.h" #include "ofpbuf.h" +#include "ofproto.h" #include "openflow/openflow.h" #include "packets.h" -#include "port-watcher.h" #include "poll-loop.h" -#include "ratelimit.h" #include "rconn.h" #include "signals.h" -#ifdef SUPPORT_SNAT -#include "snat.h" -#endif -#include "flow-end.h" -#include "stp-secchan.h" #include "status.h" #include "timeval.h" #include "util.h" @@ -76,56 +72,16 @@ #include "vlog.h" #define THIS_MODULE VLM_secchan -struct hook { - const struct hook_class *class; - void *aux; -}; - -struct secchan { - struct hook 
*hooks; - size_t n_hooks, allocated_hooks; -}; - -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); - -static void reconfigure(struct secchan *); +static void reconfigure(struct ofproto *); static void parse_options(int argc, char *argv[], struct settings *); static void usage(void) NO_RETURN; -static char *vconn_name_without_subscription(const char *); -static struct pvconn *open_passive_vconn(const char *name); -static struct vconn *accept_vconn(struct pvconn *pvconn); - -static struct relay *relay_create(struct rconn *async, - struct rconn *local, struct rconn *remote, - bool is_mgmt_conn); -static struct relay *relay_accept(const struct settings *, struct pvconn *); -static void relay_run(struct relay *, struct secchan *); -static void relay_wait(struct relay *); -static void relay_destroy(struct relay *); - int main(int argc, char *argv[]) { - struct settings s; - - struct list relays = LIST_INITIALIZER(&relays); - - struct secchan secchan; - - struct pvconn *monitor; - - struct pvconn *listeners[MAX_MGMT]; - size_t n_listeners; - struct signal *sighup; - char *local_rconn_name; - struct rconn *async_rconn, *local_rconn, *remote_rconn; - struct relay *controller_relay; - struct discovery *discovery; - struct switch_status *switch_status; - struct port_watcher *pw; - int i; + struct ofproto *ofproto; + struct settings s; int retval; set_program_name(argv[0]); @@ -136,20 +92,6 @@ main(int argc, char *argv[]) signal(SIGPIPE, SIG_IGN); sighup = signal_register(SIGHUP); - secchan.hooks = NULL; - secchan.n_hooks = 0; - secchan.allocated_hooks = 0; - - /* Start listening for management and monitoring connections. */ - n_listeners = 0; - for (i = 0; i < s.n_listeners; i++) { - listeners[n_listeners++] = open_passive_vconn(s.listener_names[i]); - } - monitor = s.monitor_name ? open_passive_vconn(s.monitor_name) : NULL; - - /* Initialize switch status hook. 
*/ - switch_status_start(&secchan, &s, &switch_status); - die_if_already_running(); daemonize(); @@ -162,149 +104,20 @@ main(int argc, char *argv[]) VLOG_INFO("OpenFlow reference implementation version %s", VERSION BUILDNR); VLOG_INFO("OpenFlow protocol version 0x%02x", OFP_VERSION); - /* Check datapath name, to try to catch command-line invocation errors. */ - if (strncmp(s.dp_name, "nl:", 3) && strncmp(s.dp_name, "unix:", 5) - && !s.controller_name) { - VLOG_WARN("Controller not specified and datapath is not nl: or " - "unix:. (Did you forget to specify the datapath?)"); - } - - if (!strncmp(s.dp_name, "nl:", 3)) { - /* Connect to datapath with a subscription for asynchronous events. By - * separating the connection for asynchronous events from that for - * request and replies we prevent the socket receive buffer from being - * filled up by received packet data, which in turn would prevent - * getting replies to any Netlink messages we send to the kernel. */ - async_rconn = rconn_create(0, s.max_backoff); - rconn_connect(async_rconn, s.dp_name); - switch_status_register_category(switch_status, "async", - rconn_status_cb, async_rconn); - } else { - /* No need for a separate asynchronous connection: we must be connected - * to the user datapath, which is smart enough to discard packet events - * instead of message replies. In fact, having a second connection - * would work against us since we'd get double copies of asynchronous - * event messages (the user datapath provides no way to turn off - * asynchronous events). */ - async_rconn = NULL; - } - - /* Connect to datapath without a subscription, for requests and replies. */ - local_rconn_name = vconn_name_without_subscription(s.dp_name); - local_rconn = rconn_create(0, s.max_backoff); - rconn_connect(local_rconn, local_rconn_name); - free(local_rconn_name); - switch_status_register_category(switch_status, "local", - rconn_status_cb, local_rconn); - - /* Connect to controller. 
*/ - remote_rconn = rconn_create(s.probe_interval, s.max_backoff); - if (s.controller_name) { - retval = rconn_connect(remote_rconn, s.controller_name); - if (retval == EAFNOSUPPORT) { - ofp_fatal(0, "No support for %s vconn", s.controller_name); - } - } - switch_status_register_category(switch_status, "remote", - rconn_status_cb, remote_rconn); - - /* Start relaying. */ - controller_relay = relay_create(async_rconn, local_rconn, remote_rconn, - false); - list_push_back(&relays, &controller_relay->node); - - /* Set up hooks. */ - port_watcher_start(&secchan, local_rconn, remote_rconn, &pw); - discovery = s.discovery ? discovery_init(&s, pw, switch_status) : NULL; -#ifdef SUPPORT_SNAT - snat_start(&secchan, pw); -#endif - flow_end_start(&secchan, &s, local_rconn, remote_rconn); - if (s.enable_stp) { - stp_start(&secchan, pw, local_rconn, remote_rconn); - } - if (s.in_band) { - in_band_start(&secchan, &s, switch_status, pw, remote_rconn); - } - if (s.fail_mode == FAIL_OPEN) { - fail_open_start(&secchan, &s, switch_status, - local_rconn, remote_rconn); - } - if (s.rate_limit) { - rate_limit_start(&secchan, &s, switch_status, remote_rconn); - } - if (s.command_acl[0]) { - executer_start(&secchan, &s); - } - - reconfigure(&secchan); - - while (s.discovery || rconn_is_alive(remote_rconn)) { - struct relay *r, *n; - size_t i; + /* Start OpenFlow processing. */ + ofproto = ofproto_create(&s); + reconfigure(ofproto); + while (ofproto_is_alive(ofproto)) { if (signal_poll(sighup)) { - reconfigure(&secchan); + reconfigure(ofproto); } /* Do work. 
*/ - LIST_FOR_EACH_SAFE (r, n, struct relay, node, &relays) { - relay_run(r, &secchan); - } - for (i = 0; i < n_listeners; i++) { - for (;;) { - struct relay *r = relay_accept(&s, listeners[i]); - if (!r) { - break; - } - list_push_back(&relays, &r->node); - } - } - if (monitor) { - struct vconn *new = accept_vconn(monitor); - if (new) { - /* XXX should monitor async_rconn too but rconn_add_monitor() - * takes ownership of the vconn passed in. */ - rconn_add_monitor(local_rconn, new); - } - } - for (i = 0; i < secchan.n_hooks; i++) { - if (secchan.hooks[i].class->periodic_cb) { - secchan.hooks[i].class->periodic_cb(secchan.hooks[i].aux); - } - } - if (s.discovery) { - char *controller_name; - if (rconn_is_connectivity_questionable(remote_rconn)) { - discovery_question_connectivity(discovery); - } - if (discovery_run(discovery, &controller_name)) { - if (controller_name) { - rconn_connect(remote_rconn, controller_name); - } else { - rconn_disconnect(remote_rconn); - } - } - } + ofproto_run(ofproto); /* Wait for something to happen. 
*/ - LIST_FOR_EACH (r, struct relay, node, &relays) { - relay_wait(r); - } - for (i = 0; i < n_listeners; i++) { - pvconn_wait(listeners[i]); - } - if (monitor) { - pvconn_wait(monitor); - } - for (i = 0; i < secchan.n_hooks; i++) { - if (secchan.hooks[i].class->wait_cb) { - secchan.hooks[i].class->wait_cb(secchan.hooks[i].aux); - } - } - if (discovery) { - discovery_wait(discovery); - } + ofproto_wait(ofproto); signal_wait(sighup); poll_block(); } @@ -313,286 +126,10 @@ main(int argc, char *argv[]) } static void -reconfigure(struct secchan *secchan) +reconfigure(struct ofproto *ofproto) { - int i; - cfg_read(); - for (i = 0; i < secchan->n_hooks; i++) { - if (secchan->hooks[i].class->reconfigure_cb) { - secchan->hooks[i].class->reconfigure_cb(secchan->hooks[i].aux); - } - } -} - -static struct pvconn * -open_passive_vconn(const char *name) -{ - struct pvconn *pvconn; - int retval; - - retval = pvconn_open(name, &pvconn); - if (retval && retval != EAGAIN) { - ofp_fatal(retval, "opening %s", name); - } - return pvconn; -} - -static struct vconn * -accept_vconn(struct pvconn *pvconn) -{ - struct vconn *new; - int retval; - - retval = pvconn_accept(pvconn, OFP_VERSION, &new); - if (retval && retval != EAGAIN) { - VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval)); - } - return new; -} - -void -add_hook(struct secchan *secchan, const struct hook_class *class, void *aux) -{ - struct hook *hook; - - if (secchan->n_hooks >= secchan->allocated_hooks) { - secchan->hooks = x2nrealloc(secchan->hooks, &secchan->allocated_hooks, - sizeof *secchan->hooks); - } - hook = &secchan->hooks[secchan->n_hooks++]; - hook->class = class; - hook->aux = aux; -} - -struct ofp_packet_in * -get_ofp_packet_in(struct relay *r) -{ - struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf; - struct ofp_header *oh = msg->data; - if (oh->type == OFPT_PACKET_IN) { - if (msg->size >= offsetof (struct ofp_packet_in, data)) { - return msg->data; - } else { - VLOG_WARN("packet too short (%zu bytes) for 
packet_in", - msg->size); - } - } - return NULL; -} - -bool -get_ofp_packet_eth_header(struct relay *r, struct ofp_packet_in **opip, - struct eth_header **ethp) -{ - const int min_len = offsetof(struct ofp_packet_in, data) + ETH_HEADER_LEN; - struct ofp_packet_in *opi = get_ofp_packet_in(r); - if (opi && ntohs(opi->header.length) >= min_len) { - *opip = opi; - *ethp = (void *) opi->data; - return true; - } - return false; -} - -/* OpenFlow message relaying. */ - -/* Returns a malloc'd string containing a copy of 'vconn_name' modified not to - * subscribe to asynchronous messages such as 'ofp_packet_in' events (if - * possible). */ -static char * -vconn_name_without_subscription(const char *vconn_name) -{ - int nl_index; - if (sscanf(vconn_name, "nl:%d", &nl_index) == 1) { - /* nl:123 or nl:123:1 opens a netlink connection to local datapath 123. - * nl:123:0 opens a netlink connection to local datapath 123 without - * obtaining a subscription for ofp_packet_in or ofp_flow_expired - * messages. */ - return xasprintf("nl:%d:0", nl_index); - } else { - /* We don't have a way to specify not to subscribe to those messages - * for other transports. (That's a defect: really this should be in - * the OpenFlow protocol, not the Netlink transport). */ - VLOG_WARN_RL(&rl, "new management connection will receive " - "asynchronous messages"); - return xstrdup(vconn_name); - } -} - -static struct relay * -relay_accept(const struct settings *s, struct pvconn *pvconn) -{ - struct vconn *new_remote, *new_local; - struct rconn *r1, *r2; - char *vconn_name; - int retval; - - new_remote = accept_vconn(pvconn); - if (!new_remote) { - return NULL; - } - - vconn_name = vconn_name_without_subscription(s->dp_name); - retval = vconn_open(vconn_name, OFP_VERSION, &new_local); - if (retval) { - VLOG_ERR_RL(&rl, "could not connect to %s (%s)", - vconn_name, strerror(retval)); - vconn_close(new_remote); - free(vconn_name); - return NULL; - } - - /* Create and return relay. 
*/ - r1 = rconn_create(0, 0); - rconn_connect_unreliably(r1, vconn_name, new_local); - free(vconn_name); - - r2 = rconn_create(0, 0); - rconn_connect_unreliably(r2, "passive", new_remote); - - return relay_create(NULL, r1, r2, true); -} - -static struct relay * -relay_create(struct rconn *async, struct rconn *local, struct rconn *remote, - bool is_mgmt_conn) -{ - struct relay *r = xcalloc(1, sizeof *r); - r->halves[HALF_LOCAL].rconn = local; - r->halves[HALF_REMOTE].rconn = remote; - r->is_mgmt_conn = is_mgmt_conn; - r->async_rconn = async; - return r; -} - -static bool -call_local_packet_cbs(struct secchan *secchan, struct relay *r) -{ - const struct hook *h; - for (h = secchan->hooks; h < &secchan->hooks[secchan->n_hooks]; h++) { - bool (*cb)(struct relay *, void *aux) = h->class->local_packet_cb; - if (cb && (cb)(r, h->aux)) { - return true; - } - } - return false; -} - -static bool -call_remote_packet_cbs(struct secchan *secchan, struct relay *r) -{ - const struct hook *h; - for (h = secchan->hooks; h < &secchan->hooks[secchan->n_hooks]; h++) { - bool (*cb)(struct relay *, void *aux) = h->class->remote_packet_cb; - if (cb && (cb)(r, h->aux)) { - return true; - } - } - return false; -} - -static void -relay_run(struct relay *r, struct secchan *secchan) -{ - int iteration; - int i; - - if (r->async_rconn) { - rconn_run(r->async_rconn); - } - for (i = 0; i < 2; i++) { - rconn_run(r->halves[i].rconn); - } - - /* Limit the number of iterations to prevent other tasks from starving. */ - for (iteration = 0; iteration < 50; iteration++) { - bool progress = false; - for (i = 0; i < 2; i++) { - struct half *this = &r->halves[i]; - struct half *peer = &r->halves[!i]; - - if (!this->rxbuf) { - this->rxbuf = rconn_recv(this->rconn); - if (!this->rxbuf && i == HALF_LOCAL && r->async_rconn) { - this->rxbuf = rconn_recv(r->async_rconn); - } - if (this->rxbuf && (i == HALF_REMOTE || !r->is_mgmt_conn)) { - if (i == HALF_LOCAL - ? 
call_local_packet_cbs(secchan, r) - : call_remote_packet_cbs(secchan, r)) - { - ofpbuf_delete(this->rxbuf); - this->rxbuf = NULL; - progress = true; - break; - } - } - } - - if (this->rxbuf && !this->n_txq) { - int retval = rconn_send(peer->rconn, this->rxbuf, - &this->n_txq); - if (retval != EAGAIN) { - if (!retval) { - progress = true; - } else { - ofpbuf_delete(this->rxbuf); - } - this->rxbuf = NULL; - } - } - } - if (!progress) { - break; - } - } - - if (r->is_mgmt_conn) { - for (i = 0; i < 2; i++) { - struct half *this = &r->halves[i]; - if (!rconn_is_alive(this->rconn)) { - relay_destroy(r); - return; - } - } - } -} - -static void -relay_wait(struct relay *r) -{ - int i; - - if (r->async_rconn) { - rconn_run_wait(r->async_rconn); - } - for (i = 0; i < 2; i++) { - struct half *this = &r->halves[i]; - - rconn_run_wait(this->rconn); - if (!this->rxbuf) { - rconn_recv_wait(this->rconn); - if (i == HALF_LOCAL && r->async_rconn) { - rconn_recv_wait(r->async_rconn); - } - } - } -} - -static void -relay_destroy(struct relay *r) -{ - int i; - - list_remove(&r->node); - rconn_destroy(r->async_rconn); - for (i = 0; i < 2; i++) { - struct half *this = &r->halves[i]; - rconn_destroy(this->rconn); - ofpbuf_delete(this->rxbuf); - } - free(r); + ofproto_reconfigure(ofproto); } /* User interface. 
*/ @@ -601,7 +138,12 @@ static void parse_options(int argc, char *argv[], struct settings *s) { enum { - OPT_ACCEPT_VCONN = UCHAR_MAX + 1, + OPT_DATAPATH_ID = UCHAR_MAX + 1, + OPT_MANUFACTURER, + OPT_HARDWARE, + OPT_SOFTWARE, + OPT_SERIAL, + OPT_ACCEPT_VCONN, OPT_NO_RESOLV_CONF, OPT_BR_NAME, OPT_FAIL_MODE, @@ -621,6 +163,11 @@ parse_options(int argc, char *argv[], struct settings *s) LEAK_CHECKER_OPTION_ENUMS }; static struct option long_options[] = { + {"datapath-id", required_argument, 0, OPT_DATAPATH_ID}, + {"manufacturer", required_argument, 0, OPT_MANUFACTURER}, + {"hardware", required_argument, 0, OPT_HARDWARE}, + {"software", required_argument, 0, OPT_SOFTWARE}, + {"serial", required_argument, 0, OPT_SERIAL}, {"accept-vconn", required_argument, 0, OPT_ACCEPT_VCONN}, {"no-resolv-conf", no_argument, 0, OPT_NO_RESOLV_CONF}, {"config", required_argument, 0, 'F'}, @@ -630,7 +177,6 @@ parse_options(int argc, char *argv[], struct settings *s) {"max-idle", required_argument, 0, OPT_MAX_IDLE}, {"max-backoff", required_argument, 0, OPT_MAX_BACKOFF}, {"listen", required_argument, 0, 'l'}, - {"monitor", required_argument, 0, 'm'}, {"rate-limit", optional_argument, 0, OPT_RATE_LIMIT}, {"burst-limit", required_argument, 0, OPT_BURST_LIMIT}, {"stp", no_argument, 0, OPT_STP}, @@ -652,12 +198,14 @@ parse_options(int argc, char *argv[], struct settings *s) {0, 0, 0, 0}, }; char *short_options = long_options_to_short_options(long_options); - char *accept_re = NULL; - int retval; /* Set defaults that we can figure out before parsing options. 
*/ + s->datapath_id = 0; + s->mfr_desc = "Nicira Networks, Inc."; + s->hw_desc = "Reference Implementation"; + s->sw_desc = VERSION BUILDNR; + s->serial_desc = "None"; s->n_listeners = 0; - s->monitor_name = NULL; s->fail_mode = FAIL_OPEN; s->max_idle = 15; s->probe_interval = 15; @@ -665,6 +213,7 @@ parse_options(int argc, char *argv[], struct settings *s) s->update_resolv_conf = true; s->rate_limit = 0; s->burst_limit = 0; + s->accept_controller_re = NULL; s->enable_stp = false; s->in_band = true; s->command_acl = ""; @@ -679,8 +228,37 @@ parse_options(int argc, char *argv[], struct settings *s) } switch (c) { + case OPT_DATAPATH_ID: + if (strlen(optarg) != 12 + || strspn(optarg, "0123456789abcdefABCDEF") != 12) { + ofp_fatal(0, "argument to --datapath-id must be " + "exactly 12 hex digits"); + } + s->datapath_id = strtoll(optarg, NULL, 16); + if (!s->datapath_id) { + ofp_fatal(0, "argument to --datapath-id must be nonzero"); + } + break; + + case OPT_MANUFACTURER: + s->mfr_desc = optarg; + break; + + case OPT_HARDWARE: + s->hw_desc = optarg; + break; + + case OPT_SOFTWARE: + s->sw_desc = optarg; + break; + + case OPT_SERIAL: + s->serial_desc = optarg; + break; + case OPT_ACCEPT_VCONN: - accept_re = optarg[0] == '^' ? optarg : xasprintf("^%s", optarg); + s->accept_controller_re = (optarg[0] == '^' ? optarg + : xasprintf("^%s", optarg)); break; case OPT_BR_NAME: @@ -791,13 +369,6 @@ parse_options(int argc, char *argv[], struct settings *s) s->listener_names[s->n_listeners++] = optarg; break; - case 'm': - if (s->monitor_name) { - ofp_fatal(0, "-m or --monitor may only be specified once"); - } - s->monitor_name = optarg; - break; - case 'h': usage(); @@ -841,18 +412,9 @@ parse_options(int argc, char *argv[], struct settings *s) s->controller_name = argc > 1 ? xstrdup(argv[1]) : NULL; /* Set accept_controller_regex. */ - if (!accept_re) { - accept_re = vconn_ssl_is_configured() ? 
"^ssl:.*" : ".*"; + if (!s->accept_controller_re) { + s->accept_controller_re = vconn_ssl_is_configured() ? "^ssl:.*" : ".*"; } - retval = regcomp(&s->accept_controller_regex, accept_re, - REG_NOSUB | REG_EXTENDED); - if (retval) { - size_t length = regerror(retval, &s->accept_controller_regex, NULL, 0); - char *buffer = xmalloc(length); - regerror(retval, &s->accept_controller_regex, buffer, length); - ofp_fatal(0, "%s: %s", accept_re, buffer); - } - s->accept_controller_re = accept_re; /* Mode of operation. */ s->discovery = s->controller_name == NULL; @@ -879,7 +441,7 @@ usage(void) { printf("%s: secure channel, a relay for OpenFlow messages.\n" "usage: %s [OPTIONS] DATAPATH [CONTROLLER]\n" - "DATAPATH is an active connection method to a local datapath.\n" + "DATAPATH is a local datapath (e.g. \"dp0\").\n" "CONTROLLER is an active OpenFlow connection method; if it is\n" "omitted, then secchan performs controller discovery.\n", program_name, program_name); @@ -887,6 +449,13 @@ usage(void) printf("\nConfiguration options:\n" " -F, --config=FILE|DIR reads configuration from FILE or DIR\n" " --br-name=NAME bridge name to use for configuration\n" + "\nOpenFlow options:\n" + " -d, --datapath-id=ID Use ID as the OpenFlow switch ID\n" + " (ID must consist of 12 hex digits)\n" + " --manufacturer=MFR Identify manufacturer as MFR\n" + " --hardware=HW Identify hardware as HW\n" + " --software=SW Identify software as SW\n" + " --serial=SERIAL Identify serial number as SERIAL\n" "\nController discovery options:\n" " --accept-vconn=REGEX accept matching discovered controllers\n" " --no-resolv-conf do not update /etc/resolv.conf\n" @@ -900,8 +469,6 @@ usage(void) " attempts (default: 15 seconds)\n" " -l, --listen=METHOD allow management connections on METHOD\n" " (a passive OpenFlow connection method)\n" - " -m, --monitor=METHOD copy traffic to/from kernel to METHOD\n" - " (a passive OpenFlow connection method)\n" " --out-of-band controller connection is out-of-band\n" " --stp 
enable 802.1D Spanning Tree Protocol\n" " --no-stp disable 802.1D Spanning Tree Protocol\n" diff --git a/secchan/secchan.h b/secchan/secchan.h index 09439a0c..a8564b86 100644 --- a/secchan/secchan.h +++ b/secchan/secchan.h @@ -34,13 +34,9 @@ #ifndef SECCHAN_H #define SECCHAN_H 1 -#include #include #include #include "list.h" -#include "packets.h" - -struct secchan; /* Behavior when the connection to the controller fails. */ enum fail_mode { @@ -60,12 +56,20 @@ struct settings { bool discovery; /* Discover the controller automatically? */ bool in_band; /* Connect to controller in-band? */ + /* Datapath. */ + uint64_t datapath_id; /* Datapath ID. */ + const char *dp_name; /* Name of local datapath. */ + + /* Description strings. */ + const char *mfr_desc; /* Manufacturer. */ + const char *hw_desc; /* Hardware. */ + const char *sw_desc; /* Software version. */ + const char *serial_desc; /* Serial number. */ + /* Related vconns and network devices. */ - const char *dp_name; /* Local datapath. */ const char *controller_name; /* Controller (if not discovery mode). */ const char *listener_names[MAX_MGMT]; /* Listen for mgmt connections. */ size_t n_listeners; /* Number of mgmt connection listeners. */ - const char *monitor_name; /* Listen for traffic monitor connections. */ /* Failure behavior. */ enum fail_mode fail_mode; /* Act as learning switch if no controller? */ @@ -78,8 +82,7 @@ struct settings { int burst_limit; /* Maximum number token bucket size. */ /* Discovery behavior. */ - regex_t accept_controller_regex; /* Controller vconns to accept. */ - const char *accept_controller_re; /* String version of regex. */ + const char *accept_controller_re; /* Controller vconns to accept. */ bool update_resolv_conf; /* Update /etc/resolv.conf? */ /* Spanning tree protocol. */ @@ -90,48 +93,4 @@ struct settings { char *command_dir; /* Directory that contains commands. */ }; -struct half { - struct rconn *rconn; - struct ofpbuf *rxbuf; - int n_txq; /* No. 
of packets queued for tx on 'rconn'. */ -}; - -struct relay { - struct list node; - -#define HALF_LOCAL 0 -#define HALF_REMOTE 1 - struct half halves[2]; - - /* The secchan has a primary connection (relay) to an OpenFlow controller. - * This primary connection actually makes two connections to the datapath: - * one for OpenFlow requests and responses, and one that is only used for - * receiving asynchronous events such as 'ofp_packet_in' events. This - * design keeps replies to OpenFlow requests from being dropped by the - * kernel due to a flooded network device. - * - * The secchan may also have any number of secondary "management" - * connections (relays). These connections do not receive asychronous - * events and thus have a null 'async_rconn'. */ - bool is_mgmt_conn; /* Is this a management connection? */ - struct rconn *async_rconn; /* For receiving asynchronous events. */ -}; - -struct hook_class { - bool (*local_packet_cb)(struct relay *, void *aux); - bool (*remote_packet_cb)(struct relay *, void *aux); - void (*periodic_cb)(void *aux); - void (*wait_cb)(void *aux); - void (*closing_cb)(struct relay *, void *aux); - void (*reconfigure_cb)(void *aux); -}; - -void add_hook(struct secchan *, const struct hook_class *, void *); - -struct ofp_packet_in *get_ofp_packet_in(struct relay *); -bool get_ofp_packet_eth_header(struct relay *, struct ofp_packet_in **, - struct eth_header **); -void get_ofp_packet_payload(struct ofp_packet_in *, struct ofpbuf *); - - #endif /* secchan.h */ diff --git a/secchan/snat.c b/secchan/snat.c deleted file mode 100644 index 31fbe26e..00000000 --- a/secchan/snat.c +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those 
enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "snat.h" -#include -#include -#include -#include "openflow/nicira-ext.h" -#include "ofpbuf.h" -#include "openflow/openflow.h" -#include "port-watcher.h" - -#define THIS_MODULE VLM_snat -#include "vlog.h" - -struct snat_port_conf { - struct list node; - struct nx_snat_config config; -}; - -struct snat_data { - struct port_watcher *pw; - struct list port_list; -}; - - -/* Source-NAT configuration monitor. */ -#define SNAT_CMD_LEN 1024 - -/* Commands to configure iptables. 
There is no programmatic interface - * to iptables from the kernel, so we're stuck making command-line calls - * in user-space. */ -#define SNAT_FLUSH_ALL_CMD "/sbin/iptables -t nat -F" -#define SNAT_FLUSH_CHAIN_CMD "/sbin/iptables -t nat -F of-snat-%s" - -#define SNAT_ADD_CHAIN_CMD "/sbin/iptables -t nat -N of-snat-%s" -#define SNAT_CONF_CHAIN_CMD "/sbin/iptables -t nat -A POSTROUTING -o %s -j of-snat-%s" - -#define SNAT_ADD_IP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT --to %s-%s" -#define SNAT_ADD_TCP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p TCP --to %s-%s:%d-%d" -#define SNAT_ADD_UDP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p UDP --to %s-%s:%d-%d" - -#define SNAT_UNSET_CHAIN_CMD "/sbin/iptables -t nat -D POSTROUTING -o %s -j of-snat-%s" -#define SNAT_DEL_CHAIN_CMD "/sbin/iptables -t nat -X of-snat-%s" - -static void -snat_add_rules(const struct nx_snat_config *sc, const uint8_t *dev_name) -{ - char command[SNAT_CMD_LEN]; - char ip_str_start[16]; - char ip_str_end[16]; - - - snprintf(ip_str_start, sizeof ip_str_start, IP_FMT, - IP_ARGS(&sc->ip_addr_start)); - snprintf(ip_str_end, sizeof ip_str_end, IP_FMT, - IP_ARGS(&sc->ip_addr_end)); - - /* We always attempt to remove existing entries, so that we know - * there's a pristine state for SNAT on the interface. We just ignore - * the results of these calls, since iptables will complain about - * any non-existent entries. */ - - /* Flush the chain that does the SNAT. */ - snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name); - system(command); - - /* We always try to create the a new chain. */ - snprintf(command, sizeof(command), SNAT_ADD_CHAIN_CMD, dev_name); - system(command); - - /* Disassociate any old SNAT chain from the POSTROUTING chain. */ - snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name, - dev_name); - system(command); - - /* Associate the new chain with the POSTROUTING hook. 
*/ - snprintf(command, sizeof(command), SNAT_CONF_CHAIN_CMD, dev_name, - dev_name); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem flushing chain for add"); - return; - } - - /* If configured, restrict TCP source port ranges. */ - if ((sc->tcp_start != 0) && (sc->tcp_end != 0)) { - snprintf(command, sizeof(command), SNAT_ADD_TCP_CMD, - dev_name, ip_str_start, ip_str_end, - ntohs(sc->tcp_start), ntohs(sc->tcp_end)); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem adding TCP rule"); - return; - } - } - - /* If configured, restrict UDP source port ranges. */ - if ((sc->udp_start != 0) && (sc->udp_end != 0)) { - snprintf(command, sizeof(command), SNAT_ADD_UDP_CMD, - dev_name, ip_str_start, ip_str_end, - ntohs(sc->udp_start), ntohs(sc->udp_end)); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem adding UDP rule"); - return; - } - } - - /* Add a rule that covers all IP traffic that would not be covered - * by the prior TCP or UDP ranges. */ - snprintf(command, sizeof(command), SNAT_ADD_IP_CMD, - dev_name, ip_str_start, ip_str_end); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem adding base rule"); - return; - } -} - -static void -snat_del_rules(const uint8_t *dev_name) -{ - char command[SNAT_CMD_LEN]; - - /* Flush the chain that does the SNAT. */ - snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem flushing chain for deletion"); - return; - } - - /* Disassociate the SNAT chain from the POSTROUTING chain. */ - snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name, - dev_name); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem unsetting chain"); - return; - } - - /* Now we can finally delete our SNAT chain. 
*/ - snprintf(command, sizeof(command), SNAT_DEL_CHAIN_CMD, dev_name); - if (system(command) != 0) { - VLOG_ERR("SNAT: problem deleting chain"); - return; - } -} - -static void -snat_config(const struct nx_snat_config *sc, struct snat_data *snat) -{ - struct snat_port_conf *c, *spc=NULL; - const uint8_t *netdev_name; - - netdev_name = (const uint8_t *) port_watcher_get_name(snat->pw, - ntohs(sc->port)); - if (!netdev_name) { - return; - } - - LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) { - if (c->config.port == sc->port) { - spc = c; - break; - } - } - - if (sc->command == NXSC_ADD) { - if (!spc) { - spc = xmalloc(sizeof(*c)); - if (!spc) { - VLOG_ERR("SNAT: no memory for new entry"); - return; - } - list_push_back(&snat->port_list, &spc->node); - } - memcpy(&spc->config, sc, sizeof(spc->config)); - snat_add_rules(sc, netdev_name); - } else if (spc) { - snat_del_rules(netdev_name); - list_remove(&spc->node); - } -} - -static bool -snat_remote_packet_cb(struct relay *r, void *snat_) -{ - struct snat_data *snat = snat_; - struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; - struct nicira_header *request = msg->data; - struct nx_act_config *nac = msg->data; - int n_configs, i; - - - if (msg->size < sizeof(struct nx_act_config)) { - return false; - } - request = msg->data; - if (request->header.type != OFPT_VENDOR - || request->vendor != htonl(NX_VENDOR_ID) - || request->subtype != htonl(NXT_ACT_SET_CONFIG)) { - return false; - } - - /* We're only interested in attempts to configure SNAT */ - if (nac->type != htons(NXAST_SNAT)) { - return false; - } - - n_configs = (msg->size - sizeof *nac) / sizeof *nac->snat; - for (i=0; isnat[i], snat); - } - - return false; -} - -static void -snat_port_changed_cb(uint16_t port_no UNUSED, - const struct ofp_phy_port *old, - const struct ofp_phy_port *new, - void *snat_) -{ - struct snat_data *snat = snat_; - struct snat_port_conf *c; - - /* We're only interested in ports that went away */ - if (old && !new) { - 
return; - } - - LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) { - if (c->config.port == old->port_no) { - snat_del_rules(old->name); - list_remove(&c->node); - return; - } - } -} - -static const struct hook_class snat_hook_class = { - NULL, /* local_packet_cb */ - snat_remote_packet_cb, /* remote_packet_cb */ - NULL, /* periodic_cb */ - NULL, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -snat_start(struct secchan *secchan, struct port_watcher *pw) -{ - int ret; - struct snat_data *snat; - - ret = system(SNAT_FLUSH_ALL_CMD); - if (ret != 0) { - VLOG_ERR("SNAT: problem flushing tables"); - } - - snat = xcalloc(1, sizeof *snat); - snat->pw = pw; - list_init(&snat->port_list); - - port_watcher_register_callback(pw, snat_port_changed_cb, snat); - add_hook(secchan, &snat_hook_class, snat); -} diff --git a/secchan/snat.h b/secchan/snat.h deleted file mode 100644 index 8f71b151..00000000 --- a/secchan/snat.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#ifndef SNAT_H -#define SNAT_H 1 - -#include "secchan.h" - -struct port_watcher; -struct secchan; - -void snat_start(struct secchan *, struct port_watcher *); - -#endif /* snat.h */ diff --git a/secchan/status.c b/secchan/status.c index ff0c6f4f..fd57e514 100644 --- a/secchan/status.c +++ b/secchan/status.c @@ -38,10 +38,10 @@ #include #include #include "dynamic-string.h" -#include "openflow/nicira-ext.h" #include "ofpbuf.h" -#include "openflow/openflow.h" +#include "openflow/nicira-ext.h" #include "rconn.h" +#include "secchan.h" #include "timeval.h" #include "vconn.h" @@ -55,7 +55,6 @@ struct switch_status_category { }; struct switch_status { - const struct settings *s; time_t booted; struct switch_status_category *categories; size_t n_categories, allocated_categories; @@ -67,31 +66,18 @@ struct status_reply { struct ds output; }; -static bool -switch_status_remote_packet_cb(struct relay *r, void *ss_) +int +switch_status_handle_request(struct switch_status *ss, struct rconn *rconn, + struct nicira_header *request) { - struct switch_status *ss = ss_; - struct rconn *rc = r->halves[HALF_REMOTE].rconn; - struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; struct switch_status_category *c; - struct nicira_header *request; struct nicira_header *reply; struct status_reply sr; struct ofpbuf *b; int retval; - if (msg->size < sizeof(struct nicira_header)) { - return false; - } - request = msg->data; - if (request->header.type != OFPT_VENDOR - || request->vendor != htonl(NX_VENDOR_ID) - || request->subtype != htonl(NXT_STATUS_REQUEST)) { - return false; - } - sr.request.string = (void *) (request + 1); - sr.request.length = msg->size - sizeof *request; + sr.request.length = ntohs(request->header.length) - sizeof *request; ds_init(&sr.output); for (c = ss->categories; c < &ss->categories[ss->n_categories]; c++) { if (!memcmp(c->name, sr.request.string, @@ -105,12 +91,12 @@ switch_status_remote_packet_cb(struct relay *r, void *ss_) reply->vendor = htonl(NX_VENDOR_ID); 
reply->subtype = htonl(NXT_STATUS_REPLY); memcpy(reply + 1, sr.output.string, sr.output.length); - retval = rconn_send(rc, b, NULL); + retval = rconn_send(rconn, b, NULL); if (retval && retval != EAGAIN) { VLOG_WARN("send failed (%s)", strerror(retval)); } ds_destroy(&sr.output); - return true; + return 0; } void @@ -165,27 +151,15 @@ switch_status_cb(struct status_reply *sr, void *ss_) status_reply_put(sr, "pid=%ld", (long int) getpid()); } -static const struct hook_class switch_status_hook_class = { - NULL, /* local_packet_cb */ - switch_status_remote_packet_cb, /* remote_packet_cb */ - NULL, /* periodic_cb */ - NULL, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - -void -switch_status_start(struct secchan *secchan, const struct settings *s, - struct switch_status **ssp) +struct switch_status * +switch_status_create(const struct settings *settings) { struct switch_status *ss = xcalloc(1, sizeof *ss); - ss->s = s; ss->booted = time_now(); switch_status_register_category(ss, "config", - config_status_cb, (void *) s); + config_status_cb, (void *) settings); switch_status_register_category(ss, "switch", switch_status_cb, ss); - *ssp = ss; - add_hook(secchan, &switch_status_hook_class, ss); + return ss; } void diff --git a/secchan/status.h b/secchan/status.h index 68793eff..dbb0300a 100644 --- a/secchan/status.h +++ b/secchan/status.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -34,14 +34,19 @@ #ifndef STATUS_H #define STATUS_H 1 -#include "secchan.h" +#include "compiler.h" +struct nicira_header; +struct rconn; struct secchan; +struct settings; struct status_reply; -struct switch_status; -void switch_status_start(struct secchan *, const struct settings *, - struct switch_status **); +struct switch_status *switch_status_create(const 
struct settings *); + +int switch_status_handle_request(struct switch_status *, struct rconn *, + struct nicira_header *); + void switch_status_register_category(struct switch_status *, const char *category, void (*cb)(struct status_reply *, diff --git a/secchan/stp-secchan.c b/secchan/stp-secchan.c deleted file mode 100644 index c8e4ca71..00000000 --- a/secchan/stp-secchan.c +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "stp-secchan.h" -#include -#include -#include "flow.h" -#include "secchan.h" -#include "ofpbuf.h" -#include "openflow/openflow.h" -#include "poll-loop.h" -#include "port-watcher.h" -#include "rconn.h" -#include "stp.h" -#include "timeval.h" -#include "vconn.h" - -#define THIS_MODULE VLM_stp_secchan -#include "vlog.h" - -struct stp_data { - struct stp *stp; - struct port_watcher *pw; - struct rconn *local_rconn; - struct rconn *remote_rconn; - long long int last_tick; - int n_txq; -}; - -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); - -static bool -stp_local_packet_cb(struct relay *r, void *stp_) -{ - struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf; - struct ofp_header *oh; - struct stp_data *stp = stp_; - struct ofp_packet_in *opi; - struct eth_header *eth; - struct llc_header *llc; - struct ofpbuf payload; - uint16_t port_no; - struct flow flow; - - oh = msg->data; - if (oh->type == OFPT_FEATURES_REPLY - && msg->size >= offsetof(struct ofp_switch_features, ports)) { - struct ofp_switch_features *osf = msg->data; - osf->capabilities |= htonl(OFPC_STP); - return false; - } - - if (!get_ofp_packet_eth_header(r, &opi, ð) - || !eth_addr_equals(eth->eth_dst, stp_eth_addr)) { - return false; - } - - port_no = ntohs(opi->in_port); - if (port_no >= STP_MAX_PORTS) { - /* STP only supports 255 ports. */ - return false; - } - if (port_watcher_get_config(stp->pw, port_no) & OFPPC_NO_STP) { - /* We're not doing STP on this port. */ - return false; - } - - if (opi->reason == OFPR_ACTION) { - /* The controller set up a flow for this, so we won't intercept it. 
*/ - return false; - } - - get_ofp_packet_payload(opi, &payload); - flow_extract(&payload, port_no, &flow); - if (flow.dl_type != htons(OFP_DL_TYPE_NOT_ETH_TYPE)) { - VLOG_DBG("non-LLC frame received on STP multicast address"); - return false; - } - llc = ofpbuf_at_assert(&payload, sizeof *eth, sizeof *llc); - if (llc->llc_dsap != STP_LLC_DSAP) { - VLOG_DBG("bad DSAP 0x%02"PRIx8" received on STP multicast address", - llc->llc_dsap); - return false; - } - - /* Trim off padding on payload. */ - if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) { - payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN; - } - if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) { - struct stp_port *p = stp_get_port(stp->stp, port_no); - stp_received_bpdu(p, payload.data, payload.size); - } - - return true; -} - -static void -stp_periodic_cb(void *stp_) -{ - struct stp_data *stp = stp_; - long long int now = time_msec(); - long long int elapsed = now - stp->last_tick; - struct stp_port *p; - - if (!port_watcher_is_ready(stp->pw)) { - /* Can't start STP until we know port flags, because port flags can - * disable STP. 
*/ - return; - } - if (elapsed <= 0) { - return; - } - - stp_tick(stp->stp, MIN(INT_MAX, elapsed)); - stp->last_tick = now; - - while (stp_get_changed_port(stp->stp, &p)) { - int port_no = stp_port_no(p); - enum stp_state s_state = stp_port_get_state(p); - - if (s_state != STP_DISABLED) { - VLOG_INFO("STP: Port %d entered %s state", - port_no, stp_state_name(s_state)); - } - if (!(port_watcher_get_config(stp->pw, port_no) & OFPPC_NO_STP)) { - uint32_t p_config = 0; - uint32_t p_state; - switch (s_state) { - case STP_LISTENING: - p_state = OFPPS_STP_LISTEN; - break; - case STP_LEARNING: - p_state = OFPPS_STP_LEARN; - break; - case STP_DISABLED: - case STP_FORWARDING: - p_state = OFPPS_STP_FORWARD; - break; - case STP_BLOCKING: - p_state = OFPPS_STP_BLOCK; - break; - default: - VLOG_DBG_RL(&rl, "STP: Port %d has bad state %x", - port_no, s_state); - p_state = OFPPS_STP_FORWARD; - break; - } - if (!stp_forward_in_state(s_state)) { - p_config = OFPPC_NO_FLOOD; - } - port_watcher_set_flags(stp->pw, port_no, - p_config, OFPPC_NO_FLOOD, - p_state, OFPPS_STP_MASK); - } else { - /* We don't own those flags. 
*/ - } - } -} - -static void -stp_wait_cb(void *stp_ UNUSED) -{ - poll_timer_wait(1000); -} - -static void -send_bpdu(struct ofpbuf *pkt, int port_no, void *stp_) -{ - struct stp_data *stp = stp_; - const uint8_t *port_mac = port_watcher_get_hwaddr(stp->pw, port_no); - if (port_mac) { - struct eth_header *eth = pkt->l2; - struct ofpbuf *opo; - - memcpy(eth->eth_src, port_mac, ETH_ADDR_LEN); - opo = make_unbuffered_packet_out(pkt, OFPP_NONE, port_no); - - rconn_send_with_limit(stp->local_rconn, opo, &stp->n_txq, OFPP_MAX); - } else { - VLOG_WARN_RL(&rl, "cannot send BPDU on missing port %d", port_no); - } - ofpbuf_delete(pkt); -} - -static bool -stp_is_port_supported(uint16_t port_no) -{ - return port_no < STP_MAX_PORTS; -} - -static void -stp_port_changed_cb(uint16_t port_no, - const struct ofp_phy_port *old UNUSED, - const struct ofp_phy_port *new, - void *stp_) -{ - struct stp_data *stp = stp_; - struct stp_port *p; - - if (!stp_is_port_supported(port_no)) { - return; - } - - p = stp_get_port(stp->stp, port_no); - if (!new - || new->config & htonl(OFPPC_NO_STP | OFPPC_PORT_DOWN) - || new->state & htonl(OFPPS_LINK_DOWN)) { - stp_port_disable(p); - } else { - int speed = 0; - stp_port_enable(p); - if (new->curr & (OFPPF_10MB_HD | OFPPF_10MB_FD)) { - speed = 10; - } else if (new->curr & (OFPPF_100MB_HD | OFPPF_100MB_FD)) { - speed = 100; - } else if (new->curr & (OFPPF_1GB_HD | OFPPF_1GB_FD)) { - speed = 1000; - } else if (new->curr & OFPPF_10GB_FD) { - speed = 10000; - } - stp_port_set_speed(p, speed); - } -} - -static void -stp_local_port_changed_cb(const struct ofp_phy_port *port, void *stp_) -{ - struct stp_data *stp = stp_; - if (port) { - stp_set_bridge_id(stp->stp, eth_addr_to_uint64(port->hw_addr)); - } -} - -static const struct hook_class stp_hook_class = { - stp_local_packet_cb, /* local_packet_cb */ - NULL, /* remote_packet_cb */ - stp_periodic_cb, /* periodic_cb */ - stp_wait_cb, /* wait_cb */ - NULL, /* closing_cb */ - NULL, /* reconfigure_cb */ -}; - 
-void -stp_start(struct secchan *secchan, struct port_watcher *pw, - struct rconn *local, struct rconn *remote) -{ - uint8_t dpid[ETH_ADDR_LEN]; - struct stp_data *stp; - - stp = xcalloc(1, sizeof *stp); - eth_addr_random(dpid); - stp->stp = stp_create("stp", eth_addr_to_uint64(dpid), send_bpdu, stp); - stp->pw = pw; - stp->local_rconn = local; - stp->remote_rconn = remote; - stp->last_tick = time_msec(); - - port_watcher_register_callback(pw, stp_port_changed_cb, stp); - port_watcher_register_local_port_callback(pw, stp_local_port_changed_cb, - stp); - add_hook(secchan, &stp_hook_class, stp); -} diff --git a/tests/test-classifier.c b/tests/test-classifier.c index dc857446..53bf0002 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -157,7 +157,7 @@ read_uint32(const void *p) } static bool -match(const struct cls_rule *wild, const struct flow *fixed) +match(const struct cls_rule *wild, const flow_t *fixed) { int f_idx; @@ -189,7 +189,7 @@ match(const struct cls_rule *wild, const struct flow *fixed) } static struct cls_rule * -tcls_lookup(const struct tcls *cls, const struct flow *flow) +tcls_lookup(const struct tcls *cls, const flow_t *flow) { size_t i; @@ -302,7 +302,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) for (i = 0; i < N_FLOW_VALUES; i++) { struct cls_rule *cr0, *cr1; - struct flow flow; + flow_t flow; unsigned int x; x = i; @@ -390,7 +390,7 @@ make_rule(int wc_fields, int priority, int value_pat) const struct cls_field *f; struct test_rule *rule; uint32_t wildcards; - struct flow flow; + flow_t flow; wildcards = 0; memset(&flow, 0, sizeof flow); diff --git a/tests/test-flows.c b/tests/test-flows.c index 2bea3b33..74016c85 100644 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@ -40,7 +40,7 @@ main(int argc UNUSED, char *argv[]) while (fread(&expected_match, sizeof expected_match, 1, flows)) { struct ofpbuf *packet; struct ofp_match extracted_match; - struct flow flow; + flow_t flow; n++; diff --git 
a/udatapath/.gitignore b/udatapath/.gitignore deleted file mode 100644 index 5ce50666..00000000 --- a/udatapath/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/Makefile -/Makefile.in -/udatapath -/udatapath.8 diff --git a/udatapath/automake.mk b/udatapath/automake.mk deleted file mode 100644 index 207fa5fd..00000000 --- a/udatapath/automake.mk +++ /dev/null @@ -1,27 +0,0 @@ -bin_PROGRAMS += udatapath/udatapath -man_MANS += udatapath/udatapath.8 - -udatapath_udatapath_SOURCES = \ - udatapath/chain.c \ - udatapath/chain.h \ - udatapath/crc32.c \ - udatapath/crc32.h \ - udatapath/datapath.c \ - udatapath/datapath.h \ - udatapath/dp_act.c \ - udatapath/dp_act.h \ - udatapath/nx_act.c \ - udatapath/nx_act.h \ - udatapath/nx_msg.c \ - udatapath/nx_msg.h \ - udatapath/udatapath.c \ - udatapath/switch-flow.c \ - udatapath/switch-flow.h \ - udatapath/table.h \ - udatapath/table-hash.c \ - udatapath/table-linear.c - -udatapath_udatapath_LDADD = lib/libopenflow.a $(SSL_LIBS) $(FAULT_LIBS) - -EXTRA_DIST += udatapath/udatapath.8.in -DISTCLEANFILES += udatapath/udatapath.8 diff --git a/udatapath/chain.c b/udatapath/chain.c deleted file mode 100644 index 2ca289da..00000000 --- a/udatapath/chain.c +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "chain.h" -#include -#include -#include -#include "switch-flow.h" -#include "table.h" - -#define THIS_MODULE VLM_chain -#include "vlog.h" - -/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or - * negative error. If 'table' is null it is assumed that table creation failed - * due to out-of-memory. 
*/ -static int add_table(struct sw_chain *chain, struct sw_table *table) -{ - if (table == NULL) - return -ENOMEM; - if (chain->n_tables >= CHAIN_MAX_TABLES) { - VLOG_ERR("too many tables in chain\n"); - table->destroy(table); - return -ENOBUFS; - } - chain->tables[chain->n_tables++] = table; - return 0; -} - -/* Creates and returns a new chain. Returns NULL if the chain cannot be - * created. */ -struct sw_chain *chain_create(struct datapath *dp) -{ - struct sw_chain *chain = calloc(1, sizeof *chain); - if (chain == NULL) - return NULL; - - chain->dp = dp; - if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, - 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) - || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) { - chain_destroy(chain); - return NULL; - } - - return chain; -} - -/* Searches 'chain' for a flow matching 'key', which must not have any wildcard - * fields. Returns the flow if successful, otherwise a null pointer. */ -struct sw_flow * -chain_lookup(struct sw_chain *chain, const struct sw_flow_key *key) -{ - int i; - - assert(!key->wildcards); - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - struct sw_flow *flow = t->lookup(t, key); - t->n_lookup++; - if (flow) { - t->n_matched++; - return flow; - } - } - return NULL; -} - -/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if - * successful or a negative error. - * - * If successful, 'flow' becomes owned by the chain, otherwise it is retained - * by the caller. */ -int -chain_insert(struct sw_chain *chain, struct sw_flow *flow) -{ - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - if (t->insert(t, flow)) - return 0; - } - - return -ENOBUFS; -} - -/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards - * and priority must match. Returns the number of flows that were modified. 
- * - * Expensive in the general case as currently implemented, since it requires - * iterating through the entire contents of each table for keys that contain - * wildcards. Relatively cheap for fully specified keys. */ -int -chain_modify(struct sw_chain *chain, const struct sw_flow_key *key, - uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - int count = 0; - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - count += t->modify(t, key, priority, strict, actions, actions_len); - } - - return count; -} - -/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port' - * is not OFPP_NONE, then matching entries must have that port as an - * argument for an output action. If 'strict" is set, then wildcards and - * priority must match. Returns the number of flows that were deleted. - * - * Expensive in the general case as currently implemented, since it requires - * iterating through the entire contents of each table for keys that contain - * wildcards. Relatively cheap for fully specified keys. */ -int -chain_delete(struct sw_chain *chain, const struct sw_flow_key *key, - uint16_t out_port, uint16_t priority, int strict) -{ - int count = 0; - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - count += t->delete(chain->dp, t, key, out_port, priority, strict); - } - - return count; - -} - -/* Deletes timed-out flow entries from all the tables in 'chain' and appends - * the deleted flows to 'deleted'. - * - * Expensive as currently implemented, since it iterates through the entire - * contents of each table. */ -void -chain_timeout(struct sw_chain *chain, struct list *deleted) -{ - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - t->timeout(t, deleted); - } -} - -/* Destroys 'chain', which must not have any users. 
*/ -void -chain_destroy(struct sw_chain *chain) -{ - int i; - - for (i = 0; i < chain->n_tables; i++) { - struct sw_table *t = chain->tables[i]; - t->destroy(t); - } - free(chain); -} diff --git a/udatapath/chain.h b/udatapath/chain.h deleted file mode 100644 index 639ae95b..00000000 --- a/udatapath/chain.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef CHAIN_H -#define CHAIN_H 1 - -#include -#include - -struct sw_flow; -struct sw_flow_key; -struct ofp_action_header; -struct list; -struct datapath; - -#define TABLE_LINEAR_MAX_FLOWS 100 -#define TABLE_HASH_MAX_FLOWS 65536 -#define TABLE_MAC_MAX_FLOWS 1024 -#define TABLE_MAC_NUM_BUCKETS 1024 - -/* Set of tables chained together in sequence from cheap to expensive. */ -#define CHAIN_MAX_TABLES 4 -struct sw_chain { - int n_tables; - struct sw_table *tables[CHAIN_MAX_TABLES]; - - struct datapath *dp; -}; - -struct sw_chain *chain_create(struct datapath *); -struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *); -int chain_insert(struct sw_chain *, struct sw_flow *); -int chain_modify(struct sw_chain *, const struct sw_flow_key *, - uint16_t, int, const struct ofp_action_header *, size_t); -int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t, - uint16_t, int); -void chain_timeout(struct sw_chain *, struct list *deleted); -void chain_destroy(struct sw_chain *); - -#endif /* chain.h */ diff --git a/udatapath/crc32.c b/udatapath/crc32.c deleted file mode 100644 index f6c2c0b3..00000000 --- a/udatapath/crc32.c +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include "crc32.h" - -void -crc32_init(struct crc32 *crc, unsigned int polynomial) -{ - int i; - - for (i = 0; i < CRC32_TABLE_SIZE; ++i) { - unsigned int reg = i << 24; - int j; - for (j = 0; j < CRC32_TABLE_BITS; j++) { - int topBit = (reg & 0x80000000) != 0; - reg <<= 1; - if (topBit) - reg ^= polynomial; - } - crc->table[i] = reg; - } -} - -unsigned int -crc32_calculate(const struct crc32 *crc, const void *data_, size_t n_bytes) -{ - const uint8_t *data = data_; - unsigned int result = 0; - size_t i; - - for (i = 0; i < n_bytes; i++) { - unsigned int top = result >> 24; - top ^= data[i]; - result = (result << 8) ^ crc->table[top]; - } - return result; -} diff --git a/udatapath/crc32.h b/udatapath/crc32.h deleted file mode 100644 index 355aefdf..00000000 --- a/udatapath/crc32.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef CRC32_H -#define CRC32_H 1 - -#include -#include - -#define CRC32_TABLE_BITS 8 -#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS) - -struct crc32 { - unsigned int table[CRC32_TABLE_SIZE]; -}; - -void crc32_init(struct crc32 *, unsigned int polynomial); -unsigned int crc32_calculate(const struct crc32 *, const void *, size_t); - -#endif /* crc32.h */ diff --git a/udatapath/datapath.c b/udatapath/datapath.c deleted file mode 100644 index e5e11d54..00000000 --- a/udatapath/datapath.c +++ /dev/null @@ -1,1668 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include "datapath.h" -#include -#include -#include -#include -#include -#include -#include "chain.h" -#include "csum.h" -#include "flow.h" -#include "netdev.h" -#include "ofpbuf.h" -#include "openflow/openflow.h" -#include "openflow/nicira-ext.h" -#include "packets.h" -#include "poll-loop.h" -#include "rconn.h" -#include "stp.h" -#include "switch-flow.h" -#include "table.h" -#include "vconn.h" -#include "xtoxll.h" -#include "nx_msg.h" -#include "dp_act.h" - -#define THIS_MODULE VLM_datapath -#include "vlog.h" - -extern char mfr_desc; -extern char hw_desc; -extern char sw_desc; -extern char serial_num; - -/* Capabilities supported by this implementation. */ -#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \ - | OFPC_TABLE_STATS \ - | OFPC_PORT_STATS \ - | OFPC_MULTI_PHY_TX ) - -/* Actions supported by this implementation. */ -#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \ - | (1 << OFPAT_SET_VLAN_VID) \ - | (1 << OFPAT_SET_VLAN_PCP) \ - | (1 << OFPAT_STRIP_VLAN) \ - | (1 << OFPAT_SET_DL_SRC) \ - | (1 << OFPAT_SET_DL_DST) \ - | (1 << OFPAT_SET_NW_SRC) \ - | (1 << OFPAT_SET_NW_DST) \ - | (1 << OFPAT_SET_TP_SRC) \ - | (1 << OFPAT_SET_TP_DST) ) - -/* The origin of a received OpenFlow message, to enable sending a reply. */ -struct sender { - struct remote *remote; /* The device that sent the message. */ - uint32_t xid; /* The OpenFlow transaction ID. */ -}; - -/* A connection to a secure channel. */ -struct remote { - struct list node; - struct rconn *rconn; -#define TXQ_LIMIT 128 /* Max number of packets to queue for tx. */ - int n_txq; /* Number of packets queued for tx on rconn. */ - - /* Support for reliable, multi-message replies to requests. - * - * If an incoming request needs to have a reliable reply that might - * require multiple messages, it can use remote_start_dump() to set up - * a callback that will be called as buffer space for replies. 
*/ - int (*cb_dump)(struct datapath *, void *aux); - void (*cb_done)(void *aux); - void *cb_aux; -}; - -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); - -static struct remote *remote_create(struct datapath *, struct rconn *); -static void remote_run(struct datapath *, struct remote *); -static void remote_wait(struct remote *); -static void remote_destroy(struct remote *); - -static void update_port_flags(struct datapath *, const struct ofp_port_mod *); -static void send_port_status(struct sw_port *p, uint8_t status); - -/* Buffers are identified by a 31-bit opaque ID. We divide the ID - * into a buffer number (low bits) and a cookie (high bits). The buffer number - * is an index into an array of buffers. The cookie distinguishes between - * different packets that have occupied a single buffer. Thus, the more - * buffers we have, the lower-quality the cookie... */ -#define PKT_BUFFER_BITS 8 -#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) -#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) - -#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) - -int run_flow_through_tables(struct datapath *, struct ofpbuf *, - struct sw_port *); -void fwd_port_input(struct datapath *, struct ofpbuf *, struct sw_port *); -int fwd_control_input(struct datapath *, const struct sender *, - const void *, size_t); - -uint32_t save_buffer(struct ofpbuf *); -static struct ofpbuf *retrieve_buffer(uint32_t id); -static void discard_buffer(uint32_t id); - -static struct sw_port * -lookup_port(struct datapath *dp, uint16_t port_no) -{ - return (port_no < DP_MAX_PORTS ? &dp->ports[port_no] - : port_no == OFPP_LOCAL ? dp->local_port - : NULL); -} - -/* Generates and returns a random datapath id. */ -static uint64_t -gen_datapath_id(void) -{ - uint8_t ea[ETH_ADDR_LEN]; - eth_addr_random(ea); - ea[0] = 0x00; /* Set Nicira OUI. 
*/ - ea[1] = 0x23; - ea[2] = 0x20; - return eth_addr_to_uint64(ea); -} - -int -dp_new(struct datapath **dp_, uint64_t dpid) -{ - struct datapath *dp; - - dp = calloc(1, sizeof *dp); - if (!dp) { - return ENOMEM; - } - - dp->last_timeout = time_now(); - list_init(&dp->remotes); - dp->listeners = NULL; - dp->n_listeners = 0; - dp->id = dpid <= UINT64_C(0xffffffffffff) ? dpid : gen_datapath_id(); - dp->chain = chain_create(dp); - if (!dp->chain) { - VLOG_ERR("could not create chain"); - free(dp); - return ENOMEM; - } - - list_init(&dp->port_list); - dp->flags = 0; - dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; - *dp_ = dp; - return 0; -} - -static int -new_port(struct datapath *dp, struct sw_port *port, uint16_t port_no, - const char *netdev_name, const uint8_t *new_mac) -{ - struct netdev *netdev; - struct in6_addr in6; - struct in_addr in4; - int error; - - error = netdev_open(netdev_name, NETDEV_ETH_TYPE_ANY, &netdev); - if (error) { - return error; - } - if (new_mac && !eth_addr_equals(netdev_get_etheraddr(netdev), new_mac)) { - /* Generally the device has to be down before we change its hardware - * address. Don't bother to check for an error because it's really - * the netdev_set_etheraddr() call below that we care about. 
*/ - netdev_set_flags(netdev, 0, false); - error = netdev_set_etheraddr(netdev, new_mac); - if (error) { - VLOG_WARN("failed to change %s Ethernet address " - "to "ETH_ADDR_FMT": %s", - netdev_name, ETH_ADDR_ARGS(new_mac), strerror(error)); - } - } - error = netdev_set_flags(netdev, NETDEV_UP | NETDEV_PROMISC, false); - if (error) { - VLOG_ERR("failed to set promiscuous mode on %s device", netdev_name); - netdev_close(netdev); - return error; - } - if (netdev_get_in4(netdev, &in4)) { - VLOG_ERR("%s device has assigned IP address %s", - netdev_name, inet_ntoa(in4)); - } - if (netdev_get_in6(netdev, &in6)) { - char in6_name[INET6_ADDRSTRLEN + 1]; - inet_ntop(AF_INET6, &in6, in6_name, sizeof in6_name); - VLOG_ERR("%s device has assigned IPv6 address %s", - netdev_name, in6_name); - } - - memset(port, '\0', sizeof *port); - - port->dp = dp; - port->netdev = netdev; - port->port_no = port_no; - list_push_back(&dp->port_list, &port->node); - - /* Notify the ctlpath that this port has been added */ - send_port_status(port, OFPPR_ADD); - - return 0; -} - -int -dp_add_port(struct datapath *dp, const char *netdev) -{ - int port_no; - for (port_no = 0; port_no < DP_MAX_PORTS; port_no++) { - struct sw_port *port = &dp->ports[port_no]; - if (!port->netdev) { - return new_port(dp, port, port_no, netdev, NULL); - } - } - return EXFULL; -} - -int -dp_add_local_port(struct datapath *dp, const char *netdev) -{ - if (!dp->local_port) { - uint8_t ea[ETH_ADDR_LEN]; - struct sw_port *port; - int error; - - port = xcalloc(1, sizeof *port); - eth_addr_from_uint64(dp->id, ea); - error = new_port(dp, port, OFPP_LOCAL, netdev, ea); - if (!error) { - dp->local_port = port; - } else { - free(port); - } - return error; - } else { - return EXFULL; - } -} - -void -dp_add_pvconn(struct datapath *dp, struct pvconn *pvconn) -{ - dp->listeners = xrealloc(dp->listeners, - sizeof *dp->listeners * (dp->n_listeners + 1)); - dp->listeners[dp->n_listeners++] = pvconn; -} - -void -dp_run(struct datapath 
*dp) -{ - time_t now = time_now(); - struct sw_port *p, *pn; - struct remote *r, *rn; - struct ofpbuf *buffer = NULL; - size_t i; - - if (now != dp->last_timeout) { - struct list deleted = LIST_INITIALIZER(&deleted); - struct sw_flow *f, *n; - - chain_timeout(dp->chain, &deleted); - LIST_FOR_EACH_SAFE (f, n, struct sw_flow, node, &deleted) { - dp_send_flow_end(dp, f, f->reason); - list_remove(&f->node); - flow_free(f); - } - dp->last_timeout = now; - } - poll_timer_wait(1000); - - LIST_FOR_EACH_SAFE (p, pn, struct sw_port, node, &dp->port_list) { - int error; - - if (!buffer) { - /* Allocate buffer with some headroom to add headers in forwarding - * to the controller or adding a vlan tag, plus an extra 2 bytes to - * allow IP headers to be aligned on a 4-byte boundary. */ - const int headroom = 128 + 2; - const int hard_header = VLAN_ETH_HEADER_LEN; - const int mtu = netdev_get_mtu(p->netdev); - buffer = ofpbuf_new(headroom + hard_header + mtu); - buffer->data = (char*)buffer->data + headroom; - } - error = netdev_recv(p->netdev, buffer); - if (!error) { - p->rx_packets++; - p->rx_bytes += buffer->size; - fwd_port_input(dp, buffer, p); - buffer = NULL; - } else if (error != EAGAIN) { - VLOG_ERR_RL(&rl, "error receiving data from %s: %s", - netdev_get_name(p->netdev), strerror(error)); - } - } - ofpbuf_delete(buffer); - - /* Talk to remotes. 
*/ - LIST_FOR_EACH_SAFE (r, rn, struct remote, node, &dp->remotes) { - remote_run(dp, r); - } - - for (i = 0; i < dp->n_listeners; ) { - struct pvconn *pvconn = dp->listeners[i]; - struct vconn *new_vconn; - int retval = pvconn_accept(pvconn, OFP_VERSION, &new_vconn); - if (!retval) { - remote_create(dp, rconn_new_from_vconn("passive", new_vconn)); - } else if (retval != EAGAIN) { - VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval)); - dp->listeners[i] = dp->listeners[--dp->n_listeners]; - continue; - } - i++; - } -} - -static void -remote_run(struct datapath *dp, struct remote *r) -{ - int i; - - rconn_run(r->rconn); - - /* Do some remote processing, but cap it at a reasonable amount so that - * other processing doesn't starve. */ - for (i = 0; i < 50; i++) { - if (!r->cb_dump) { - struct ofpbuf *buffer; - struct ofp_header *oh; - - buffer = rconn_recv(r->rconn); - if (!buffer) { - break; - } - - if (buffer->size >= sizeof *oh) { - struct sender sender; - - oh = buffer->data; - sender.remote = r; - sender.xid = oh->xid; - fwd_control_input(dp, &sender, buffer->data, buffer->size); - } else { - VLOG_WARN_RL(&rl, "received too-short OpenFlow message"); - } - ofpbuf_delete(buffer); - } else { - if (r->n_txq < TXQ_LIMIT) { - int error = r->cb_dump(dp, r->cb_aux); - if (error <= 0) { - if (error) { - VLOG_WARN_RL(&rl, "dump callback error: %s", - strerror(-error)); - } - r->cb_done(r->cb_aux); - r->cb_dump = NULL; - } - } else { - break; - } - } - } - - if (!rconn_is_alive(r->rconn)) { - remote_destroy(r); - } -} - -static void -remote_wait(struct remote *r) -{ - rconn_run_wait(r->rconn); - rconn_recv_wait(r->rconn); -} - -static void -remote_destroy(struct remote *r) -{ - if (r) { - if (r->cb_dump && r->cb_done) { - r->cb_done(r->cb_aux); - } - list_remove(&r->node); - rconn_destroy(r->rconn); - free(r); - } -} - -static struct remote * -remote_create(struct datapath *dp, struct rconn *rconn) -{ - struct remote *remote = xmalloc(sizeof *remote); - 
list_push_back(&dp->remotes, &remote->node); - remote->rconn = rconn; - remote->cb_dump = NULL; - remote->n_txq = 0; - return remote; -} - -/* Starts a callback-based, reliable, possibly multi-message reply to a - * request made by 'remote'. - * - * 'dump' designates a function that will be called when the 'remote' send - * queue has an empty slot. It should compose a message and send it on - * 'remote'. On success, it should return 1 if it should be called again when - * another send queue slot opens up, 0 if its transmissions are complete, or a - * negative errno value on failure. - * - * 'done' designates a function to clean up any resources allocated for the - * dump. It must handle being called before the dump is complete (which will - * happen if 'remote' is closed unexpectedly). - * - * 'aux' is passed to 'dump' and 'done'. */ -static void -remote_start_dump(struct remote *remote, - int (*dump)(struct datapath *, void *), - void (*done)(void *), - void *aux) -{ - assert(!remote->cb_dump); - remote->cb_dump = dump; - remote->cb_done = done; - remote->cb_aux = aux; -} - -void -dp_wait(struct datapath *dp) -{ - struct sw_port *p; - struct remote *r; - size_t i; - - LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) { - netdev_recv_wait(p->netdev); - } - LIST_FOR_EACH (r, struct remote, node, &dp->remotes) { - remote_wait(r); - } - for (i = 0; i < dp->n_listeners; i++) { - pvconn_wait(dp->listeners[i]); - } -} - -/* Send packets out all the ports except the originating one. If the - * "flood" argument is set, don't send out ports with flooding disabled. 
- */ -static int -output_all(struct datapath *dp, struct ofpbuf *buffer, int in_port, int flood) -{ - struct sw_port *p; - int prev_port; - - prev_port = -1; - LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) { - if (p->port_no == in_port) { - continue; - } - if (flood && p->config & OFPPC_NO_FLOOD) { - continue; - } - if (prev_port != -1) { - dp_output_port(dp, ofpbuf_clone(buffer), in_port, prev_port, - false); - } - prev_port = p->port_no; - } - if (prev_port != -1) - dp_output_port(dp, buffer, in_port, prev_port, false); - else - ofpbuf_delete(buffer); - - return 0; -} - -static void -output_packet(struct datapath *dp, struct ofpbuf *buffer, uint16_t out_port) -{ - struct sw_port *p = lookup_port(dp, out_port); - if (p && p->netdev != NULL) { - if (!(p->config & OFPPC_PORT_DOWN)) { - if (!netdev_send(p->netdev, buffer)) { - p->tx_packets++; - p->tx_bytes += buffer->size; - } else { - p->tx_dropped++; - } - } - ofpbuf_delete(buffer); - return; - } - - ofpbuf_delete(buffer); - VLOG_DBG_RL(&rl, "can't forward to bad port %d\n", out_port); -} - -/* Takes ownership of 'buffer' and transmits it to 'out_port' on 'dp'. 
- */ -void -dp_output_port(struct datapath *dp, struct ofpbuf *buffer, - int in_port, int out_port, bool ignore_no_fwd) -{ - - assert(buffer); - switch (out_port) { - case OFPP_IN_PORT: - output_packet(dp, buffer, in_port); - break; - - case OFPP_TABLE: { - struct sw_port *p = lookup_port(dp, in_port); - if (run_flow_through_tables(dp, buffer, p)) { - ofpbuf_delete(buffer); - } - break; - } - - case OFPP_FLOOD: - output_all(dp, buffer, in_port, 1); - break; - - case OFPP_ALL: - output_all(dp, buffer, in_port, 0); - break; - - case OFPP_CONTROLLER: - dp_output_control(dp, buffer, in_port, 0, OFPR_ACTION); - break; - - case OFPP_LOCAL: - default: - if (in_port == out_port) { - VLOG_DBG_RL(&rl, "can't directly forward to input port"); - return; - } - output_packet(dp, buffer, out_port); - break; - } -} - -static void * -make_openflow_reply(size_t openflow_len, uint8_t type, - const struct sender *sender, struct ofpbuf **bufferp) -{ - return make_openflow_xid(openflow_len, type, sender ? sender->xid : 0, - bufferp); -} - -static int -send_openflow_buffer_to_remote(struct ofpbuf *buffer, struct remote *remote) -{ - int retval = rconn_send_with_limit(remote->rconn, buffer, &remote->n_txq, - TXQ_LIMIT); - if (retval) { - VLOG_WARN_RL(&rl, "send to %s failed: %s", - rconn_get_name(remote->rconn), strerror(retval)); - } - return retval; -} - -static int -send_openflow_buffer(struct datapath *dp, struct ofpbuf *buffer, - const struct sender *sender) -{ - update_openflow_length(buffer); - if (sender) { - /* Send back to the sender. */ - return send_openflow_buffer_to_remote(buffer, sender->remote); - } else { - /* Broadcast to all remotes. 
*/ - struct remote *r, *prev = NULL; - LIST_FOR_EACH (r, struct remote, node, &dp->remotes) { - if (prev) { - send_openflow_buffer_to_remote(ofpbuf_clone(buffer), prev); - } - prev = r; - } - if (prev) { - send_openflow_buffer_to_remote(buffer, prev); - } else { - ofpbuf_delete(buffer); - } - return 0; - } -} - -/* Takes ownership of 'buffer' and transmits it to 'dp''s controller. If the - * packet can be saved in a buffer, then only the first max_len bytes of - * 'buffer' are sent; otherwise, all of 'buffer' is sent. 'reason' indicates - * why 'buffer' is being sent. 'max_len' sets the maximum number of bytes that - * the caller wants to be sent; a value of 0 indicates the entire packet should - * be sent. */ -void -dp_output_control(struct datapath *dp, struct ofpbuf *buffer, int in_port, - size_t max_len, int reason) -{ - struct ofp_packet_in *opi; - size_t total_len; - uint32_t buffer_id; - - buffer_id = save_buffer(buffer); - total_len = buffer->size; - if (buffer_id != UINT32_MAX && max_len && buffer->size > max_len) { - buffer->size = max_len; - } - - opi = ofpbuf_push_uninit(buffer, offsetof(struct ofp_packet_in, data)); - opi->header.version = OFP_VERSION; - opi->header.type = OFPT_PACKET_IN; - opi->header.length = htons(buffer->size); - opi->header.xid = htonl(0); - opi->buffer_id = htonl(buffer_id); - opi->total_len = htons(total_len); - opi->in_port = htons(in_port); - opi->reason = reason; - opi->pad = 0; - send_openflow_buffer(dp, buffer, NULL); -} - -static void -fill_port_desc(struct sw_port *p, struct ofp_phy_port *desc) -{ - uint32_t curr, advertised, supported, peer; - - desc->port_no = htons(p->port_no); - strncpy((char *) desc->name, netdev_get_name(p->netdev), - sizeof desc->name); - desc->name[sizeof desc->name - 1] = '\0'; - memcpy(desc->hw_addr, netdev_get_etheraddr(p->netdev), ETH_ADDR_LEN); - desc->config = htonl(p->config); - desc->state = htonl(p->state); - netdev_get_features(p->netdev, &curr, &advertised, &supported, &peer); - 
desc->curr = htonl(curr); - desc->supported = htonl(supported); - desc->advertised = htonl(advertised); - desc->peer = htonl(peer); -} - -static void -dp_send_features_reply(struct datapath *dp, const struct sender *sender) -{ - struct ofpbuf *buffer; - struct ofp_switch_features *ofr; - struct sw_port *p; - - ofr = make_openflow_reply(sizeof *ofr, OFPT_FEATURES_REPLY, - sender, &buffer); - ofr->datapath_id = htonll(dp->id); - ofr->n_tables = dp->chain->n_tables; - ofr->n_buffers = htonl(N_PKT_BUFFERS); - ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES); - ofr->actions = htonl(OFP_SUPPORTED_ACTIONS); - LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) { - struct ofp_phy_port *opp = ofpbuf_put_uninit(buffer, sizeof *opp); - memset(opp, 0, sizeof *opp); - fill_port_desc(p, opp); - } - send_openflow_buffer(dp, buffer, sender); -} - -void -update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm) -{ - struct sw_port *p = lookup_port(dp, ntohs(opm->port_no)); - - /* Make sure the port id hasn't changed since this was sent */ - if (!p || memcmp(opm->hw_addr, netdev_get_etheraddr(p->netdev), - ETH_ADDR_LEN) != 0) { - return; - } - - - if (opm->mask) { - uint32_t config_mask = ntohl(opm->mask); - p->config &= ~config_mask; - p->config |= ntohl(opm->config) & config_mask; - } -} - -static void -send_port_status(struct sw_port *p, uint8_t status) -{ - struct ofpbuf *buffer; - struct ofp_port_status *ops; - ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &buffer); - ops->reason = status; - memset(ops->pad, 0, sizeof ops->pad); - fill_port_desc(p, &ops->desc); - - send_openflow_buffer(p->dp, buffer, NULL); -} - -void -dp_send_flow_end(struct datapath *dp, struct sw_flow *flow, - enum nx_flow_end_reason reason) -{ - struct ofpbuf *buffer; - struct nx_flow_end *nfe; - - if (!dp->send_flow_end) { - return; - } - - nfe = make_openflow_xid(sizeof *nfe, OFPT_VENDOR, 0, &buffer); - if (!nfe) { - return; - } - nfe->header.vendor = htonl(NX_VENDOR_ID); 
- nfe->header.subtype = htonl(NXT_FLOW_END); - - flow_to_match(&flow->key.flow, flow->key.wildcards, &nfe->match); - - nfe->priority = htons(flow->priority); - nfe->reason = reason; - - nfe->tcp_flags = flow->tcp_flags; - nfe->ip_tos = flow->ip_tos; - - memset(nfe->pad, 0, sizeof nfe->pad); - - nfe->init_time = htonll(flow->created); - nfe->used_time = htonll(flow->used); - nfe->end_time = htonll(time_msec()); - - nfe->packet_count = htonll(flow->packet_count); - nfe->byte_count = htonll(flow->byte_count); - - send_openflow_buffer(dp, buffer, NULL); -} - -void -dp_send_error_msg(struct datapath *dp, const struct sender *sender, - uint16_t type, uint16_t code, const void *data, size_t len) -{ - struct ofpbuf *buffer; - struct ofp_error_msg *oem; - oem = make_openflow_reply(sizeof(*oem)+len, OFPT_ERROR, sender, &buffer); - oem->type = htons(type); - oem->code = htons(code); - memcpy(oem->data, data, len); - send_openflow_buffer(dp, buffer, sender); -} - -static void -fill_flow_stats(struct ofpbuf *buffer, struct sw_flow *flow, - int table_idx, uint64_t now) -{ - struct ofp_flow_stats *ofs; - int length = sizeof *ofs + flow->sf_acts->actions_len; - ofs = ofpbuf_put_uninit(buffer, length); - ofs->length = htons(length); - ofs->table_id = table_idx; - ofs->pad = 0; - ofs->match.wildcards = htonl(flow->key.wildcards); - ofs->match.in_port = flow->key.flow.in_port; - memcpy(ofs->match.dl_src, flow->key.flow.dl_src, ETH_ADDR_LEN); - memcpy(ofs->match.dl_dst, flow->key.flow.dl_dst, ETH_ADDR_LEN); - ofs->match.dl_vlan = flow->key.flow.dl_vlan; - ofs->match.dl_type = flow->key.flow.dl_type; - ofs->match.nw_src = flow->key.flow.nw_src; - ofs->match.nw_dst = flow->key.flow.nw_dst; - ofs->match.nw_proto = flow->key.flow.nw_proto; - ofs->match.pad = 0; - ofs->match.tp_src = flow->key.flow.tp_src; - ofs->match.tp_dst = flow->key.flow.tp_dst; - ofs->duration = htonl((now - flow->created) / 1000); - ofs->priority = htons(flow->priority); - ofs->idle_timeout = 
htons(flow->idle_timeout); - ofs->hard_timeout = htons(flow->hard_timeout); - memset(ofs->pad2, 0, sizeof ofs->pad2); - ofs->packet_count = htonll(flow->packet_count); - ofs->byte_count = htonll(flow->byte_count); - memcpy(ofs->actions, flow->sf_acts->actions, flow->sf_acts->actions_len); -} - - -/* 'buffer' was received on 'p', which may be a a physical switch port or a - * null pointer. Process it according to 'dp''s flow table. Returns 0 if - * successful, in which case 'buffer' is destroyed, or -ESRCH if there is no - * matching flow, in which case 'buffer' still belongs to the caller. */ -int run_flow_through_tables(struct datapath *dp, struct ofpbuf *buffer, - struct sw_port *p) -{ - struct sw_flow_key key; - struct sw_flow *flow; - - key.wildcards = 0; - if (flow_extract(buffer, p ? p->port_no : OFPP_NONE, &key.flow) - && (dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) { - /* Drop fragment. */ - ofpbuf_delete(buffer); - return 0; - } - if (p && p->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) - && p->config & (!eth_addr_equals(key.flow.dl_dst, stp_eth_addr) - ? OFPPC_NO_RECV : OFPPC_NO_RECV_STP)) { - ofpbuf_delete(buffer); - return 0; - } - - flow = chain_lookup(dp->chain, &key); - if (flow != NULL) { - flow_used(flow, buffer); - execute_actions(dp, buffer, &key, flow->sf_acts->actions, - flow->sf_acts->actions_len, false); - return 0; - } else { - return -ESRCH; - } -} - -/* 'buffer' was received on 'p', which may be a a physical switch port or a - * null pointer. Process it according to 'dp''s flow table, sending it up to - * the controller if no flow matches. Takes ownership of 'buffer'. 
*/ -void fwd_port_input(struct datapath *dp, struct ofpbuf *buffer, - struct sw_port *p) -{ - if (run_flow_through_tables(dp, buffer, p)) { - dp_output_control(dp, buffer, p->port_no, - dp->miss_send_len, OFPR_NO_MATCH); - } -} - -static int -recv_features_request(struct datapath *dp, const struct sender *sender, - const void *msg UNUSED) -{ - dp_send_features_reply(dp, sender); - return 0; -} - -static int -recv_get_config_request(struct datapath *dp, const struct sender *sender, - const void *msg UNUSED) -{ - struct ofpbuf *buffer; - struct ofp_switch_config *osc; - - osc = make_openflow_reply(sizeof *osc, OFPT_GET_CONFIG_REPLY, - sender, &buffer); - - osc->flags = htons(dp->flags); - osc->miss_send_len = htons(dp->miss_send_len); - - return send_openflow_buffer(dp, buffer, sender); -} - -static int -recv_set_config(struct datapath *dp, const struct sender *sender UNUSED, - const void *msg) -{ - const struct ofp_switch_config *osc = msg; - int flags; - - flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK); - if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL - && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) { - flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP; - } - dp->flags = flags; - dp->miss_send_len = ntohs(osc->miss_send_len); - return 0; -} - -static int -recv_packet_out(struct datapath *dp, const struct sender *sender, - const void *msg) -{ - const struct ofp_packet_out *opo = msg; - struct sw_flow_key key; - uint16_t v_code; - struct ofpbuf *buffer; - size_t actions_len = ntohs(opo->actions_len); - - if (actions_len > (ntohs(opo->header.length) - sizeof *opo)) { - VLOG_DBG_RL(&rl, "message too short for number of actions"); - return -EINVAL; - } - - if (ntohl(opo->buffer_id) == (uint32_t) -1) { - /* FIXME: can we avoid copying data here? 
*/ - int data_len = ntohs(opo->header.length) - sizeof *opo - actions_len; - buffer = ofpbuf_new(data_len); - ofpbuf_put(buffer, (uint8_t *)opo->actions + actions_len, data_len); - } else { - buffer = retrieve_buffer(ntohl(opo->buffer_id)); - if (!buffer) { - return -ESRCH; - } - } - - flow_extract(buffer, ntohs(opo->in_port), &key.flow); - - v_code = dp_validate_actions(dp, &key, opo->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code, - msg, ntohs(opo->header.length)); - goto error; - } - - execute_actions(dp, buffer, &key, opo->actions, actions_len, true); - - return 0; - -error: - ofpbuf_delete(buffer); - return -EINVAL; -} - -static int -recv_port_mod(struct datapath *dp, const struct sender *sender UNUSED, - const void *msg) -{ - const struct ofp_port_mod *opm = msg; - - update_port_flags(dp, opm); - - return 0; -} - -static int -add_flow(struct datapath *dp, const struct sender *sender, - const struct ofp_flow_mod *ofm) -{ - int error = -ENOMEM; - uint16_t v_code; - struct sw_flow *flow; - size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm; - - /* Allocate memory. */ - flow = flow_alloc(actions_len); - if (flow == NULL) - goto error; - - flow_extract_match(&flow->key, &ofm->match); - - v_code = dp_validate_actions(dp, &flow->key, ofm->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code, - ofm, ntohs(ofm->header.length)); - goto error_free_flow; - } - - /* Fill out flow. */ - flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1; - flow->idle_timeout = ntohs(ofm->idle_timeout); - flow->hard_timeout = ntohs(ofm->hard_timeout); - flow->used = flow->created = time_msec(); - flow->sf_acts->actions_len = actions_len; - flow->byte_count = 0; - flow->packet_count = 0; - flow->tcp_flags = 0; - flow->ip_tos = 0; - memcpy(flow->sf_acts->actions, ofm->actions, actions_len); - - /* Act. 
*/ - error = chain_insert(dp->chain, flow); - if (error == -ENOBUFS) { - dp_send_error_msg(dp, sender, OFPET_FLOW_MOD_FAILED, - OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length)); - goto error_free_flow; - } else if (error) { - goto error_free_flow; - } - error = 0; - if (ntohl(ofm->buffer_id) != UINT32_MAX) { - struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id)); - if (buffer) { - struct sw_flow_key key; - uint16_t in_port = ntohs(ofm->match.in_port); - flow_extract(buffer, in_port, &key.flow); - flow_used(flow, buffer); - execute_actions(dp, buffer, &key, - ofm->actions, actions_len, false); - } else { - error = -ESRCH; - } - } - return error; - -error_free_flow: - flow_free(flow); -error: - if (ntohl(ofm->buffer_id) != (uint32_t) -1) - discard_buffer(ntohl(ofm->buffer_id)); - return error; -} - -static int -mod_flow(struct datapath *dp, const struct sender *sender, - const struct ofp_flow_mod *ofm) -{ - int error = -ENOMEM; - uint16_t v_code; - size_t actions_len; - struct sw_flow_key key; - uint16_t priority; - int strict; - - flow_extract_match(&key, &ofm->match); - - actions_len = ntohs(ofm->header.length) - sizeof *ofm; - - v_code = dp_validate_actions(dp, &key, ofm->actions, actions_len); - if (v_code != ACT_VALIDATION_OK) { - dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code, - ofm, ntohs(ofm->header.length)); - goto error; - } - - priority = key.wildcards ? ntohs(ofm->priority) : -1; - strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 
1 : 0; - chain_modify(dp->chain, &key, priority, strict, ofm->actions, actions_len); - - if (ntohl(ofm->buffer_id) != UINT32_MAX) { - struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id)); - if (buffer) { - struct sw_flow_key skb_key; - uint16_t in_port = ntohs(ofm->match.in_port); - flow_extract(buffer, in_port, &skb_key.flow); - execute_actions(dp, buffer, &skb_key, - ofm->actions, actions_len, false); - } else { - error = -ESRCH; - } - } - return error; - -error: - if (ntohl(ofm->buffer_id) != (uint32_t) -1) - discard_buffer(ntohl(ofm->buffer_id)); - return error; -} - -static int -recv_flow(struct datapath *dp, const struct sender *sender, - const void *msg) -{ - const struct ofp_flow_mod *ofm = msg; - uint16_t command = ntohs(ofm->command); - - if (command == OFPFC_ADD) { - return add_flow(dp, sender, ofm); - } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) { - return mod_flow(dp, sender, ofm); - } else if (command == OFPFC_DELETE) { - struct sw_flow_key key; - flow_extract_match(&key, &ofm->match); - return chain_delete(dp->chain, &key, ofm->out_port, 0, 0) ? 0 : -ESRCH; - } else if (command == OFPFC_DELETE_STRICT) { - struct sw_flow_key key; - uint16_t priority; - flow_extract_match(&key, &ofm->match); - priority = key.wildcards ? ntohs(ofm->priority) : -1; - return chain_delete(dp->chain, &key, ofm->out_port, - priority, 1) ? 
0 : -ESRCH; - } else { - return -ENODEV; - } -} - -static int -desc_stats_dump(struct datapath *dp UNUSED, void *state UNUSED, - struct ofpbuf *buffer) -{ - struct ofp_desc_stats *ods = ofpbuf_put_uninit(buffer, sizeof *ods); - - strncpy(ods->mfr_desc, &mfr_desc, sizeof ods->mfr_desc); - strncpy(ods->hw_desc, &hw_desc, sizeof ods->hw_desc); - strncpy(ods->sw_desc, &sw_desc, sizeof ods->sw_desc); - strncpy(ods->serial_num, &serial_num, sizeof ods->serial_num); - - return 0; -} - -struct flow_stats_state { - int table_idx; - struct sw_table_position position; - struct ofp_flow_stats_request rq; - uint64_t now; /* Current time in milliseconds */ - - struct ofpbuf *buffer; -}; - -#define MAX_FLOW_STATS_BYTES 4096 - -static int -flow_stats_init(const void *body, int body_len UNUSED, void **state) -{ - const struct ofp_flow_stats_request *fsr = body; - struct flow_stats_state *s = xmalloc(sizeof *s); - s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id; - memset(&s->position, 0, sizeof s->position); - s->rq = *fsr; - *state = s; - return 0; -} - -static int flow_stats_dump_callback(struct sw_flow *flow, void *private) -{ - struct flow_stats_state *s = private; - fill_flow_stats(s->buffer, flow, s->table_idx, s->now); - return s->buffer->size >= MAX_FLOW_STATS_BYTES; -} - -static int flow_stats_dump(struct datapath *dp, void *state, - struct ofpbuf *buffer) -{ - struct flow_stats_state *s = state; - struct sw_flow_key match_key; - - flow_extract_match(&match_key, &s->rq.match); - s->buffer = buffer; - s->now = time_msec(); - while (s->table_idx < dp->chain->n_tables - && (s->rq.table_id == 0xff || s->rq.table_id == s->table_idx)) - { - struct sw_table *table = dp->chain->tables[s->table_idx]; - - if (table->iterate(table, &match_key, s->rq.out_port, - &s->position, flow_stats_dump_callback, s)) - break; - - s->table_idx++; - memset(&s->position, 0, sizeof s->position); - } - return s->buffer->size >= MAX_FLOW_STATS_BYTES; -} - -static void flow_stats_done(void 
*state) -{ - free(state); -} - -struct aggregate_stats_state { - struct ofp_aggregate_stats_request rq; -}; - -static int -aggregate_stats_init(const void *body, int body_len UNUSED, void **state) -{ - const struct ofp_aggregate_stats_request *rq = body; - struct aggregate_stats_state *s = xmalloc(sizeof *s); - s->rq = *rq; - *state = s; - return 0; -} - -static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private) -{ - struct ofp_aggregate_stats_reply *rpy = private; - rpy->packet_count += flow->packet_count; - rpy->byte_count += flow->byte_count; - rpy->flow_count++; - return 0; -} - -static int aggregate_stats_dump(struct datapath *dp, void *state, - struct ofpbuf *buffer) -{ - struct aggregate_stats_state *s = state; - struct ofp_aggregate_stats_request *rq = &s->rq; - struct ofp_aggregate_stats_reply *rpy; - struct sw_table_position position; - struct sw_flow_key match_key; - int table_idx; - - rpy = ofpbuf_put_uninit(buffer, sizeof *rpy); - memset(rpy, 0, sizeof *rpy); - - flow_extract_match(&match_key, &rq->match); - table_idx = rq->table_id == 0xff ? 
0 : rq->table_id; - memset(&position, 0, sizeof position); - while (table_idx < dp->chain->n_tables - && (rq->table_id == 0xff || rq->table_id == table_idx)) - { - struct sw_table *table = dp->chain->tables[table_idx]; - int error; - - error = table->iterate(table, &match_key, rq->out_port, &position, - aggregate_stats_dump_callback, rpy); - if (error) - return error; - - table_idx++; - memset(&position, 0, sizeof position); - } - - rpy->packet_count = htonll(rpy->packet_count); - rpy->byte_count = htonll(rpy->byte_count); - rpy->flow_count = htonl(rpy->flow_count); - return 0; -} - -static void aggregate_stats_done(void *state) -{ - free(state); -} - -static int -table_stats_dump(struct datapath *dp, void *state UNUSED, - struct ofpbuf *buffer) -{ - int i; - for (i = 0; i < dp->chain->n_tables; i++) { - struct ofp_table_stats *ots = ofpbuf_put_uninit(buffer, sizeof *ots); - struct sw_table_stats stats; - dp->chain->tables[i]->stats(dp->chain->tables[i], &stats); - strncpy(ots->name, stats.name, sizeof ots->name); - ots->table_id = i; - ots->wildcards = htonl(stats.wildcards); - memset(ots->pad, 0, sizeof ots->pad); - ots->max_entries = htonl(stats.max_flows); - ots->active_count = htonl(stats.n_flows); - ots->lookup_count = htonll(stats.n_lookup); - ots->matched_count = htonll(stats.n_matched); - } - return 0; -} - -struct port_stats_state { - int port; -}; - -static int -port_stats_init(const void *body UNUSED, int body_len UNUSED, void **state) -{ - struct port_stats_state *s = xmalloc(sizeof *s); - s->port = 0; - *state = s; - return 0; -} - -static void -dump_port_stats(struct sw_port *port, struct ofpbuf *buffer) -{ - struct ofp_port_stats *ops = ofpbuf_put_uninit(buffer, sizeof *ops); - ops->port_no = htons(port->port_no); - memset(ops->pad, 0, sizeof ops->pad); - ops->rx_packets = htonll(port->rx_packets); - ops->tx_packets = htonll(port->tx_packets); - ops->rx_bytes = htonll(port->rx_bytes); - ops->tx_bytes = htonll(port->tx_bytes); - ops->rx_dropped = 
htonll(-1); - ops->tx_dropped = htonll(port->tx_dropped); - ops->rx_errors = htonll(-1); - ops->tx_errors = htonll(-1); - ops->rx_frame_err = htonll(-1); - ops->rx_over_err = htonll(-1); - ops->rx_crc_err = htonll(-1); - ops->collisions = htonll(-1); -} - -static int port_stats_dump(struct datapath *dp, void *state, - struct ofpbuf *buffer) -{ - struct port_stats_state *s = state; - int i; - - for (i = s->port; i < DP_MAX_PORTS; i++) { - struct sw_port *p = &dp->ports[i]; - if (p->netdev) { - dump_port_stats(p, buffer); - } - } - s->port = i; - - if (dp->local_port) { - dump_port_stats(dp->local_port, buffer); - s->port = OFPP_LOCAL + 1; - } - return 0; -} - -static void port_stats_done(void *state) -{ - free(state); -} - -struct stats_type { - /* Value for 'type' member of struct ofp_stats_request. */ - int type; - - /* Minimum and maximum acceptable number of bytes in body member of - * struct ofp_stats_request. */ - size_t min_body, max_body; - - /* Prepares to dump some kind of datapath statistics. 'body' and - * 'body_len' are the 'body' member of the struct ofp_stats_request. - * Returns zero if successful, otherwise a negative error code. - * May initialize '*state' to state information. May be null if no - * initialization is required.*/ - int (*init)(const void *body, int body_len, void **state); - - /* Appends statistics for 'dp' to 'buffer', which initially contains a - * struct ofp_stats_reply. On success, it should return 1 if it should be - * called again later with another buffer, 0 if it is done, or a negative - * errno value on failure. */ - int (*dump)(struct datapath *dp, void *state, struct ofpbuf *buffer); - - /* Cleans any state created by the init or dump functions. May be null - * if no cleanup is required. 
*/ - void (*done)(void *state); -}; - -static const struct stats_type stats[] = { - { - OFPST_DESC, - 0, - 0, - NULL, - desc_stats_dump, - NULL - }, - { - OFPST_FLOW, - sizeof(struct ofp_flow_stats_request), - sizeof(struct ofp_flow_stats_request), - flow_stats_init, - flow_stats_dump, - flow_stats_done - }, - { - OFPST_AGGREGATE, - sizeof(struct ofp_aggregate_stats_request), - sizeof(struct ofp_aggregate_stats_request), - aggregate_stats_init, - aggregate_stats_dump, - aggregate_stats_done - }, - { - OFPST_TABLE, - 0, - 0, - NULL, - table_stats_dump, - NULL - }, - { - OFPST_PORT, - 0, - 0, - port_stats_init, - port_stats_dump, - port_stats_done - }, -}; - -struct stats_dump_cb { - bool done; - struct ofp_stats_request *rq; - struct sender sender; - const struct stats_type *s; - void *state; -}; - -static int -stats_dump(struct datapath *dp, void *cb_) -{ - struct stats_dump_cb *cb = cb_; - struct ofp_stats_reply *osr; - struct ofpbuf *buffer; - int err; - - if (cb->done) { - return 0; - } - - osr = make_openflow_reply(sizeof *osr, OFPT_STATS_REPLY, &cb->sender, - &buffer); - osr->type = htons(cb->s->type); - osr->flags = 0; - - err = cb->s->dump(dp, cb->state, buffer); - if (err >= 0) { - int err2; - if (!err) { - cb->done = true; - } else { - /* Buffer might have been reallocated, so find our data again. 
*/ - osr = ofpbuf_at_assert(buffer, 0, sizeof *osr); - osr->flags = ntohs(OFPSF_REPLY_MORE); - } - err2 = send_openflow_buffer(dp, buffer, &cb->sender); - if (err2) { - err = err2; - } - } - - return err; -} - -static void -stats_done(void *cb_) -{ - struct stats_dump_cb *cb = cb_; - if (cb) { - if (cb->s->done) { - cb->s->done(cb->state); - } - free(cb); - } -} - -static int -recv_stats_request(struct datapath *dp UNUSED, const struct sender *sender, - const void *oh) -{ - const struct ofp_stats_request *rq = oh; - size_t rq_len = ntohs(rq->header.length); - const struct stats_type *st; - struct stats_dump_cb *cb; - int type, body_len; - int err; - - type = ntohs(rq->type); - for (st = stats; ; st++) { - if (st >= &stats[ARRAY_SIZE(stats)]) { - VLOG_WARN_RL(&rl, "received stats request of unknown type %d", - type); - return -EINVAL; - } else if (type == st->type) { - break; - } - } - - cb = xmalloc(sizeof *cb); - cb->done = false; - cb->rq = xmemdup(rq, rq_len); - cb->sender = *sender; - cb->s = st; - cb->state = NULL; - - body_len = rq_len - offsetof(struct ofp_stats_request, body); - if (body_len < cb->s->min_body || body_len > cb->s->max_body) { - VLOG_WARN_RL(&rl, "stats request type %d with bad body length %d", - type, body_len); - err = -EINVAL; - goto error; - } - - if (cb->s->init) { - err = cb->s->init(rq->body, body_len, &cb->state); - if (err) { - VLOG_WARN_RL(&rl, - "failed initialization of stats request type %d: %s", - type, strerror(-err)); - goto error; - } - } - - remote_start_dump(sender->remote, stats_dump, stats_done, cb); - return 0; - -error: - free(cb->rq); - free(cb); - return err; -} - -static int -recv_echo_request(struct datapath *dp, const struct sender *sender, - const void *oh) -{ - return send_openflow_buffer(dp, make_echo_reply(oh), sender); -} - -static int -recv_echo_reply(struct datapath *dp UNUSED, const struct sender *sender UNUSED, - const void *oh UNUSED) -{ - return 0; -} - -static int -recv_vendor(struct datapath *dp, const 
struct sender *sender, - const void *oh) -{ - const struct ofp_vendor_header *ovh = oh; - - switch (ntohl(ovh->vendor)) - { - case NX_VENDOR_ID: - return nx_recv_msg(dp, sender, oh); - - default: - VLOG_WARN_RL(&rl, "unknown vendor: 0x%x\n", ntohl(ovh->vendor)); - dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_VENDOR, oh, ntohs(ovh->header.length)); - return -EINVAL; - } -} - -/* 'msg', which is 'length' bytes long, was received from the control path. - * Apply it to 'chain'. */ -int -fwd_control_input(struct datapath *dp, const struct sender *sender, - const void *msg, size_t length) -{ - int (*handler)(struct datapath *, const struct sender *, const void *); - struct ofp_header *oh; - size_t min_size; - - /* Check encapsulated length. */ - oh = (struct ofp_header *) msg; - if (ntohs(oh->length) > length) { - return -EINVAL; - } - assert(oh->version == OFP_VERSION); - - /* Figure out how to handle it. */ - switch (oh->type) { - case OFPT_FEATURES_REQUEST: - min_size = sizeof(struct ofp_header); - handler = recv_features_request; - break; - case OFPT_GET_CONFIG_REQUEST: - min_size = sizeof(struct ofp_header); - handler = recv_get_config_request; - break; - case OFPT_SET_CONFIG: - min_size = sizeof(struct ofp_switch_config); - handler = recv_set_config; - break; - case OFPT_PACKET_OUT: - min_size = sizeof(struct ofp_packet_out); - handler = recv_packet_out; - break; - case OFPT_FLOW_MOD: - min_size = sizeof(struct ofp_flow_mod); - handler = recv_flow; - break; - case OFPT_PORT_MOD: - min_size = sizeof(struct ofp_port_mod); - handler = recv_port_mod; - break; - case OFPT_STATS_REQUEST: - min_size = sizeof(struct ofp_stats_request); - handler = recv_stats_request; - break; - case OFPT_ECHO_REQUEST: - min_size = sizeof(struct ofp_header); - handler = recv_echo_request; - break; - case OFPT_ECHO_REPLY: - min_size = sizeof(struct ofp_header); - handler = recv_echo_reply; - break; - case OFPT_VENDOR: - min_size = sizeof(struct ofp_vendor_header); - handler 
= recv_vendor; - break; - default: - dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST, OFPBRC_BAD_TYPE, - msg, length); - return -EINVAL; - } - - /* Handle it. */ - if (length < min_size) - return -EFAULT; - return handler(dp, sender, msg); -} - -/* Packet buffering. */ - -#define OVERWRITE_SECS 1 - -struct packet_buffer { - struct ofpbuf *buffer; - uint32_t cookie; - time_t timeout; -}; - -static struct packet_buffer buffers[N_PKT_BUFFERS]; -static unsigned int buffer_idx; - -uint32_t save_buffer(struct ofpbuf *buffer) -{ - struct packet_buffer *p; - uint32_t id; - - buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; - p = &buffers[buffer_idx]; - if (p->buffer) { - /* Don't buffer packet if existing entry is less than - * OVERWRITE_SECS old. */ - if (time_now() < p->timeout) { /* FIXME */ - return -1; - } else { - ofpbuf_delete(p->buffer); - } - } - /* Don't use maximum cookie value since the all-bits-1 id is - * special. */ - if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) - p->cookie = 0; - p->buffer = ofpbuf_clone(buffer); /* FIXME */ - p->timeout = time_now() + OVERWRITE_SECS; /* FIXME */ - id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); - - return id; -} - -static struct ofpbuf *retrieve_buffer(uint32_t id) -{ - struct ofpbuf *buffer = NULL; - struct packet_buffer *p; - - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - buffer = p->buffer; - p->buffer = NULL; - } else { - printf("cookie mismatch: %x != %x\n", - id >> PKT_BUFFER_BITS, p->cookie); - } - - return buffer; -} - -static void discard_buffer(uint32_t id) -{ - struct packet_buffer *p; - - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - ofpbuf_delete(p->buffer); - p->buffer = NULL; - } -} diff --git a/udatapath/datapath.h b/udatapath/datapath.h deleted file mode 100644 index 37cbdbed..00000000 --- a/udatapath/datapath.h +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior 
University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -/* Interface exported by OpenFlow module. 
*/ - -#ifndef DATAPATH_H -#define DATAPATH_H 1 - -#include -#include -#include "openflow/nicira-ext.h" -#include "ofpbuf.h" -#include "timeval.h" -#include "list.h" - -struct rconn; -struct pvconn; -struct sw_flow; -struct sender; - -struct sw_port { - uint32_t config; /* Some subset of OFPPC_* flags. */ - uint32_t state; /* Some subset of OFPPS_* flags. */ - struct datapath *dp; - struct netdev *netdev; - struct list node; /* Element in datapath.ports. */ - unsigned long long int rx_packets, tx_packets; - unsigned long long int rx_bytes, tx_bytes; - unsigned long long int tx_dropped; - uint16_t port_no; -}; - -#define DP_MAX_PORTS 255 -BUILD_ASSERT_DECL(DP_MAX_PORTS <= OFPP_MAX); - -struct datapath { - /* Remote connections. */ - struct list remotes; /* All connections (including controller). */ - - /* Listeners. */ - struct pvconn **listeners; - size_t n_listeners; - - time_t last_timeout; - - /* Unique identifier for this datapath */ - uint64_t id; - - struct sw_chain *chain; /* Forwarding rules. */ - - /* Configuration set from controller. */ - uint16_t flags; - uint16_t miss_send_len; - - /* Flag controlling whether Flow End messages are generated. */ - uint8_t send_flow_end; - - /* Switch ports. */ - struct sw_port ports[DP_MAX_PORTS]; - struct sw_port *local_port; /* OFPP_LOCAL port, if any. */ - struct list port_list; /* All ports, including local_port. 
*/ -}; - -int dp_new(struct datapath **, uint64_t dpid); -int dp_add_port(struct datapath *, const char *netdev); -int dp_add_local_port(struct datapath *, const char *netdev); -void dp_add_pvconn(struct datapath *, struct pvconn *); -void dp_run(struct datapath *); -void dp_wait(struct datapath *); -void dp_send_error_msg(struct datapath *, const struct sender *, - uint16_t, uint16_t, const void *, size_t); -void dp_send_flow_end(struct datapath *, struct sw_flow *, - enum nx_flow_end_reason); -void dp_output_port(struct datapath *, struct ofpbuf *, int in_port, - int out_port, bool ignore_no_fwd); -void dp_output_control(struct datapath *, struct ofpbuf *, int in_port, - size_t max_len, int reason); - -#endif /* datapath.h */ diff --git a/udatapath/dp_act.c b/udatapath/dp_act.c deleted file mode 100644 index 2b2115eb..00000000 --- a/udatapath/dp_act.c +++ /dev/null @@ -1,478 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -/* Functions for executing OpenFlow actions. */ - -#include -#include "csum.h" -#include "packets.h" -#include "dp_act.h" -#include "openflow/nicira-ext.h" -#include "nx_act.h" - - -static uint16_t -validate_output(struct datapath *dp UNUSED, const struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_output *oa = (struct ofp_action_output *)ah; - - /* To prevent loops, make sure there's no action to send to the - * OFP_TABLE virtual port. - */ - if (oa->port == htons(OFPP_NONE) || - (!(key->wildcards & OFPFW_IN_PORT) - && oa->port == key->flow.in_port)) { - return OFPBAC_BAD_OUT_PORT; - } - return ACT_VALIDATION_OK; -} - -static void -do_output(struct datapath *dp, struct ofpbuf *buffer, int in_port, - size_t max_len, int out_port, bool ignore_no_fwd) -{ - if (out_port != OFPP_CONTROLLER) { - dp_output_port(dp, buffer, in_port, out_port, ignore_no_fwd); - } else { - dp_output_control(dp, buffer, in_port, max_len, OFPR_ACTION); - } -} - -/* Modify vlan tag control information (TCI). Only sets the TCI bits - * indicated by 'mask'. If no vlan tag is present, one is added. 
- */ -static void -modify_vlan_tci(struct ofpbuf *buffer, struct sw_flow_key *key, - uint16_t tci, uint16_t mask) -{ - struct vlan_eth_header *veh; - - if (key->flow.dl_vlan != htons(OFP_VLAN_NONE)) { - /* Modify vlan id, but maintain other TCI values */ - veh = buffer->l2; - veh->veth_tci &= ~htons(mask); - veh->veth_tci |= htons(tci); - } else { - /* Insert new vlan id. */ - struct eth_header *eh = buffer->l2; - struct vlan_eth_header tmp; - memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); - memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); - tmp.veth_type = htons(ETH_TYPE_VLAN); - tmp.veth_tci = htons(tci); - tmp.veth_next_type = eh->eth_type; - - veh = ofpbuf_push_uninit(buffer, VLAN_HEADER_LEN); - memcpy(veh, &tmp, sizeof tmp); - buffer->l2 = (char*)buffer->l2 - VLAN_HEADER_LEN; - } - - key->flow.dl_vlan = veh->veth_tci & htons(VLAN_VID_MASK); -} - - -/* Remove an existing vlan header if it exists. */ -static void -vlan_pull_tag(struct ofpbuf *buffer) -{ - struct vlan_eth_header *veh = buffer->l2; - - if (veh->veth_type == htons(ETH_TYPE_VLAN)) { - struct eth_header tmp; - - memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); - memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); - tmp.eth_type = veh->veth_next_type; - - buffer->size -= VLAN_HEADER_LEN; - buffer->data = (char*)buffer->data + VLAN_HEADER_LEN; - buffer->l2 = (char*)buffer->l2 + VLAN_HEADER_LEN; - memcpy(buffer->data, &tmp, sizeof tmp); - } -} - -static void -set_vlan_vid(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah; - uint16_t tci = ntohs(va->vlan_vid); - - modify_vlan_tci(buffer, key, tci, VLAN_VID_MASK); -} - -static void -set_vlan_pcp(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah; - uint16_t tci = (uint16_t)va->vlan_pcp << 13; - - modify_vlan_tci(buffer, key, tci, VLAN_PCP_MASK); 
-} - -static void -strip_vlan(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah UNUSED) -{ - vlan_pull_tag(buffer); - key->flow.dl_vlan = htons(OFP_VLAN_NONE); -} - -static void -set_dl_addr(struct ofpbuf *buffer, struct sw_flow_key *key UNUSED, - const struct ofp_action_header *ah) -{ - struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah; - struct eth_header *eh = buffer->l2; - - if (da->type == htons(OFPAT_SET_DL_SRC)) { - memcpy(eh->eth_src, da->dl_addr, sizeof eh->eth_src); - } else { - memcpy(eh->eth_dst, da->dl_addr, sizeof eh->eth_dst); - } -} - -static void -set_nw_addr(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah; - uint16_t eth_proto = ntohs(key->flow.dl_type); - - if (eth_proto == ETH_TYPE_IP) { - struct ip_header *nh = buffer->l3; - uint8_t nw_proto = key->flow.nw_proto; - uint32_t new, *field; - - new = na->nw_addr; - field = na->type == OFPAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; - if (nw_proto == IP_TYPE_TCP) { - struct tcp_header *th = buffer->l4; - th->tcp_csum = recalc_csum32(th->tcp_csum, *field, new); - } else if (nw_proto == IP_TYPE_UDP) { - struct udp_header *th = buffer->l4; - if (th->udp_csum) { - th->udp_csum = recalc_csum32(th->udp_csum, *field, new); - if (!th->udp_csum) { - th->udp_csum = 0xffff; - } - } - } - nh->ip_csum = recalc_csum32(nh->ip_csum, *field, new); - *field = new; - } -} - -static void -set_tp_port(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah; - uint16_t eth_proto = ntohs(key->flow.dl_type); - - if (eth_proto == ETH_TYPE_IP) { - uint8_t nw_proto = key->flow.nw_proto; - uint16_t new, *field; - - new = ta->tp_port; - if (nw_proto == IP_TYPE_TCP) { - struct tcp_header *th = buffer->l4; - field = ta->type == OFPAT_SET_TP_SRC ? 
&th->tcp_src : &th->tcp_dst; - th->tcp_csum = recalc_csum16(th->tcp_csum, *field, new); - *field = new; - } else if (nw_proto == IP_TYPE_UDP) { - struct udp_header *th = buffer->l4; - field = ta->type == OFPAT_SET_TP_SRC ? &th->udp_src : &th->udp_dst; - th->udp_csum = recalc_csum16(th->udp_csum, *field, new); - *field = new; - } - } -} - -struct openflow_action { - size_t min_size; - size_t max_size; - uint16_t (*validate)(struct datapath *dp, - const struct sw_flow_key *key, - const struct ofp_action_header *ah); - void (*execute)(struct ofpbuf *buffer, - struct sw_flow_key *key, - const struct ofp_action_header *ah); -}; - -static const struct openflow_action of_actions[] = { - [OFPAT_OUTPUT] = { - sizeof(struct ofp_action_output), - sizeof(struct ofp_action_output), - validate_output, - NULL /* This is optimized into execute_actions */ - }, - [OFPAT_SET_VLAN_VID] = { - sizeof(struct ofp_action_vlan_vid), - sizeof(struct ofp_action_vlan_vid), - NULL, - set_vlan_vid - }, - [OFPAT_SET_VLAN_PCP] = { - sizeof(struct ofp_action_vlan_pcp), - sizeof(struct ofp_action_vlan_pcp), - NULL, - set_vlan_pcp - }, - [OFPAT_STRIP_VLAN] = { - sizeof(struct ofp_action_header), - sizeof(struct ofp_action_header), - NULL, - strip_vlan - }, - [OFPAT_SET_DL_SRC] = { - sizeof(struct ofp_action_dl_addr), - sizeof(struct ofp_action_dl_addr), - NULL, - set_dl_addr - }, - [OFPAT_SET_DL_DST] = { - sizeof(struct ofp_action_dl_addr), - sizeof(struct ofp_action_dl_addr), - NULL, - set_dl_addr - }, - [OFPAT_SET_NW_SRC] = { - sizeof(struct ofp_action_nw_addr), - sizeof(struct ofp_action_nw_addr), - NULL, - set_nw_addr - }, - [OFPAT_SET_NW_DST] = { - sizeof(struct ofp_action_nw_addr), - sizeof(struct ofp_action_nw_addr), - NULL, - set_nw_addr - }, - [OFPAT_SET_TP_SRC] = { - sizeof(struct ofp_action_tp_port), - sizeof(struct ofp_action_tp_port), - NULL, - set_tp_port - }, - [OFPAT_SET_TP_DST] = { - sizeof(struct ofp_action_tp_port), - sizeof(struct ofp_action_tp_port), - NULL, - set_tp_port - } - 
/* OFPAT_VENDOR is not here, since it would blow up the array size. */ -}; - -/* Validate built-in OpenFlow actions. Either returns ACT_VALIDATION_OK - * or an OFPET_BAD_ACTION error code. */ -static uint16_t -validate_ofpat(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t type, uint16_t len) -{ - uint16_t ret = ACT_VALIDATION_OK; - const struct openflow_action *act = &of_actions[type]; - - if ((len < act->min_size) || (len > act->max_size)) { - return OFPBAC_BAD_LEN; - } - - if (act->validate) { - ret = act->validate(dp, key, ah); - } - - return ret; -} - -/* Validate vendor-defined actions. Either returns ACT_VALIDATION_OK - * or an OFPET_BAD_ACTION error code. */ -static uint16_t -validate_vendor(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t len) -{ - struct ofp_action_vendor_header *avh; - int ret = ACT_VALIDATION_OK; - - if (len < sizeof(struct ofp_action_vendor_header)) { - return OFPBAC_BAD_LEN; - } - - avh = (struct ofp_action_vendor_header *)ah; - - switch(ntohl(avh->vendor)) { - case NX_VENDOR_ID: - ret = nx_validate_act(dp, key, avh, len); - break; - - default: - return OFPBAC_BAD_VENDOR; - } - - return ret; -} - -/* Validates a list of actions. If a problem is found, a code for the - * OFPET_BAD_ACTION error type is returned. If the action list validates, - * ACT_VALIDATION_OK is returned. */ -uint16_t -dp_validate_actions(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_header *actions, size_t actions_len) -{ - uint8_t *p = (uint8_t *)actions; - int err; - - while (actions_len >= sizeof(struct ofp_action_header)) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = ntohs(ah->len); - uint16_t type; - - /* Make there's enough remaining data for the specified length - * and that the action length is a multiple of 64 bits. 
*/ - if (!len || (actions_len < len) || (len % 8) != 0) { - return OFPBAC_BAD_LEN; - } - - type = ntohs(ah->type); - if (type < ARRAY_SIZE(of_actions)) { - err = validate_ofpat(dp, key, ah, type, len); - if (err != ACT_VALIDATION_OK) { - return err; - } - } else if (type == OFPAT_VENDOR) { - err = validate_vendor(dp, key, ah, len); - if (err != ACT_VALIDATION_OK) { - return err; - } - } else { - return OFPBAC_BAD_TYPE; - } - - p += len; - actions_len -= len; - } - - /* Check if there's any trailing garbage. */ - if (actions_len != 0) { - return OFPBAC_BAD_LEN; - } - - return ACT_VALIDATION_OK; -} - -/* Execute a built-in OpenFlow action against 'buffer'. */ -static void -execute_ofpat(struct ofpbuf *buffer, struct sw_flow_key *key, - const struct ofp_action_header *ah, uint16_t type) -{ - const struct openflow_action *act = &of_actions[type]; - - if (act->execute) { - act->execute(buffer, key, ah); - } -} - -/* Execute a vendor-defined action against 'buffer'. */ -static void -execute_vendor(struct ofpbuf *buffer, const struct sw_flow_key *key, - const struct ofp_action_header *ah) -{ - struct ofp_action_vendor_header *avh - = (struct ofp_action_vendor_header *)ah; - - switch(ntohl(avh->vendor)) { - case NX_VENDOR_ID: - nx_execute_act(buffer, key, avh); - break; - - default: - /* This should not be possible due to prior validation. */ - printf("attempt to execute action with unknown vendor: %#x\n", - ntohl(avh->vendor)); - break; - } -} - -/* Execute a list of actions against 'buffer'. */ -void execute_actions(struct datapath *dp, struct ofpbuf *buffer, - struct sw_flow_key *key, - const struct ofp_action_header *actions, size_t actions_len, - int ignore_no_fwd) -{ - /* Every output action needs a separate clone of 'buffer', but the common - * case is just a single output action, so that doing a clone and then - * freeing the original buffer is wasteful. So the following code is - * slightly obscure just to avoid that. 
*/ - int prev_port; - size_t max_len=0; /* Initialze to make compiler happy */ - uint16_t in_port = ntohs(key->flow.in_port); - uint8_t *p = (uint8_t *)actions; - - prev_port = -1; - - /* The action list was already validated, so we can be a bit looser - * in our sanity-checking. */ - while (actions_len > 0) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = htons(ah->len); - - if (prev_port != -1) { - do_output(dp, ofpbuf_clone(buffer), in_port, max_len, - prev_port, ignore_no_fwd); - prev_port = -1; - } - - if (ah->type == htons(OFPAT_OUTPUT)) { - struct ofp_action_output *oa = (struct ofp_action_output *)p; - prev_port = ntohs(oa->port); - max_len = ntohs(oa->max_len); - } else { - uint16_t type = ntohs(ah->type); - - if (type < ARRAY_SIZE(of_actions)) { - execute_ofpat(buffer, key, ah, type); - } else if (type == OFPAT_VENDOR) { - execute_vendor(buffer, key, ah); - } - } - - p += len; - actions_len -= len; - } - if (prev_port != -1) { - do_output(dp, buffer, in_port, max_len, prev_port, ignore_no_fwd); - } else { - ofpbuf_delete(buffer); - } -} diff --git a/udatapath/nx_act.c b/udatapath/nx_act.c deleted file mode 100644 index 6ece0c9c..00000000 --- a/udatapath/nx_act.c +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -/* Functions for Nicira-extended actions. 
*/ -#include "openflow/nicira-ext.h" -#include "nx_act.h" - -uint16_t -nx_validate_act(struct datapath *dp UNUSED, - const struct sw_flow_key *key UNUSED, - const struct ofp_action_vendor_header *avh UNUSED, - uint16_t len UNUSED) -{ - /* Nothing to validate yet */ - return OFPBAC_BAD_VENDOR_TYPE; -} - -void -nx_execute_act(struct ofpbuf *buffer UNUSED, - const struct sw_flow_key *key UNUSED, - const struct ofp_action_vendor_header *avh UNUSED) -{ - /* Nothing to execute yet */ -} - diff --git a/udatapath/nx_act.h b/udatapath/nx_act.h deleted file mode 100644 index 92d10654..00000000 --- a/udatapath/nx_act.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef NX_ACT_H -#define NX_ACT_H 1 - -#include "switch-flow.h" -#include "datapath.h" - - -uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh, uint16_t len); - -void nx_execute_act(struct ofpbuf *buffer, - const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh); - -#endif /* nx_act.h */ diff --git a/udatapath/nx_msg.c b/udatapath/nx_msg.c deleted file mode 100644 index 0d11e026..00000000 --- a/udatapath/nx_msg.c +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include -#include "openflow/nicira-ext.h" -#include "nx_msg.h" - -int nx_recv_msg(struct datapath *dp, const struct sender *sender, - const void *oh) -{ - const struct nicira_header *nh = oh; - - switch (ntohl(nh->subtype)) { - case NXT_FLOW_END_CONFIG: { - const struct nx_flow_end_config *nfec = oh; - dp->send_flow_end = nfec->enable; - return 0; - } - - default: - dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST, - OFPBRC_BAD_SUBTYPE, oh, ntohs(nh->header.length)); - return -EINVAL; - } - - return -EINVAL; -} diff --git a/udatapath/nx_msg.h b/udatapath/nx_msg.h deleted file mode 100644 index 4ed272a0..00000000 --- a/udatapath/nx_msg.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef NX_MSG_H -#define NX_MSG_H 1 - -#include "datapath.h" - -struct sender; - -int nx_recv_msg(struct datapath *, const struct sender *, const void *); - -#endif /* nx_msg.h */ diff --git a/udatapath/switch-flow.c b/udatapath/switch-flow.c deleted file mode 100644 index 79c7eff5..00000000 --- a/udatapath/switch-flow.c +++ /dev/null @@ -1,287 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "switch-flow.h" -#include -#include -#include -#include -#include "ofpbuf.h" -#include "openflow/openflow.h" -#include "openflow/nicira-ext.h" -#include "packets.h" -#include "timeval.h" - -/* Internal function used to compare fields in flow. */ -static inline int -flow_fields_match(const struct flow *a, const struct flow *b, uint16_t w, - uint32_t src_mask, uint32_t dst_mask) -{ - return ((w & OFPFW_IN_PORT || a->in_port == b->in_port) - && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan) - && (w & OFPFW_DL_SRC || eth_addr_equals(a->dl_src, b->dl_src)) - && (w & OFPFW_DL_DST || eth_addr_equals(a->dl_dst, b->dl_dst)) - && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type) - && !((a->nw_src ^ b->nw_src) & src_mask) - && !((a->nw_dst ^ b->nw_dst) & dst_mask) - && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto) - && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src) - && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst)); -} - -/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal - * modulo wildcards in 'b', zero otherwise. 
*/ -inline int -flow_matches_1wild(const struct sw_flow_key *a, const struct sw_flow_key *b) -{ - return flow_fields_match(&a->flow, &b->flow, b->wildcards, - b->nw_src_mask, b->nw_dst_mask); -} - -/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal - * modulo wildcards in 'a' or 'b', zero otherwise. */ -inline int -flow_matches_2wild(const struct sw_flow_key *a, const struct sw_flow_key *b) -{ - return flow_fields_match(&a->flow, &b->flow, a->wildcards | b->wildcards, - a->nw_src_mask & b->nw_src_mask, - a->nw_dst_mask & b->nw_dst_mask); -} - -/* Returns nonzero if 't' (the table entry's key) and 'd' (the key - * describing the match) match, that is, if their fields are - * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the - * wildcards must match in both 't_key' and 'd_key'. Note that the - * table's wildcards are ignored unless 'strict' is set. */ -int -flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d, - int strict) -{ - if (strict && d->wildcards != t->wildcards) { - return 0; - } - return flow_matches_1wild(t, d); -} - -void -flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) -{ - to->wildcards = ntohl(from->wildcards) & OFPFW_ALL; - to->flow.reserved = 0; - to->flow.in_port = from->in_port; - to->flow.dl_vlan = from->dl_vlan; - memcpy(to->flow.dl_src, from->dl_src, ETH_ADDR_LEN); - memcpy(to->flow.dl_dst, from->dl_dst, ETH_ADDR_LEN); - to->flow.dl_type = from->dl_type; - - to->flow.nw_src = to->flow.nw_dst = to->flow.nw_proto = 0; - to->flow.tp_src = to->flow.tp_dst = 0; - -#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST) -#define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO) - if (to->wildcards & OFPFW_DL_TYPE) { - /* Can't sensibly match on network or transport headers if the - * data link type is unknown. 
*/ - to->wildcards |= OFPFW_NW | OFPFW_TP; - } else if (from->dl_type == htons(ETH_TYPE_IP)) { - to->flow.nw_src = from->nw_src; - to->flow.nw_dst = from->nw_dst; - to->flow.nw_proto = from->nw_proto; - - if (to->wildcards & OFPFW_NW_PROTO) { - /* Can't sensibly match on transport headers if the network - * protocol is unknown. */ - to->wildcards |= OFPFW_TP; - } else if (from->nw_proto == IPPROTO_TCP - || from->nw_proto == IPPROTO_UDP - || from->nw_proto == IPPROTO_ICMP) { - to->flow.tp_src = from->tp_src; - to->flow.tp_dst = from->tp_dst; - } else { - /* Transport layer fields are undefined. Mark them as - * exact-match to allow such flows to reside in table-hash, - * instead of falling into table-linear. */ - to->wildcards &= ~OFPFW_TP; - } - } else { - /* Network and transport layer fields are undefined. Mark them - * as exact-match to allow such flows to reside in table-hash, - * instead of falling into table-linear. */ - to->wildcards &= ~(OFPFW_NW | OFPFW_TP); - } - - /* We set these late because code above adjusts to->wildcards. */ - to->nw_src_mask = flow_nw_bits_to_mask(to->wildcards, OFPFW_NW_SRC_SHIFT); - to->nw_dst_mask = flow_nw_bits_to_mask(to->wildcards, OFPFW_NW_DST_SHIFT); -} - -/* Allocates and returns a new flow with room for 'actions_len' actions. - * Returns the new flow or a null pointer on failure. */ -struct sw_flow * -flow_alloc(size_t actions_len) -{ - struct sw_flow_actions *sfa; - size_t size = sizeof *sfa + actions_len; - struct sw_flow *flow = malloc(sizeof *flow); - if (!flow) - return NULL; - - sfa = malloc(size); - if (!sfa) { - free(flow); - return NULL; - } - sfa->actions_len = actions_len; - flow->sf_acts = sfa; - return flow; -} - -/* Frees 'flow' immediately. */ -void -flow_free(struct sw_flow *flow) -{ - if (!flow) { - return; - } - free(flow->sf_acts); - free(flow); -} - -/* Copies 'actions' into a newly allocated structure for use by 'flow' - * and frees the structure that defined the previous actions. 
*/ -void flow_replace_acts(struct sw_flow *flow, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_flow_actions *sfa; - int size = sizeof *sfa + actions_len; - - sfa = malloc(size); - if (unlikely(!sfa)) - return; - - sfa->actions_len = actions_len; - memcpy(sfa->actions, actions, actions_len); - - free(flow->sf_acts); - flow->sf_acts = sfa; - - return; -} - -/* Prints a representation of 'key' to the kernel log. */ -void -print_flow(const struct sw_flow_key *key) -{ - const struct flow *f = &key->flow; - printf("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x" - "->%02x:%02x:%02x:%02x:%02x:%02x " - "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n", - key->wildcards, ntohs(f->in_port), ntohs(f->dl_vlan), - f->dl_src[0], f->dl_src[1], f->dl_src[2], - f->dl_src[3], f->dl_src[4], f->dl_src[5], - f->dl_dst[0], f->dl_dst[1], f->dl_dst[2], - f->dl_dst[3], f->dl_dst[4], f->dl_dst[5], - ntohs(f->dl_type), - ((unsigned char *)&f->nw_src)[0], - ((unsigned char *)&f->nw_src)[1], - ((unsigned char *)&f->nw_src)[2], - ((unsigned char *)&f->nw_src)[3], - ((unsigned char *)&f->nw_dst)[0], - ((unsigned char *)&f->nw_dst)[1], - ((unsigned char *)&f->nw_dst)[2], - ((unsigned char *)&f->nw_dst)[3], - ntohs(f->tp_src), ntohs(f->tp_dst)); -} - -bool flow_timeout(struct sw_flow *flow) -{ - uint64_t now = time_msec(); - if (flow->idle_timeout != OFP_FLOW_PERMANENT - && now > flow->used + flow->idle_timeout * 1000) { - flow->reason = NXFER_IDLE_TIMEOUT; - return true; - } else if (flow->hard_timeout != OFP_FLOW_PERMANENT - && now > flow->created + flow->hard_timeout * 1000) { - flow->reason = NXFER_HARD_TIMEOUT; - return true; - } else { - return false; - } -} - -/* Returns nonzero if 'flow' contains an output action to 'out_port' or - * has the value OFPP_NONE. 'out_port' is in network-byte order. 
*/ -int flow_has_out_port(struct sw_flow *flow, uint16_t out_port) -{ - struct sw_flow_actions *sf_acts = flow->sf_acts; - size_t actions_len = sf_acts->actions_len; - uint8_t *p = (uint8_t *)sf_acts->actions; - - if (out_port == htons(OFPP_NONE)) - return 1; - - while (actions_len > 0) { - struct ofp_action_header *ah = (struct ofp_action_header *)p; - size_t len = ntohs(ah->len); - - if (ah->type == htons(OFPAT_OUTPUT)) { - struct ofp_action_output *oa = (struct ofp_action_output *)p; - if (oa->port == out_port) { - return 1; - } - } - p += len; - actions_len -= len; - } - - return 0; -} - -void flow_used(struct sw_flow *flow, struct ofpbuf *buffer) -{ - flow->used = time_msec(); - - if (flow->key.flow.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = buffer->l3; - flow->ip_tos = nh->ip_tos; - - if (flow->key.flow.nw_proto == IP_TYPE_TCP) { - struct tcp_header *th = buffer->l4; - flow->tcp_flags |= TCP_FLAGS(th->tcp_ctl); - } - } - - flow->packet_count++; - flow->byte_count += buffer->size; -} diff --git a/udatapath/switch-flow.h b/udatapath/switch-flow.h deleted file mode 100644 index 5c5ab7b0..00000000 --- a/udatapath/switch-flow.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef SWITCH_FLOW_H -#define SWITCH_FLOW_H 1 - -#include -#include "openflow/openflow.h" -#include "flow.h" -#include "list.h" - -struct ofp_match; - -/* Identification data for a flow. */ -struct sw_flow_key { - struct flow flow; /* Flow data (in network byte order). */ - uint32_t wildcards; /* Wildcard fields (in host byte order). */ - uint32_t nw_src_mask; /* 1-bit in each significant nw_src bit. */ - uint32_t nw_dst_mask; /* 1-bit in each significant nw_dst bit. 
*/ -}; - -struct sw_flow_actions { - size_t actions_len; - struct ofp_action_header actions[0]; -}; - -struct sw_flow { - struct sw_flow_key key; - - uint16_t priority; /* Only used on entries with wildcards. */ - uint16_t idle_timeout; /* Idle time before discarding (seconds). */ - uint16_t hard_timeout; /* Hard expiration time (seconds) */ - uint64_t used; /* Last used time. */ - uint64_t created; /* When the flow was created. */ - uint64_t packet_count; /* Number of packets seen. */ - uint64_t byte_count; /* Number of bytes seen. */ - uint8_t reason; /* Reason flow expired (one of NXFER_*). */ - - uint8_t tcp_flags; /* Union of seen TCP flags. */ - uint8_t ip_tos; /* IP TOS value. */ - - struct sw_flow_actions *sf_acts; - - /* Private to table implementations. */ - struct list node; - struct list iter_node; - unsigned long int serial; -}; - -int flow_matches_1wild(const struct sw_flow_key *, const struct sw_flow_key *); -int flow_matches_2wild(const struct sw_flow_key *, const struct sw_flow_key *); -int flow_matches_desc(const struct sw_flow_key *, const struct sw_flow_key *, - int); -int flow_has_out_port(struct sw_flow *flow, uint16_t out_port); -struct sw_flow *flow_alloc(size_t); -void flow_free(struct sw_flow *); -void flow_deferred_free(struct sw_flow *); -void flow_deferred_free_acts(struct sw_flow_actions *); -void flow_replace_acts(struct sw_flow *, const struct ofp_action_header *, - size_t); -void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from); - -void print_flow(const struct sw_flow_key *); -bool flow_timeout(struct sw_flow *flow); -void flow_used(struct sw_flow *flow, struct ofpbuf *buffer); - -#endif /* switch-flow.h */ diff --git a/udatapath/table-hash.c b/udatapath/table-hash.c deleted file mode 100644 index f713df5f..00000000 --- a/udatapath/table-hash.c +++ /dev/null @@ -1,430 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow 
specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include -#include "table.h" -#include -#include -#include -#include "openflow/nicira-ext.h" -#include "crc32.h" -#include "datapath.h" -#include "flow.h" -#include "switch-flow.h" - -struct sw_table_hash { - struct sw_table swt; - struct crc32 crc32; - unsigned int n_flows; - unsigned int bucket_mask; /* Number of buckets minus 1. 
*/ - struct sw_flow **buckets; -}; - -static struct sw_flow **find_bucket(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int crc = crc32_calculate(&th->crc32, key, - offsetof(struct sw_flow_key, wildcards)); - return &th->buckets[crc & th->bucket_mask]; -} - -static struct sw_flow *table_hash_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_flow *flow = *find_bucket(swt, key); - return flow && !flow_compare(&flow->key.flow, &key->flow) ? flow : NULL; -} - -static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - struct sw_flow **bucket; - int retval; - - if (flow->key.wildcards != 0) - return 0; - - bucket = find_bucket(swt, &flow->key); - if (*bucket == NULL) { - th->n_flows++; - *bucket = flow; - retval = 1; - } else { - struct sw_flow *old_flow = *bucket; - if (!flow_compare(&old_flow->key.flow, &flow->key.flow)) { - *bucket = flow; - flow_free(old_flow); - retval = 1; - } else { - retval = 0; - } - } - return retval; -} - -static int table_hash_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int count = 0; - - if (key->wildcards == 0) { - struct sw_flow **bucket = find_bucket(swt, key); - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, actions, actions_len); - count = 1; - } - } else { - unsigned int i; - - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, actions, actions_len); - 
count++; - } - } - } - return count; -} - -/* Caller must update n_flows. */ -static void -do_delete(struct sw_flow **bucket) -{ - flow_free(*bucket); - *bucket = NULL; -} - -/* Returns number of deleted flows. We ignore the priority - * argument, since all exact-match entries are the same (highest) - * priority. */ -static int table_hash_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - uint16_t priority UNUSED, int strict) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int count = 0; - - if (key->wildcards == 0) { - struct sw_flow **bucket = find_bucket(swt, key); - struct sw_flow *flow = *bucket; - if (flow && !flow_compare(&flow->key.flow, &key->flow) - && flow_has_out_port(flow, out_port)) { - dp_send_flow_end(dp, flow, NXFER_DELETE); - do_delete(bucket); - count = 1; - } - } else { - unsigned int i; - - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow && flow_matches_desc(&flow->key, key, strict) - && flow_has_out_port(flow, out_port)) { - dp_send_flow_end(dp, flow, NXFER_DELETE); - do_delete(bucket); - count++; - } - } - } - th->n_flows -= count; - return count; -} - -static void table_hash_timeout(struct sw_table *swt, struct list *deleted) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int i; - - for (i = 0; i <= th->bucket_mask; i++) { - struct sw_flow **bucket = &th->buckets[i]; - struct sw_flow *flow = *bucket; - if (flow && flow_timeout(flow)) { - list_push_back(deleted, &flow->node); - *bucket = NULL; - th->n_flows--; - } - } -} - -static void table_hash_destroy(struct sw_table *swt) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - unsigned int i; - for (i = 0; i <= th->bucket_mask; i++) { - if (th->buckets[i]) { - flow_free(th->buckets[i]); - } - } - free(th->buckets); - free(th); -} - -static int table_hash_iterate(struct sw_table *swt, - const 
struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *private), - void *private) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - - if (position->private[0] > th->bucket_mask) - return 0; - - if (key->wildcards == 0) { - struct sw_flow *flow = table_hash_lookup(swt, key); - position->private[0] = -1; - if (!flow || !flow_has_out_port(flow, out_port)) { - return 0; - } - return callback(flow, private); - } else { - int i; - - for (i = position->private[0]; i <= th->bucket_mask; i++) { - struct sw_flow *flow = th->buckets[i]; - if (flow && flow_matches_1wild(&flow->key, key) - && flow_has_out_port(flow, out_port)) { - int error = callback(flow, private); - if (error) { - position->private[0] = i + 1; - return error; - } - } - } - return 0; - } -} - -static void table_hash_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_hash *th = (struct sw_table_hash *) swt; - stats->name = "hash"; - stats->wildcards = 0; /* No wildcards are supported. 
*/ - stats->n_flows = th->n_flows; - stats->max_flows = th->bucket_mask + 1; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - -struct sw_table *table_hash_create(unsigned int polynomial, - unsigned int n_buckets) -{ - struct sw_table_hash *th; - struct sw_table *swt; - - th = malloc(sizeof *th); - if (th == NULL) - return NULL; - memset(th, '\0', sizeof *th); - - assert(!(n_buckets & (n_buckets - 1))); - th->buckets = calloc(n_buckets, sizeof *th->buckets); - if (th->buckets == NULL) { - printf("failed to allocate %u buckets\n", n_buckets); - free(th); - return NULL; - } - th->n_flows = 0; - th->bucket_mask = n_buckets - 1; - - swt = &th->swt; - swt->lookup = table_hash_lookup; - swt->insert = table_hash_insert; - swt->modify = table_hash_modify; - swt->delete = table_hash_delete; - swt->timeout = table_hash_timeout; - swt->destroy = table_hash_destroy; - swt->iterate = table_hash_iterate; - swt->stats = table_hash_stats; - - crc32_init(&th->crc32, polynomial); - - return swt; -} - -/* Double-hashing table. 
*/ - -struct sw_table_hash2 { - struct sw_table swt; - struct sw_table *subtable[2]; -}; - -static struct sw_flow *table_hash2_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - int i; - - for (i = 0; i < 2; i++) { - struct sw_flow *flow = *find_bucket(t2->subtable[i], key); - if (flow && !flow_compare(&flow->key.flow, &key->flow)) - return flow; - } - return NULL; -} - -static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - - if (table_hash_insert(t2->subtable[0], flow)) - return 1; - return table_hash_insert(t2->subtable[1], flow); -} - -static int table_hash2_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - return (table_hash_modify(t2->subtable[0], key, priority, strict, - actions, actions_len) - + table_hash_modify(t2->subtable[1], key, priority, strict, - actions, actions_len)); -} - -static int table_hash2_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - uint16_t priority, int strict) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - return (table_hash_delete(dp, t2->subtable[0], key, out_port, - priority, strict) - + table_hash_delete(dp, t2->subtable[1], key, out_port, - priority, strict)); -} - -static void table_hash2_timeout(struct sw_table *swt, struct list *deleted) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - table_hash_timeout(t2->subtable[0], deleted); - table_hash_timeout(t2->subtable[1], deleted); -} - -static void table_hash2_destroy(struct sw_table *swt) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - table_hash_destroy(t2->subtable[0]); - table_hash_destroy(t2->subtable[1]); - free(t2); -} - -static int 
table_hash2_iterate(struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *), - void *private) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - int i; - - for (i = position->private[1]; i < 2; i++) { - int error = table_hash_iterate(t2->subtable[i], key, out_port, - position, callback, private); - if (error) { - return error; - } - position->private[0] = 0; - position->private[1]++; - } - return 0; -} - -static void table_hash2_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; - struct sw_table_stats substats[2]; - int i; - - for (i = 0; i < 2; i++) - table_hash_stats(t2->subtable[i], &substats[i]); - stats->name = "hash2"; - stats->wildcards = 0; /* No wildcards are supported. */ - stats->n_flows = substats[0].n_flows + substats[1].n_flows; - stats->max_flows = substats[0].max_flows + substats[1].max_flows; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - -struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, - unsigned int poly1, unsigned int buckets1) - -{ - struct sw_table_hash2 *t2; - struct sw_table *swt; - - t2 = malloc(sizeof *t2); - if (t2 == NULL) - return NULL; - memset(t2, '\0', sizeof *t2); - - t2->subtable[0] = table_hash_create(poly0, buckets0); - if (t2->subtable[0] == NULL) - goto out_free_t2; - - t2->subtable[1] = table_hash_create(poly1, buckets1); - if (t2->subtable[1] == NULL) - goto out_free_subtable0; - - swt = &t2->swt; - swt->lookup = table_hash2_lookup; - swt->insert = table_hash2_insert; - swt->modify = table_hash2_modify; - swt->delete = table_hash2_delete; - swt->timeout = table_hash2_timeout; - swt->destroy = table_hash2_destroy; - swt->iterate = table_hash2_iterate; - swt->stats = table_hash2_stats; - - return swt; - -out_free_subtable0: - table_hash_destroy(t2->subtable[0]); -out_free_t2: - 
free(t2); - return NULL; -} diff --git a/udatapath/table-linear.c b/udatapath/table-linear.c deleted file mode 100644 index ae960ad0..00000000 --- a/udatapath/table-linear.c +++ /dev/null @@ -1,245 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include "table.h" -#include -#include "flow.h" -#include "list.h" -#include "openflow/openflow.h" -#include "openflow/nicira-ext.h" -#include "switch-flow.h" -#include "datapath.h" - -struct sw_table_linear { - struct sw_table swt; - - unsigned int max_flows; - unsigned int n_flows; - struct list flows; - struct list iter_flows; - unsigned long int next_serial; -}; - -static struct sw_flow *table_linear_lookup(struct sw_table *swt, - const struct sw_flow_key *key) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) { - if (flow_matches_1wild(key, &flow->key)) - return flow; - } - return NULL; -} - -static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *f; - - /* Loop through the existing list of entries. New entries will - * always be placed behind those with equal priority. Just replace - * any flows that match exactly. - */ - LIST_FOR_EACH (f, struct sw_flow, node, &tl->flows) { - if (f->priority == flow->priority - && f->key.wildcards == flow->key.wildcards - && flow_matches_2wild(&f->key, &flow->key)) { - flow->serial = f->serial; - list_replace(&flow->node, &f->node); - list_replace(&flow->iter_node, &f->iter_node); - flow_free(f); - return 1; - } - - if (f->priority < flow->priority) - break; - } - - /* Make sure there's room in the table. */ - if (tl->n_flows >= tl->max_flows) { - return 0; - } - tl->n_flows++; - - /* Insert the entry immediately in front of where we're pointing. 
*/ - flow->serial = tl->next_serial++; - list_insert(&f->node, &flow->node); - list_push_front(&tl->iter_flows, &flow->iter_node); - - return 1; -} - -static int table_linear_modify(struct sw_table *swt, - const struct sw_flow_key *key, uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - unsigned int count = 0; - - LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) { - if (flow_matches_desc(&flow->key, key, strict) - && (!strict || (flow->priority == priority))) { - flow_replace_acts(flow, actions, actions_len); - count++; - } - } - return count; -} - -static void -do_delete(struct sw_flow *flow) -{ - list_remove(&flow->node); - list_remove(&flow->iter_node); - flow_free(flow); -} - -static int table_linear_delete(struct datapath *dp, struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - uint16_t priority, int strict) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow, *n; - unsigned int count = 0; - - LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) { - if (flow_matches_desc(&flow->key, key, strict) - && flow_has_out_port(flow, out_port) - && (!strict || (flow->priority == priority))) { - dp_send_flow_end(dp, flow, NXFER_DELETE); - do_delete(flow); - count++; - } - } - tl->n_flows -= count; - return count; -} - -static void table_linear_timeout(struct sw_table *swt, struct list *deleted) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow, *n; - - LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) { - if (flow_timeout(flow)) { - list_remove(&flow->node); - list_remove(&flow->iter_node); - list_push_back(deleted, &flow->node); - tl->n_flows--; - } - } -} - -static void table_linear_destroy(struct sw_table *swt) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - - while 
(!list_is_empty(&tl->flows)) { - struct sw_flow *flow = CONTAINER_OF(list_front(&tl->flows), - struct sw_flow, node); - list_remove(&flow->node); - flow_free(flow); - } - free(tl); -} - -static int table_linear_iterate(struct sw_table *swt, - const struct sw_flow_key *key, - uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *, void *), - void *private) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - struct sw_flow *flow; - unsigned long start; - - start = ~position->private[0]; - LIST_FOR_EACH (flow, struct sw_flow, iter_node, &tl->iter_flows) { - if (flow->serial <= start - && flow_matches_2wild(key, &flow->key) - && flow_has_out_port(flow, out_port)) { - int error = callback(flow, private); - if (error) { - position->private[0] = ~(flow->serial - 1); - return error; - } - } - } - return 0; -} - -static void table_linear_stats(struct sw_table *swt, - struct sw_table_stats *stats) -{ - struct sw_table_linear *tl = (struct sw_table_linear *) swt; - stats->name = "linear"; - stats->wildcards = OFPFW_ALL; - stats->n_flows = tl->n_flows; - stats->max_flows = tl->max_flows; - stats->n_lookup = swt->n_lookup; - stats->n_matched = swt->n_matched; -} - - -struct sw_table *table_linear_create(unsigned int max_flows) -{ - struct sw_table_linear *tl; - struct sw_table *swt; - - tl = calloc(1, sizeof *tl); - if (tl == NULL) - return NULL; - - swt = &tl->swt; - swt->lookup = table_linear_lookup; - swt->insert = table_linear_insert; - swt->modify = table_linear_modify; - swt->delete = table_linear_delete; - swt->timeout = table_linear_timeout; - swt->destroy = table_linear_destroy; - swt->iterate = table_linear_iterate; - swt->stats = table_linear_stats; - - tl->max_flows = max_flows; - tl->n_flows = 0; - list_init(&tl->flows); - list_init(&tl->iter_flows); - tl->next_serial = 0; - - return swt; -} diff --git a/udatapath/table.h b/udatapath/table.h deleted file mode 100644 index 5d8bc662..00000000 --- 
a/udatapath/table.h +++ /dev/null @@ -1,144 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -/* Individual switching tables. Generally grouped together in a chain (see - * chain.h). 
*/ - -#ifndef TABLE_H -#define TABLE_H 1 - -#include -#include -#include "datapath.h" - -struct sw_flow; -struct sw_flow_key; -struct ofp_action_header; -struct list; - -/* Table statistics. */ -struct sw_table_stats { - const char *name; /* Human-readable name. */ - uint32_t wildcards; /* Bitmap of OFPFW_* wildcards that are - supported by the table. */ - unsigned int n_flows; /* Number of active flows. */ - unsigned int max_flows; /* Flow capacity. */ - unsigned long int n_lookup; /* Number of packets looked up. */ - unsigned long int n_matched; /* Number of packets that have hit. */ -}; - -/* Position within an iteration of a sw_table. - * - * The contents are private to the table implementation, except that a position - * initialized to all-zero-bits represents the start of a table. */ -struct sw_table_position { - unsigned long private[4]; -}; - -/* A single table of flows. */ -struct sw_table { - /* The number of packets that have been looked up and matched, - * respecitvely. To make these 100% accurate, they should be atomic. - * However, we're primarily concerned about speed. */ - unsigned long long n_lookup; - unsigned long long n_matched; - - /* Searches 'table' for a flow matching 'key', which must not have any - * wildcard fields. Returns the flow if successful, a null pointer - * otherwise. */ - struct sw_flow *(*lookup)(struct sw_table *table, - const struct sw_flow_key *key); - - /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns - * 0 if successful or a negative error. Error can be due to an - * over-capacity table or because the flow is not one of the kind that - * the table accepts. - * - * If successful, 'flow' becomes owned by 'table', otherwise it is - * retained by the caller. */ - int (*insert)(struct sw_table *table, struct sw_flow *flow); - - /* Modifies the actions in 'table' that match 'key'. If 'strict' - * set, wildcards and priority must match. Returns the number of flows - * that were modified. 
*/ - int (*modify)(struct sw_table *table, const struct sw_flow_key *key, - uint16_t priority, int strict, - const struct ofp_action_header *actions, size_t actions_len); - - /* Deletes from 'table' any and all flows that match 'key' from - * 'table'. If 'out_port' is not OFPP_NONE, then matching entries - * must have that port as an argument for an output action. If - * 'strict' is set, wildcards and priority must match. Returns the - * number of flows that were deleted. */ - int (*delete)(struct datapath *dp, struct sw_table *table, - const struct sw_flow_key *key, - uint16_t out_port, uint16_t priority, int strict); - - /* Performs timeout processing on all the flow entries in 'table'. - * Appends all the flow entries removed from 'table' to 'deleted' for the - * caller to free. */ - void (*timeout)(struct sw_table *table, struct list *deleted); - - /* Destroys 'table', which must not have any users. */ - void (*destroy)(struct sw_table *table); - - /* Iterates through the flow entries in 'table', passing each one - * matches 'key' and output port 'out_port' to 'callback'. The - * callback function should return 0 to continue iteration or a - * nonzero error code to stop. The iterator function returns either - * 0 if the table iteration completed or the value returned by the - * callback function otherwise. - * - * The iteration starts at 'position', which may be initialized to - * all-zero-bits to iterate from the beginning of the table. If the - * iteration terminates due to an error from the callback function, - * 'position' is updated to a value that can be passed back to the - * iterator function to resume iteration later with the following - * flow. */ - int (*iterate)(struct sw_table *table, - const struct sw_flow_key *key, uint16_t out_port, - struct sw_table_position *position, - int (*callback)(struct sw_flow *flow, void *private), - void *private); - - /* Dumps statistics for 'table' into 'stats'. 
*/ - void (*stats)(struct sw_table *table, struct sw_table_stats *stats); -}; - -struct sw_table *table_hash_create(unsigned int polynomial, - unsigned int n_buckets); -struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, - unsigned int poly1, unsigned int buckets1); -struct sw_table *table_linear_create(unsigned int max_flows); - -#endif /* table.h */ diff --git a/udatapath/udatapath.8.in b/udatapath/udatapath.8.in deleted file mode 100644 index 0be9a4f6..00000000 --- a/udatapath/udatapath.8.in +++ /dev/null @@ -1,140 +0,0 @@ -.ds PN udatapath - -.TH udatapath 8 "May 2008" "OpenFlow" "OpenFlow Manual" - -.SH NAME -udatapath \- userspace implementation of datapath for OpenFlow switch - -.SH SYNOPSIS -.B udatapath -[\fIoptions\fR] -\fB-i\fR \fInetdev\fR[\fB,\fInetdev\fR].\|.\|. -\fImethod\fR [\fImethod\fR].\|.\|. - -.SH DESCRIPTION -The \fBudatapath\fR is a userspace implementation of an OpenFlow -datapath. It monitors one or more network device interfaces, -forwarding packets between them according to the entries in the flow -table that it maintains. When it is used with \fBsecchan\fR(8), to -connect the datapath to an OpenFlow controller, the combination is an -OpenFlow switch. - -For access to network devices, the udatapath program must normally run as -root. - -The mandatory \fImethod\fR argument specifies how \fBsecchan\fR(8) -communicates with \fBudatapath\fR, as a passive OpenFlow connection -method. Ordinarily \fImethod\fR takes the following form: - -.TP -\fBpunix:\fIfile\fR -Listens for connections on the Unix domain server socket named -\fIfile\fR. - -.PP -The following connection methods are also supported, but their use -would be unusual because \fBudatapath\fR and \fBsecchan\fR should run -on the same machine: - -.TP -\fBpssl:\fR[\fIport\fR] -Listens for SSL connections \fIport\fR (default: 976). The -\fB--private-key\fR, \fB--certificate\fR, and \fB--ca-cert\fR options -are mandatory when this form is used. 
(\fBofp\-pki\fR(8) does not set -up a suitable PKI for use with this option.) - -.TP -\fBptcp:\fR[\fIport\fR] -Listens for TCP connections from remote OpenFlow switches on -\fIport\fR (default: 975). - -.SH OPTIONS -.TP -\fB-i\fR, \fB--interfaces=\fR\fInetdev\fR[\fB,\fInetdev\fR].\|.\|. -Specifies each \fInetdev\fR (e.g., \fBeth0\fR) as a switch port. The -specified network devices should not have any configured IP addresses. -This option may be given any number of times to specify additional -network devices. - -.TP -\fB-L\fR, \fB--local-port=\fInetdev\fR -Specifies the network device to use as the userspace datapath's -``local port,'' which is a network device that \fBsecchan\fR(8) -bridges to the physical switch ports for use in in-band control. When -this option is not specified, the default is \fBtap:\fR, which causes -a new TAP virtual network device to be allocated with a default name -assigned by the kernel. To do the same, but assign a specific name -\fBname\fR to the TAP network device, specify the option as -\fB--local-port=tap:\fIname\fR. - -Either way, the existence of TAP devices created by \fBudatapath\fR is -temporary: they are destroyed when \fBudatapath\fR exits. If this is -undesirable, you may use \fBtunctl\fR(8) to create a persistent TAP -network device and then pass it to \fBudatapath\fR, like so: - -.RS -.IP 1. -Create a persistent TAP network device: \fBtunctl -t mytap\fR. (The -\fBtunctl\fR(8) utility is part of User Mode Linux. It is not -included with the OpenFlow reference implementation.) -.IP 2. -Invoke \fBudatapath\fR(8) using \fBmytap\fR, e.g. \fBudatapath ---local-port=mytap\fR .\|.\|. (Note the lack of \fBtap:\fR prefix on -the \fB--local-port\fR argument.) -.IP 3. -Invoke \fBsecchan\fR(8), etc., and use the switch as desired. -.IP 4. 
-When \fBsecchan\fR and \fBudatapath\fR have terminated and the TAP -network device is no longer needed, you may destroy it with: \fBtunctl --d mytap\fR -.RE - -.IP -It does not ordinarily make sense to specify the name of a physical -network device on \fB-L\fR or \fB--local-port\fR. - -.TP -\fB--no-local-port\fR -Do not provide a local port as part of the datapath. When this option -is used, the switch will not support in-band control. - -.TP -\fB-d\fR, \fB--datapath-id=\fIdpid\fR -Specifies the OpenFlow datapath ID (a 48-bit number that uniquely -identifies a controller) as \fIdpid\fR, which consists of exactly 12 -hex digits. Without this option, \fBudatapath\fR picks an ID randomly. - -.TP -\fB-p\fR, \fB--private-key=\fIprivkey.pem\fR -Specifies a PEM file containing the private key used as the datapath's -identity for SSL connections to \fBsecchan\fR(8). - -.TP -\fB-c\fR, \fB--certificate=\fIcert.pem\fR -Specifies a PEM file containing a certificate, signed by the -datapath's certificate authority (CA), that certifies the datapath's -private key to identify a trustworthy datapath. - -.TP -\fB-C\fR, \fB--ca-cert=\fIcacert.pem\fR -Specifies a PEM file containing the CA certificate used to verify that -the datapath is connected to a trustworthy secure channel. - -.so lib/daemon.man -.so lib/vlog.man -.so lib/common.man - -.SH BUGS -The userspace datapath's performance lags significantly behind that of -the kernel-based switch. It should only be used when the kernel-based -switch cannot be. - -On Linux, general-purpose support for VLAN tag rewriting is precluded -by the Linux kernel AF_PACKET implementation. - -.SH "SEE ALSO" - -.BR secchan (8), -.BR dpctl (8), -.BR controller (8), -.BR vlogconf (8). 
diff --git a/udatapath/udatapath.c b/udatapath/udatapath.c deleted file mode 100644 index 46932dae..00000000 --- a/udatapath/udatapath.c +++ /dev/null @@ -1,308 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "command-line.h" -#include "daemon.h" -#include "datapath.h" -#include "fault.h" -#include "openflow/openflow.h" -#include "poll-loop.h" -#include "queue.h" -#include "util.h" -#include "rconn.h" -#include "timeval.h" -#include "vconn.h" -#include "dirs.h" -#include "vconn-ssl.h" -#include "vlog-socket.h" - -#define THIS_MODULE VLM_udatapath -#include "vlog.h" - -/* Strings to describe the manufacturer, hardware, and software. This data - * is queriable through the switch description stats message. */ -char mfr_desc[DESC_STR_LEN] = "Nicira Networks"; -char hw_desc[DESC_STR_LEN] = "Reference User-Space Switch"; -char sw_desc[DESC_STR_LEN] = VERSION BUILDNR; -char serial_num[SERIAL_NUM_LEN] = "None"; - -static void parse_options(int argc, char *argv[]); -static void usage(void) NO_RETURN; - -static struct datapath *dp; -static uint64_t dpid = UINT64_MAX; -static char *port_list; -static char *local_port = "tap:"; - -static void add_ports(struct datapath *dp, char *port_list); - -int -main(int argc, char *argv[]) -{ - int n_listeners; - int error; - int i; - - set_program_name(argv[0]); - register_fault_handlers(); - time_init(); - vlog_init(); - parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); - - if (argc - optind < 1) { - ofp_fatal(0, "at least one listener argument is required; " - "use --help for usage"); - } - - error = dp_new(&dp, dpid); - - n_listeners = 0; - for (i = optind; i < argc; i++) { - const char *pvconn_name = argv[i]; - struct pvconn *pvconn; - int retval; - - retval = pvconn_open(pvconn_name, &pvconn); - if (!retval || retval == EAGAIN) { - dp_add_pvconn(dp, pvconn); - n_listeners++; - } else { - ofp_error(retval, "opening %s", pvconn_name); - } - } - if (!n_listeners) { - ofp_fatal(0, "could not listen for any connections"); - } - - if (port_list) { - add_ports(dp, port_list); - } - if (local_port) { - error = dp_add_local_port(dp, local_port); 
- if (error) { - ofp_fatal(error, "failed to add local port %s", local_port); - } - } - - error = vlog_server_listen(NULL, NULL); - if (error) { - ofp_fatal(error, "could not listen for vlog connections"); - } - - die_if_already_running(); - daemonize(); - - for (;;) { - dp_run(dp); - dp_wait(dp); - poll_block(); - } - - return 0; -} - -static void -add_ports(struct datapath *dp, char *port_list) -{ - char *port, *save_ptr; - - /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that - * can cause segfaults here: - * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. - * Using ",," instead of the obvious "," works around it. */ - for (port = strtok_r(port_list, ",,", &save_ptr); port; - port = strtok_r(NULL, ",,", &save_ptr)) { - int error = dp_add_port(dp, port); - if (error) { - ofp_fatal(error, "failed to add port %s", port); - } - } -} - -static void -parse_options(int argc, char *argv[]) -{ - enum { - OPT_MFR_DESC = UCHAR_MAX + 1, - OPT_HW_DESC, - OPT_SW_DESC, - OPT_SERIAL_NUM, - OPT_BOOTSTRAP_CA_CERT, - OPT_NO_LOCAL_PORT - }; - - static struct option long_options[] = { - {"interfaces", required_argument, 0, 'i'}, - {"local-port", required_argument, 0, 'L'}, - {"no-local-port", no_argument, 0, OPT_NO_LOCAL_PORT}, - {"datapath-id", required_argument, 0, 'd'}, - {"verbose", optional_argument, 0, 'v'}, - {"help", no_argument, 0, 'h'}, - {"version", no_argument, 0, 'V'}, - {"mfr-desc", required_argument, 0, OPT_MFR_DESC}, - {"hw-desc", required_argument, 0, OPT_HW_DESC}, - {"sw-desc", required_argument, 0, OPT_SW_DESC}, - {"serial_num", required_argument, 0, OPT_SERIAL_NUM}, - DAEMON_LONG_OPTIONS, -#ifdef HAVE_OPENSSL - VCONN_SSL_LONG_OPTIONS - {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT}, -#endif - {0, 0, 0, 0}, - }; - char *short_options = long_options_to_short_options(long_options); - - for (;;) { - int indexptr; - int c; - - c = getopt_long(argc, argv, short_options, long_options, &indexptr); - if (c == -1) { - 
break; - } - - switch (c) { - case 'd': - if (strlen(optarg) != 12 - || strspn(optarg, "0123456789abcdefABCDEF") != 12) { - ofp_fatal(0, "argument to -d or --datapath-id must be " - "exactly 12 hex digits"); - } - dpid = strtoll(optarg, NULL, 16); - if (!dpid) { - ofp_fatal(0, "argument to -d or --datapath-id must " - "be nonzero"); - } - break; - - case 'h': - usage(); - - case 'V': - printf("%s %s compiled "__DATE__" "__TIME__"\n", - program_name, VERSION BUILDNR); - exit(EXIT_SUCCESS); - - case 'v': - vlog_set_verbosity(optarg); - break; - - case 'i': - if (!port_list) { - port_list = optarg; - } else { - port_list = xasprintf("%s,%s", port_list, optarg); - } - break; - - case 'L': - local_port = optarg; - break; - - case OPT_NO_LOCAL_PORT: - local_port = NULL; - break; - - case OPT_MFR_DESC: - strncpy(mfr_desc, optarg, sizeof mfr_desc); - break; - - case OPT_HW_DESC: - strncpy(hw_desc, optarg, sizeof hw_desc); - break; - - case OPT_SW_DESC: - strncpy(sw_desc, optarg, sizeof sw_desc); - break; - - case OPT_SERIAL_NUM: - strncpy(serial_num, optarg, sizeof serial_num); - break; - - DAEMON_OPTION_HANDLERS - -#ifdef HAVE_OPENSSL - VCONN_SSL_OPTION_HANDLERS - - case OPT_BOOTSTRAP_CA_CERT: - vconn_ssl_set_ca_cert_file(optarg, true); - break; -#endif - - case '?': - exit(EXIT_FAILURE); - - default: - abort(); - } - } - free(short_options); -} - -static void -usage(void) -{ - printf("%s: userspace OpenFlow datapath\n" - "usage: %s [OPTIONS] LISTEN...\n" - "where LISTEN is a passive OpenFlow connection method on which\n" - "to listen for incoming connections from the secure channel.\n", - program_name, program_name); - vconn_usage(false, true, false); - printf("\nConfiguration options:\n" - " -i, --interfaces=NETDEV[,NETDEV]...\n" - " add specified initial switch ports\n" - " -L, --local-port=NETDEV set network device for local port\n" - " --no-local-port disable local port\n" - " -d, --datapath-id=ID Use ID as the OpenFlow switch ID\n" - " (ID must consist of 12 hex 
digits)\n" - "\nOther options:\n" - " -D, --detach run in background as daemon\n" - " -P, --pidfile[=FILE] create pidfile (default: %s/udatapath.pid)\n" - " -f, --force with -P, start even if already running\n" - " -v, --verbose=MODULE[:FACILITY[:LEVEL]] set logging levels\n" - " -v, --verbose set maximum verbosity level\n" - " -h, --help display this help message\n" - " -V, --version display version information\n", - ofp_rundir); - exit(EXIT_SUCCESS); -} diff --git a/utilities/automake.mk b/utilities/automake.mk index d6f79a8c..8373433c 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -3,6 +3,7 @@ bin_PROGRAMS += \ utilities/dpctl \ utilities/ofp-discover \ utilities/ofp-kill +noinst_PROGRAMS += utilities/nlmon bin_SCRIPTS += utilities/ofp-pki noinst_SCRIPTS += utilities/ofp-pki-cgi utilities/ofp-parse-leaks @@ -43,3 +44,6 @@ utilities_ofp_discover_LDADD = lib/libopenflow.a utilities_ofp_kill_SOURCES = utilities/ofp-kill.c utilities_ofp_kill_LDADD = lib/libopenflow.a + +utilities_nlmon_SOURCES = utilities/nlmon.c +utilities_nlmon_LDADD = lib/libopenflow.a diff --git a/utilities/dpctl.c b/utilities/dpctl.c index 94283c73..1ba6580d 100644 --- a/utilities/dpctl.c +++ b/utilities/dpctl.c @@ -44,18 +44,14 @@ #include #include -#ifdef HAVE_NETLINK -#include "netdev.h" -#include "netlink.h" -#include "openflow/openflow-netlink.h" -#endif - #include "command-line.h" #include "compiler.h" #include "dpif.h" -#include "openflow/nicira-ext.h" +#include "netdev.h" +#include "netlink.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "packets.h" #include "random.h" @@ -211,16 +207,15 @@ usage(void) { printf("%s: OpenFlow switch management utility\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n" -#ifdef HAVE_NETLINK - "\nFor local datapaths only:\n" - " adddp nl:DP_ID add a new local datapath DP_ID\n" - " deldp nl:DP_ID delete local datapath DP_ID\n" - " addif nl:DP_ID IFACE... 
add each IFACE as a port on DP_ID\n" - " delif nl:DP_ID IFACE... delete each IFACE from DP_ID\n" - " get-idx OF_DEV get datapath index for OF_DEV\n" -#endif - "\nFor local datapaths and remote switches:\n" - " show SWITCH show basic information\n" + "\nFor local datapaths:\n" + " adddp DP add a new local datapath DP\n" + " deldp DP delete local datapath DP\n" + " addif DP IFACE... add each IFACE as a port on DP\n" + " delif DP IFACE... delete each IFACE from DP\n" + " showdp show basic info on all datapaths\n" + " showdp DP... show basic info on each DP\n" + "\nFor OpenFlow switches:\n" + " show SWITCH show OpenFlow information\n" " status SWITCH [KEY] report statistics (about KEY)\n" " dump-desc SWITCH print switch description\n" " dump-tables SWITCH print table stats\n" @@ -240,7 +235,7 @@ usage(void) " del-flows SWITCH [FLOW] delete matching FLOWs\n" " monitor SWITCH print packets received from SWITCH\n" " execute SWITCH CMD [ARG...] execute CMD with ARGS on SWITCH\n" - "\nFor local datapaths, remote switches, and controllers:\n" + "\nFor OpenFlow switches and controllers:\n" " probe VCONN probe whether VCONN is up\n" " ping VCONN [N] latency of N-byte echos\n" " benchmark VCONN N COUNT bandwidth of COUNT N-byte echos\n" @@ -278,9 +273,6 @@ static void run(int retval, const char *message, ...) } } -#ifdef HAVE_NETLINK -/* Netlink-only commands. 
*/ - static int if_up(const char *netdev_name) { struct netdev *netdev; @@ -297,36 +289,17 @@ static int if_up(const char *netdev_name) static void do_get_idx(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) { - int dp_idx; - struct dpif dpif; - run(dpif_open(-1, &dpif), "opening management socket"); - dp_idx = dpif_get_idx(argv[1]); - if (dp_idx == -1) { - dpif_close(&dpif); - ofp_fatal(0, "unknown OpenFlow device: %s", argv[1]); - } - printf("%d\n", dp_idx); + run(dpif_open(argv[1], &dpif), "opening datapath"); + printf("%u\n", dpif.minor); dpif_close(&dpif); } -static int -get_dp_idx(const char *name) -{ - if (strncmp(name, "nl:", 3) - || strlen(name) < 4 - || name[strspn(name + 3, "0123456789") + 3]) { - ofp_fatal(0, "%s: argument is not of the form \"nl:DP_ID\"", name); - } - return atoi(name + 3); -} - static void do_add_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) { struct dpif dpif; - run(dpif_open(-1, &dpif), "opening management socket"); - run(dpif_add_dp(&dpif, get_dp_idx(argv[1]), NULL), "add_dp"); + run(dpif_create(argv[1], &dpif), "add_dp"); dpif_close(&dpif); } @@ -334,28 +307,81 @@ static void do_del_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) { struct dpif dpif; - run(dpif_open(-1, &dpif), "opening management socket"); - run(dpif_del_dp(&dpif, get_dp_idx(argv[1]), NULL), "del_dp"); + run(dpif_open(argv[1], &dpif), "opening datapath"); + run(dpif_delete(&dpif), "del_dp"); dpif_close(&dpif); } -static void add_del_ports(int argc UNUSED, char *argv[], - int (*function)(struct dpif *, int dp_idx, - const char *netdev), - const char *operation, const char *preposition) +static int +compare_ports(const void *a_, const void *b_) +{ + const struct odp_port *a = a_; + const struct odp_port *b = b_; + return a->port < b->port ? 
-1 : a->port > b->port; +} + +static void +query_ports(struct dpif *dpif, struct odp_port **ports, size_t *n_ports) +{ + run(dpif_port_list(dpif, ports, n_ports), "listing ports"); + qsort(*ports, *n_ports, sizeof **ports, compare_ports); +} + +static uint16_t +get_free_port(struct dpif *dpif) +{ + struct odp_port *ports; + size_t n_ports; + int port_no; + + query_ports(dpif, &ports, &n_ports); + for (port_no = 0; port_no <= UINT16_MAX; port_no++) { + size_t i; + for (i = 0; i < n_ports; i++) { + if (ports[i].port == port_no) { + goto next_portno; + } + } + free(ports); + return port_no; + + next_portno: ; + } + ofp_fatal(0, "no free datapath ports"); +} + +static void +do_add_port(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) { bool failure = false; struct dpif dpif; - int dp_idx; int i; - run(dpif_open(-1, &dpif), "opening management socket"); - dp_idx = get_dp_idx(argv[1]); + run(dpif_open(argv[1], &dpif), "opening datapath"); for (i = 2; i < argc; i++) { - int retval = function(&dpif, dp_idx, argv[i]); - if (retval) { - ofp_error(retval, "failed to %s %s %s %s", - operation, argv[i], preposition, argv[1]); + char *save_ptr = NULL; + char *devname, *port_s; + uint16_t port; + int error; + + devname = strtok_r(argv[i], "@@", &save_ptr); + if (!devname) { + ofp_error(0, "%s is not a valid network device name", argv[i]); + continue; + } + + if (if_up(devname)) { + failure = true; + continue; + } + + port_s = strtok_r(NULL, "", &save_ptr); + port = port_s ? 
atoi(port_s) : get_free_port(&dpif); + + error = dpif_port_add(&dpif, devname, port); + if (error) { + ofp_error(error, "adding %s as port %"PRIu16" of %s failed", + devname, port, argv[1]); failure = true; } } @@ -365,24 +391,115 @@ static void add_del_ports(int argc UNUSED, char *argv[], } } -static int ifup_and_add_port(struct dpif *dpif, int dp_idx, const char *netdev) +static bool +get_port_number(struct dpif *dpif, const char *name, uint16_t *port) { - int retval = if_up(netdev); - return retval ? retval : dpif_add_port(dpif, dp_idx, netdev); + struct odp_port *ports; + size_t n_ports; + size_t i; + + query_ports(dpif, &ports, &n_ports); + for (i = 0; i < n_ports; i++) { + if (!strcmp(name, ports[i].devname)) { + *port = ports[i].port; + free(ports); + return true; + } + } + free(ports); + ofp_error(0, "no port named %s", name); + return false; } -static void do_add_port(const struct settings *s UNUSED, int argc UNUSED, - char *argv[]) +static void +do_del_port(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) { - add_del_ports(argc, argv, ifup_and_add_port, "add", "to"); + bool failure = false; + struct dpif dpif; + int i; + + run(dpif_open(argv[1], &dpif), "opening datapath"); + for (i = 2; i < argc; i++) { + const char *name = argv[i]; + uint16_t port; + int error; + + if (!name[strspn(name, "0123456789")]) { + port = atoi(name); + } else if (!get_port_number(&dpif, name, &port)) { + failure = true; + continue; + } + + error = dpif_port_del(&dpif, port); + if (error) { + ofp_error(error, "deleting port %s from %s failed", name, argv[1]); + failure = true; + } + } + dpif_close(&dpif); + if (failure) { + exit(EXIT_FAILURE); + } } -static void do_del_port(const struct settings *s UNUSED, int argc UNUSED, - char *argv[]) +static void +show_dpif(struct dpif *dpif) { - add_del_ports(argc, argv, dpif_del_port, "remove", "from"); + struct odp_port *ports; + size_t n_ports; + size_t i; + + printf("datapath %u:\n", dpif->minor); + query_ports(dpif, 
&ports, &n_ports); + for (i = 0; i < n_ports; i++) { + printf("\tport %u: %s\n", ports[i].port, ports[i].devname); + } + free(ports); + dpif_close(dpif); } -#endif /* HAVE_NETLINK */ + +static void +do_show_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) +{ + bool failure = false; + if (argc > 1) { + int i; + for (i = 1; i < argc; i++) { + const char *name = argv[i]; + struct dpif dpif; + int error; + + error = dpif_open(name, &dpif); + if (!error) { + show_dpif(&dpif); + } else { + ofp_error(error, "opening datapath %s failed", name); + failure = true; + } + } + } else { + unsigned int i; + for (i = 0; i < ODP_MAX; i++) { + char name[128]; + struct dpif dpif; + int error; + + sprintf(name, "dp%u", i); + error = dpif_open(name, &dpif); + if (!error) { + show_dpif(&dpif); + } else if (error != ENODEV) { + ofp_error(error, "opening datapath %s failed", name); + failure = true; + } + } + } + if (failure) { + exit(EXIT_FAILURE); + } +} + /* Generic commands. */ @@ -1370,6 +1487,7 @@ static struct command all_commands[] = { { "addif", 2, INT_MAX, do_add_port }, { "delif", 2, INT_MAX, do_del_port }, { "get-idx", 1, 1, do_get_idx }, + { "showdp", 0, INT_MAX, do_show_dp }, #endif { "show", 1, 1, do_show }, diff --git a/utilities/nlmon.c b/utilities/nlmon.c new file mode 100644 index 00000000..78801b7e --- /dev/null +++ b/utilities/nlmon.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "netlink.h" +#include "ofpbuf.h" +#include "poll-loop.h" +#include "timeval.h" +#include "util.h" +#include "vlog.h" + +static const struct nl_policy rtnlgrp_link_policy[] = { + [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, + [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, +}; + +int +main(int argc UNUSED, char *argv[]) +{ + struct nl_sock *sock; + int error; + + set_program_name(argv[0]); + time_init(); + vlog_init(); + vlog_set_levels(VLM_ANY_MODULE, VLF_ANY_FACILITY, VLL_DBG); + + 
error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &sock); + if (error) { + ofp_fatal(error, "could not create rtnetlink socket"); + } + + for (;;) { + struct ofpbuf *buf; + + error = nl_sock_recv(sock, &buf, false); + if (error == EAGAIN) { + /* Nothing to do. */ + } else if (error == ENOBUFS) { + ofp_error(0, "network monitor socket overflowed"); + } else if (error) { + ofp_fatal(error, "error on network monitor socket"); + } else { + struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; + struct nlmsghdr *nlh; + struct ifinfomsg *iim; + + nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); + iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); + if (!iim) { + ofp_error(0, "received bad rtnl message (no ifinfomsg)"); + ofpbuf_delete(buf); + continue; + } + + if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), + rtnlgrp_link_policy, + attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { + ofp_error(0, "received bad rtnl message (policy)"); + ofpbuf_delete(buf); + continue; + } + printf("netdev %s changed (%s):\n", + nl_attr_get_string(attrs[IFLA_IFNAME]), + (nlh->nlmsg_type == RTM_NEWLINK ? "RTM_NEWLINK" + : nlh->nlmsg_type == RTM_DELLINK ? "RTM_DELLINK" + : nlh->nlmsg_type == RTM_GETLINK ? "RTM_GETLINK" + : nlh->nlmsg_type == RTM_SETLINK ? 
"RTM_SETLINK" + : "other")); + if (attrs[IFLA_MASTER]) { + uint32_t idx = nl_attr_get_u32(attrs[IFLA_MASTER]); + char ifname[IFNAMSIZ]; + if (!if_indextoname(idx, ifname)) { + strcpy(ifname, "unknown"); + } + printf("\tmaster=%"PRIu32" (%s)\n", idx, ifname); + } + ofpbuf_delete(buf); + } + + nl_sock_wait(sock, POLLIN); + poll_block(); + } +} + diff --git a/vswitchd/brcompat.c b/vswitchd/brcompat.c index 439d9a80..6e98c29d 100644 --- a/vswitchd/brcompat.c +++ b/vswitchd/brcompat.c @@ -64,9 +64,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Used for creating kernel datapaths */ -static struct dpif mgmt_dpif; - /* Netlink socket to kernel datapath */ struct nl_sock *nl_sock; @@ -444,9 +441,5 @@ brc_init(const char *file_name) "\"brcompat\" kernel module."); } - if (dpif_open(-1, &mgmt_dpif) != 0) { - ofp_fatal(0, "could not open datapath interface"); - } - config_name = file_name; } diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 7ff6c66c..9c89d3ff 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -169,6 +169,7 @@ struct bridge { struct rconn *rconn; /* Connection to secchan subprocess. */ /* Kernel datapath information. */ + struct dpif dpif; /* Kernel datapath. */ int dp_idx; /* Kernel datapath index. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ @@ -197,12 +198,8 @@ struct bridge { /* List of all bridges. */ static struct list all_bridges = LIST_INITIALIZER(&all_bridges); -/* Each value is true if the corresponding datapath has been created, - * false otherwise.*/ -static bool in_use_dps[DP_MAX]; - -/* Used for creating and destroying kernel datapaths, etc. */ -static struct dpif mgmt_dpif; +/* Maximum number of datapaths. 
*/ +enum { DP_MAX = 256 }; static struct bridge *bridge_create(const char *name); static void bridge_destroy(struct bridge *); @@ -212,7 +209,7 @@ static void bridge_run_one(struct bridge *); static void bridge_reconfigure_one(struct bridge *); static void bridge_get_all_ifaces(const struct bridge *, struct svec *ifaces); static bool bridge_is_backlogged(const struct bridge *); -static int bridge_fetch_dp_ifaces(struct bridge *, struct svec *iface_names); +static void bridge_fetch_dp_ifaces(struct bridge *); static void bridge_flush(struct bridge *); static void bridge_process_msg(struct bridge *, struct ofpbuf *); @@ -241,7 +238,7 @@ static void brstp_reconfigure(struct bridge *); static void brstp_adjust_timers(struct bridge *); static void brstp_run(struct bridge *); static void brstp_wait(struct bridge *); -static void brstp_receive(struct bridge *, const struct flow *, +static void brstp_receive(struct bridge *, const flow_t *, const struct ofpbuf *); static void iface_create(struct port *, const char *name); @@ -256,16 +253,18 @@ void bridge_init(void) { int retval; - size_t i; - - retval = dpif_open(-1, &mgmt_dpif); - if (retval) { - ofp_fatal(retval, "could not create datapath management socket"); - } + int i; for (i = 0; i < DP_MAX; i++) { - int retval = dpif_del_dp(&mgmt_dpif, i, NULL); - if (retval && retval != ENOENT) { + struct dpif dpif; + char devname[16]; + + sprintf(devname, "dp%d", i); + retval = dpif_open(devname, &dpif); + if (!retval) { + dpif_delete(&dpif); + dpif_close(&dpif); + } else if (retval != ENODEV) { VLOG_ERR("failed to delete datapath nl:%d: %s", i, strerror(retval)); } @@ -317,49 +316,69 @@ bridge_reconfigure(void) * that port already belongs to a different datapath, so we must do all * port deletions before any port additions. 
*/ LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { - struct svec cur_ifaces, want_ifaces, del_ifaces; + struct odp_port *dpif_ports; + size_t n_dpif_ports; + struct svec want_ifaces; - bridge_fetch_dp_ifaces(br, &cur_ifaces); + dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); bridge_get_all_ifaces(br, &want_ifaces); - svec_diff(&want_ifaces, &cur_ifaces, NULL, NULL, &del_ifaces); - for (i = 0; i < del_ifaces.n; i++) { - const char *if_name = del_ifaces.names[i]; - if (strcmp(if_name, br->name)) { - int retval = dpif_del_port(&mgmt_dpif, br->dp_idx, if_name); + for (i = 0; i < n_dpif_ports; i++) { + const struct odp_port *p = &dpif_ports[i]; + if (!svec_contains(&want_ifaces, p->devname) + && strcmp(p->devname, br->name)) { + int retval = dpif_port_del(&br->dpif, p->port); if (retval) { VLOG_ERR("failed to remove %s interface from nl:%d: %s", - if_name, br->dp_idx, strerror(retval)); + p->devname, br->dp_idx, strerror(retval)); } - } else { - /* Can't remove local port. */ } } - svec_destroy(&cur_ifaces); svec_destroy(&want_ifaces); - svec_destroy(&del_ifaces); + free(dpif_ports); } LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + struct odp_port *dpif_ports; + size_t n_dpif_ports; struct svec cur_ifaces, want_ifaces, add_ifaces; + int next_port_no; - bridge_fetch_dp_ifaces(br, &cur_ifaces); + dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); + svec_init(&cur_ifaces); + for (i = 0; i < n_dpif_ports; i++) { + svec_add(&cur_ifaces, dpif_ports[i].devname); + } + free(dpif_ports); + svec_sort_unique(&cur_ifaces); bridge_get_all_ifaces(br, &want_ifaces); svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL); + + next_port_no = 0; for (i = 0; i < add_ifaces.n; i++) { const char *if_name = add_ifaces.names[i]; - int retval; if_up(if_name); - retval = dpif_add_port(&mgmt_dpif, br->dp_idx, if_name); - if (retval) { - VLOG_ERR("failed to add %s interface to nl:%d: %s", - if_name, br->dp_idx, strerror(retval)); + for (;;) { + int error = 
dpif_port_add(&br->dpif, if_name, next_port_no++); + if (error != EEXIST) { + if (next_port_no >= 256) { + VLOG_ERR("ran out of valid port numbers on nl:%d", + br->dp_idx); + goto out; + } + if (error) { + VLOG_ERR("failed to add %s interface to nl:%d: %s", + if_name, br->dp_idx, strerror(error)); + } + break; + } } } + out: svec_destroy(&cur_ifaces); svec_destroy(&want_ifaces); svec_destroy(&add_ifaces); } LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { - bridge_fetch_dp_ifaces(br, NULL); + bridge_fetch_dp_ifaces(br); for (i = 0; i < br->n_ports; ) { struct port *port = br->ports[i]; for (j = 0; j < port->n_ifaces; ) { @@ -382,7 +401,6 @@ bridge_reconfigure(void) } } } - LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { brstp_reconfigure(br); } @@ -441,7 +459,6 @@ bridge_flush(struct bridge *br) /* Bridge reconfiguration functions. */ -static void sanitize_opp(struct ofp_phy_port *opp); static void run_secchan(struct bridge *); static void start_secchan(struct bridge *); @@ -449,7 +466,7 @@ static struct bridge * bridge_create(const char *name) { struct bridge *br; - int retval; + int error; assert(!bridge_lookup(name)); br = xcalloc(1, sizeof *br); @@ -476,21 +493,17 @@ bridge_create(const char *name) br->stats_mgr = stats_mgr_create(br->rconn); /* Create kernel datapath. 
*/ - retval = dpif_add_dp(&mgmt_dpif, -1, br->name); - if (retval) { + error = dpif_create(br->name, &br->dpif); + if (error) { VLOG_ERR("failed to create datapath %s: %s", - br->name, strerror(retval)); + br->name, strerror(error)); free(br); return NULL; } list_push_back(&all_bridges, &br->node); - br->dp_idx = dpif_get_idx(br->name); - if (br->dp_idx == -1) { - VLOG_WARN("bad dp_idx for bridge %s", br->name); - } - + br->dp_idx = br->dpif.minor; VLOG_INFO("created bridge %s with dp_idx %d", br->name, br->dp_idx); return br; @@ -692,11 +705,8 @@ bridge_destroy(struct bridge *br) } list_remove(&br->node); if (br->dp_idx >= 0) { - int retval = dpif_del_dp(&mgmt_dpif, br->dp_idx, NULL); - if (!retval || retval == ENOENT) { - assert(br->dp_idx < DP_MAX); - in_use_dps[br->dp_idx] = false; - } else { + int retval = dpif_delete(&br->dpif); + if (retval && retval != ENOENT) { VLOG_ERR("failed to delete datapath nl:%d: %s", br->dp_idx, strerror(retval)); } @@ -968,24 +978,14 @@ bridge_is_backlogged(const struct bridge *br) return br->txqlen >= 100; } -/* The kernel interface to add ports doesn't report what port numbers they were - * assigned (XXX), so now we have to connect to the datapath and use a feature - * request to obtain the port numbers. */ -static int -bridge_fetch_dp_ifaces(struct bridge *br, struct svec *iface_names) -{ - char *vconn_name; - struct vconn *vconn = NULL; - struct ofpbuf *request; - struct ofpbuf *reply = NULL; - struct ofp_switch_features *osf; - size_t n_ports; +/* For robustness, in case the administrator moves around datapath ports behind + * our back, we re-check all the datapath port numbers here. */ +static void +bridge_fetch_dp_ifaces(struct bridge *br) +{ + struct odp_port *dpif_ports; + size_t n_dpif_ports; size_t i, j; - int retval; - - if (iface_names) { - svec_init(iface_names); - } /* Reset all interface numbers. 
*/ for (i = 0; i < br->n_ports; i++) { @@ -997,91 +997,25 @@ bridge_fetch_dp_ifaces(struct bridge *br, struct svec *iface_names) } port_array_clear(&br->ifaces); - /* Open connection to datapath. */ - vconn_name = xasprintf("nl:%d", br->dp_idx); - retval = vconn_open_block(vconn_name, OFP_VERSION, &vconn); - free(vconn_name); - if (retval) { - VLOG_ERR("could not open connection to nl:%d: %s", - br->dp_idx, strerror(retval)); - goto done; - } - - /* Send request, receive reply. */ - make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &request); - retval = vconn_transact(vconn, request, &reply); - if (retval) { - if (retval == EOF) { - VLOG_ERR("unexpected connection close talking to nl:%d", - br->dp_idx); - } else { - VLOG_ERR("error requesting features from nl:%d: %s", - br->dp_idx, strerror(retval)); - } - goto done; - } - - /* Parse reply. */ - osf = reply->data; - retval = check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY, - sizeof *osf, sizeof *osf->ports, - &n_ports); - if (retval) { - goto done; - } - for (i = 0; i < n_ports; i++) { - struct ofp_phy_port *opp = &osf->ports[i]; - int port_no = ntohs(opp->port_no); - struct iface *iface; - - sanitize_opp(opp); - - iface = iface_lookup(br, (const char *) opp->name); + dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); + for (i = 0; i < n_dpif_ports; i++) { + struct odp_port *p = &dpif_ports[i]; + struct iface *iface = iface_lookup(br, p->devname); if (iface) { if (iface->dp_ifidx >= 0) { VLOG_WARN("datapath nl:%d reported interface %s twice", - br->dp_idx, opp->name); - } else if (iface_from_dp_ifidx(br, port_no)) { - VLOG_WARN("datapath nl:%d reported interface %d twice", - br->dp_idx, port_no); + br->dp_idx, p->devname); + } else if (iface_from_dp_ifidx(br, p->port)) { + VLOG_WARN("datapath nl:%d reported interface %"PRIu16" twice", + br->dp_idx, p->port); } else { - port_array_set(&br->ifaces, port_no, iface); - iface->dp_ifidx = port_no; + uint16_t ofp_port = p->port == ODPP_LOCAL ? 
OFPP_LOCAL : p->port; + port_array_set(&br->ifaces, ofp_port, iface); + iface->dp_ifidx = p->port; } } - if (iface_names) { - svec_add(iface_names, (const char *) opp->name); - } - } - retval = 0; - - if (iface_names) { - svec_sort(iface_names); - if (!svec_is_unique(iface_names)) { - VLOG_WARN("datapath nl:%d reported interface named %s twice", - br->dp_idx, svec_get_duplicate(iface_names)); - svec_unique(iface_names); - } - } - -done: - vconn_close(vconn); - ofpbuf_delete(reply); - return retval; -} - -static void -sanitize_opp(struct ofp_phy_port *opp) -{ - size_t i; - - for (i = 0; i < sizeof opp->name; i++) { - char c = opp->name[i]; - if (c && (c < 0x20 || c > 0x7e)) { - opp->name[i] = '.'; - } } - opp->name[sizeof opp->name - 1] = '\0'; + free(dpif_ports); } /* Returns the idle time that the bridge is currently using. We reduce the @@ -1188,7 +1122,7 @@ bond_choose_iface(const struct port *port) } static bool -choose_output_iface(const struct port *port, const struct flow *flow, +choose_output_iface(const struct port *port, const flow_t *flow, uint16_t *dp_ifidx, tag_type *tags) { struct iface *iface; @@ -1340,7 +1274,7 @@ bond_wait(struct bridge *br) } static bool -set_dst(struct ft_dst *p, const struct flow *flow, +set_dst(struct ft_dst *p, const flow_t *flow, const struct port *in_port, const struct port *out_port, tag_type *tags) { @@ -1470,7 +1404,7 @@ port_includes_vlan(const struct port *port, uint16_t vlan) } static size_t -compose_dsts(const struct bridge *br, const struct flow *flow, uint16_t vlan, +compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, const struct port *in_port, const struct port *out_port, struct ft_dst dsts[], tag_type *tags) { @@ -1553,7 +1487,7 @@ struct received_packet { }; static void -send_packets(struct bridge *br, const struct flow *flow, +send_packets(struct bridge *br, const flow_t *flow, const struct received_packet *pkt, uint16_t vlan, const struct port *in_port, const struct port *out_port, 
tag_type tags, bool setup_flow) @@ -1635,7 +1569,7 @@ send_packets(struct bridge *br, const struct flow *flow, } static bool -is_bcast_arp_reply(const struct flow *flow, const struct ofpbuf *pkt) +is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *pkt) { return (flow->dl_type == htons(ETH_TYPE_ARP) && eth_addr_is_broadcast(flow->dl_dst) @@ -1644,7 +1578,7 @@ is_bcast_arp_reply(const struct flow *flow, const struct ofpbuf *pkt) } static void -process_flow(struct bridge *br, const struct flow *flow, +process_flow(struct bridge *br, const flow_t *flow, struct received_packet *pkt) { uint16_t in_ifidx = ntohs(flow->in_port); @@ -1774,7 +1708,7 @@ process_flow(struct bridge *br, const struct flow *flow, /* MAC learning. */ out_port = FLOOD_PORT; if (br->ml) { - uint16_t out_port_idx; + int out_port_idx; bool may_learn; if (!pkt->buf) { @@ -1786,8 +1720,8 @@ process_flow(struct bridge *br, const struct flow *flow, * (because we probably sent the packet on one bonded interface and * got it back on the other). */ /* XXX invalidation? */ - uint16_t src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); - may_learn = src_idx == OFPP_FLOOD || src_idx == in_port->port_idx; + int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + may_learn = src_idx < 0 || src_idx == in_port->port_idx; /* Broadcast ARP replies are an exception to this rule: the host * has moved to another switch. */ @@ -1818,7 +1752,7 @@ process_flow(struct bridge *br, const struct flow *flow, /* Determine output port. 
*/ out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, &tags); - if (out_port_idx < br->n_ports) { + if (out_port_idx >= 0 && out_port_idx < br->n_ports) { out_port = br->ports[out_port_idx]; } } @@ -1864,7 +1798,7 @@ process_packet_in(struct bridge *br, void *opi_) struct ofp_packet_in *opi = opi_; struct received_packet pkt; struct ofpbuf buf; - struct flow flow; + flow_t flow; if (check_ofp_message_array(&opi->header, OFPT_PACKET_IN, offsetof(struct ofp_packet_in, data), @@ -1874,7 +1808,7 @@ process_packet_in(struct bridge *br, void *opi_) buf.data = opi->data; pkt.buf = &buf; pkt.buffer_id = ntohl(opi->buffer_id); - flow_extract(&buf, ntohs(opi->in_port), &flow); + flow_extract(&buf, ntohs(opi->in_port), &flow); /* XXX port number translation */ if (opi->reason == OFPR_NO_MATCH) { /* Delete any existing flow from the flow table. It must not really be @@ -1907,7 +1841,7 @@ process_flow_expired(struct bridge *br, void *ofe_) { struct ofp_flow_expired *ofe = ofe_; struct ft_flow *f; - struct flow flow; + flow_t flow; if (check_ofp_message(&ofe->header, OFPT_FLOW_EXPIRED, sizeof *ofe)) { return; @@ -2241,7 +2175,7 @@ flowstats_process(struct bridge *br) - offsetof(struct ofp_flow_stats, actions)) / sizeof(struct ofp_action_header)); struct ft_flow *f; - struct flow flow; + flow_t flow; size_t hash; if (fs->match.wildcards != htonl(0)) { @@ -3136,8 +3070,7 @@ brstp_wait(struct bridge *br) } static void -brstp_receive(struct bridge *br, const struct flow *flow, - const struct ofpbuf *pkt) +brstp_receive(struct bridge *br, const flow_t *flow, const struct ofpbuf *pkt) { struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); struct ofpbuf payload = *pkt; diff --git a/vswitchd/flowtrack.c b/vswitchd/flowtrack.c index 450ae81e..bf3cce9c 100644 --- a/vswitchd/flowtrack.c +++ b/vswitchd/flowtrack.c @@ -91,7 +91,7 @@ ftd_print(const struct ft_dst *dsts, size_t n) } struct ft_flow * -ftf_create(const struct flow *flow, +ftf_create(const flow_t *flow, const 
struct ft_dst dsts[], size_t n_dsts, tag_type tags) { @@ -159,7 +159,7 @@ ft_destroy(struct ft *ft) } struct ft_flow * -ft_lookup(const struct ft *ft, const struct flow *target, size_t hash) +ft_lookup(const struct ft *ft, const flow_t *target, size_t hash) { struct ft_flow *f; diff --git a/vswitchd/flowtrack.h b/vswitchd/flowtrack.h index b04f4716..c0c40c65 100644 --- a/vswitchd/flowtrack.h +++ b/vswitchd/flowtrack.h @@ -59,7 +59,7 @@ ftd_equal(const struct ft_dst *a, size_t an, struct ft_flow { tag_type tags; struct hmap_node node; - struct flow flow; + flow_t flow; bool need_drop; /* Statistics. */ @@ -72,7 +72,7 @@ struct ft_flow { struct ft_dst one_dst; }; -struct ft_flow *ftf_create(const struct flow *, +struct ft_flow *ftf_create(const flow_t *, const struct ft_dst[], size_t n_dsts, tag_type tags); void ftf_destroy(struct ft_flow *); @@ -85,7 +85,7 @@ struct ft { struct ft *ft_create(void); void ft_destroy(struct ft *); void ft_swap(struct ft *, struct ft *); -struct ft_flow *ft_lookup(const struct ft *, const struct flow *, size_t hash); +struct ft_flow *ft_lookup(const struct ft *, const flow_t *, size_t hash); void ft_remove(struct ft *, struct ft_flow *); void ft_insert(struct ft *, struct ft_flow *); -- 2.30.2