--- /dev/null
+The following features are temporarily missing, pending time to
+reimplement them with the new architecture:
+
+- Hardware table support in the kernel datapath.
+
+- STP support in secchan (note that this is distinct from STP support
+ in vswitchd).
+
+- The OFPPC_NO_RECV, OFPPC_NO_RECV_STP, and OFPPC_NO_FWD bits in port
+ configurations.
+
+- The OFPP_TABLE action.
+
+- SNAT support in secchan (but SNAT is still supported in the kernel
+ datapath).
+
+- udatapath.
+
+- vswitchd (this is our top priority).
+
+- Up-to-date manpages and documentation (a lot of them still need to be
+  updated).
include secchan/automake.mk
include controller/automake.mk
include utilities/automake.mk
-include udatapath/automake.mk
include tests/automake.mk
include include/automake.mk
include third-party/automake.mk
dist_modules = $(both_modules) # Modules to distribute
openflow_sources = \
- chain.c \
- crc32.c \
+ actions.c \
datapath.c \
- dp_act.c \
dp_dev.c \
dp_notify.c \
flow.c \
- forward.c \
- nx_act.c \
- nx_act_snat.c \
- nx_msg.c \
- table-hash.c \
- table-linear.c
+ snat.c \
+ table.c
openflow_headers = \
- chain.h \
+ actions.h \
compat.h \
- crc32.h \
datapath.h \
dp_dev.h \
flow.h \
- forward.h \
- dp_act.h \
- nx_act.h \
- nx_act_snat.h \
- nx_msg.h \
- table.h
+ snat.h
dist_sources = $(foreach module,$(dist_modules),$($(module)_sources))
dist_headers = $(foreach module,$(dist_modules),$($(module)_headers))
--- /dev/null
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
+ * Stanford Junior University
+ */
+
+/* Functions for executing OpenFlow actions. */
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include "datapath.h"
+#include "dp_dev.h"
+#include "actions.h"
+#include "openflow/datapath-protocol.h"
+#include "snat.h"
+
+/* Returns an skb whose data may be modified, or NULL on failure.
+ *
+ * If 'skb' is shared or cloned, a private copy is made with skb_copy() using
+ * allocation flags 'gfp'; the original is freed and the copy is returned.
+ * Otherwise the headers are pulled into the linear data area and 'skb'
+ * itself is returned.
+ *
+ * On every failure path 'skb' is freed before NULL is returned, so the caller
+ * must stop using the pointer it passed in and continue only with the return
+ * value. */
+struct sk_buff *
+make_writable(struct sk_buff *skb, gfp_t gfp)
+{
+ if (skb_shared(skb) || skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, gfp);
+ if (nskb) {
+ kfree_skb(skb);
+ return nskb;
+ }
+ /* Copy failed: fall through to the common free-and-fail path. */
+ } else {
+ /* Pull everything up to the end of a TCP-sized transport header,
+ * but never more than the packet actually contains. */
+ unsigned int hdr_len = (skb_transport_offset(skb)
+ + sizeof(struct tcphdr));
+ if (pskb_may_pull(skb, min(hdr_len, skb->len)))
+ return skb;
+ }
+ kfree_skb(skb);
+ return NULL;
+}
+
+
+/* Removes the 802.1Q header from 'skb' if its Ethernet type is ETH_P_8021Q;
+ * any other packet is left unchanged.  Always returns 'skb'.
+ *
+ * The data is modified in place.  The only caller in this file, strip_vlan(),
+ * calls make_writable() first. */
+static struct sk_buff *
+vlan_pull_tag(struct sk_buff *skb)
+{
+ struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+ struct ethhdr *eh;
+
+
+ /* Verify we were given a vlan packet */
+ if (vh->h_vlan_proto != htons(ETH_P_8021Q))
+ return skb;
+
+ /* Slide the two MAC addresses forward by VLAN_HLEN bytes so that they
+ * sit directly in front of the encapsulated Ethernet type.
+ * NOTE(review): assumes skb->data points at the Ethernet header here. */
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+
+ /* Drop the now-unused leading VLAN_HLEN bytes. */
+ eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+
+ /* Keep the skb metadata consistent with the shortened header. */
+ skb->protocol = eh->h_proto;
+ skb->mac_header += VLAN_HLEN;
+
+ return skb;
+}
+
+
+/* Sets the bits selected by 'mask' in the VLAN TCI of 'skb' to 'tci' (both in
+ * host byte order).
+ *
+ * If 'key' says the packet already has a VLAN, the existing TCI is edited in
+ * place and the bits outside 'mask' are preserved.  Otherwise a new 802.1Q
+ * header is inserted whose TCI is 'tci' as given ('mask' is not applied on
+ * this path).
+ *
+ * 'key->dl_vlan' is refreshed from the VLAN ID bits of the resulting header.
+ *
+ * Returns the skb to continue with, or NULL if inserting the header failed.
+ * NOTE(review): the NULL path assumes __vlan_put_tag() has already released
+ * the skb -- confirm against the kernel version in use. */
+static struct sk_buff *
+modify_vlan_tci(struct sk_buff *skb, struct odp_flow_key *key,
+ u16 tci, u16 mask)
+{
+ struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+
+ if (key->dl_vlan != htons(ODP_VLAN_NONE)) {
+ /* Modify vlan id, but maintain other TCI values */
+ vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci);
+ } else {
+ /* Add vlan header */
+
+ /* xxx The vlan_put_tag function doesn't seem to work
+ * xxx reliably when it attempts to use the hardware-accelerated
+ * xxx version. We'll directly use the software version
+ * xxx until the problem can be diagnosed.
+ */
+ skb = __vlan_put_tag(skb, tci);
+ if (!skb)
+ return NULL;
+ /* The header may have moved, so look it up again. */
+ vh = vlan_eth_hdr(skb);
+ }
+ key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
+
+ return skb;
+}
+
+/* Executes a set-VLAN-ID action: makes 'skb' writable and then rewrites (or
+ * adds) the VLAN ID given in network byte order by 'a->vlan_vid', keeping
+ * 'key' in sync.  Returns the skb to continue with, or NULL on failure. */
+static struct sk_buff *set_vlan_vid(struct sk_buff *skb,
+				    struct odp_flow_key *key,
+				    const struct odp_action_vlan_vid *a,
+				    gfp_t gfp)
+{
+	const u16 vid = ntohs(a->vlan_vid);
+
+	skb = make_writable(skb, gfp);
+	if (!skb)
+		return NULL;
+
+	return modify_vlan_tci(skb, key, vid, VLAN_VID_MASK);
+}
+
+/* Mask for the priority bits in a vlan header. The kernel doesn't
+ * define this like it does for VID. */
+#define VLAN_PCP_MASK 0xe000
+
+/* Executes a set-VLAN-priority action: makes 'skb' writable and then rewrites
+ * the priority bits of the TCI (or adds a VLAN header carrying them) from
+ * 'a->vlan_pcp'.  Returns the skb to continue with, or NULL on failure. */
+static struct sk_buff *set_vlan_pcp(struct sk_buff *skb,
+				    struct odp_flow_key *key,
+				    const struct odp_action_vlan_pcp *a,
+				    gfp_t gfp)
+{
+	/* The priority occupies the top three bits of the 16-bit TCI. */
+	const u16 pcp_bits = a->vlan_pcp << 13;
+
+	skb = make_writable(skb, gfp);
+	if (!skb)
+		return NULL;
+
+	return modify_vlan_tci(skb, key, pcp_bits, VLAN_PCP_MASK);
+}
+
+/* Executes a strip-VLAN action: makes 'skb' writable, removes its 802.1Q
+ * header if it has one, and records in 'key' that the packet is untagged.
+ * Returns the skb to continue with, or NULL if it could not be made
+ * writable. */
+static struct sk_buff *strip_vlan(struct sk_buff *skb,
+				  struct odp_flow_key *key, gfp_t gfp)
+{
+	skb = make_writable(skb, gfp);
+	if (!skb)
+		return NULL;
+
+	vlan_pull_tag(skb);
+	key->dl_vlan = htons(ODP_VLAN_NONE);
+	return skb;
+}
+
+/* Executes a set-Ethernet-address action: makes 'skb' writable and copies
+ * 'a->dl_addr' over the source address (for ODPAT_SET_DL_SRC) or the
+ * destination address (for any other 'a->type').  Returns the skb to continue
+ * with, or NULL if it could not be made writable. */
+static struct sk_buff *set_dl_addr(struct sk_buff *skb,
+				   const struct odp_action_dl_addr *a,
+				   gfp_t gfp)
+{
+	struct ethhdr *eh;
+	unsigned char *target;
+
+	skb = make_writable(skb, gfp);
+	if (!skb)
+		return NULL;
+
+	eh = eth_hdr(skb);
+	if (a->type == ODPAT_SET_DL_SRC)
+		target = eh->h_source;
+	else
+		target = eh->h_dest;
+	memcpy(target, a->dl_addr, ETH_ALEN);
+
+	return skb;
+}
+
+/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
+ * covered by the sum has been changed from 'from' to 'to'. If set,
+ * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
+ * Based on nf_proto_csum_replace4.
+ *
+ * Only checksums are touched here; the caller is responsible for storing
+ * 'to' into the field itself. */
+static void update_csum(__sum16 *sum, struct sk_buff *skb,
+ __be32 from, __be32 to, int pseudohdr)
+{
+ /* Summing in ~from and to swaps the old value's contribution for the
+ * new one's in one's-complement arithmetic. */
+ __be32 diff[] = { ~from, to };
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Adjust the stored checksum incrementally. */
+ *sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
+ ~csum_unfold(*sum)));
+ /* For pseudo-header fields the cached skb->csum is adjusted as
+ * well; for other fields it is left alone. */
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+ ~skb->csum);
+ } else if (pseudohdr)
+ /* CHECKSUM_PARTIAL: '*sum' is only adjusted for pseudo-header
+ * fields, and without the inversions used above.
+ * NOTE(review): presumably because '*sum' then holds just the
+ * pseudo-header sum, to be completed later -- confirm. */
+ *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
+ csum_unfold(*sum)));
+}
+
+/* Executes a set-IP-address action on 'skb'.  'a->type' selects the source
+ * address (ODPAT_SET_NW_SRC) or, for any other value, the destination
+ * address; the new value is 'a->nw_addr'.
+ *
+ * Packets that 'key' does not identify as IPv4 are returned untouched.
+ * Otherwise the IP header checksum is adjusted, and so is the TCP or UDP
+ * checksum when 'key->nw_proto' names one of those protocols.  'key' itself
+ * is not modified.
+ *
+ * Returns the skb to continue with, or NULL if it could not be made
+ * writable. */
+static struct sk_buff *set_nw_addr(struct sk_buff *skb,
+ struct odp_flow_key *key,
+ const struct odp_action_nw_addr *a,
+ gfp_t gfp)
+{
+ if (key->dl_type != htons(ETH_P_IP))
+ return skb;
+
+ skb = make_writable(skb, gfp);
+ if (skb) {
+ struct iphdr *nh = ip_hdr(skb);
+ u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr;
+ u32 old = *f;
+ u32 new = a->nw_addr;
+
+ /* The IP addresses are part of the TCP/UDP pseudo-header, hence
+ * pseudohdr == 1 for the transport checksums. */
+ if (key->nw_proto == IPPROTO_TCP) {
+ struct tcphdr *th = tcp_hdr(skb);
+ update_csum(&th->check, skb, old, new, 1);
+ } else if (key->nw_proto == IPPROTO_UDP) {
+ /* NOTE(review): a UDP checksum of zero is adjusted like any
+ * other value here -- confirm that is intended. */
+ struct udphdr *th = udp_hdr(skb);
+ update_csum(&th->check, skb, old, new, 1);
+ }
+ /* The IP header checksum has no pseudo-header. */
+ update_csum(&nh->check, skb, old, new, 0);
+ /* Checksums are fixed up; now store the new address. */
+ *f = new;
+ }
+ return skb;
+}
+
+/* Executes a set-transport-port action on 'skb'.  'a->type' selects the
+ * source port (ODPAT_SET_TP_SRC) or, for any other value, the destination
+ * port; the new value is 'a->tp_port'.
+ *
+ * Packets that 'key' does not identify as TCP or UDP over IPv4 are returned
+ * untouched.  Otherwise the port is overwritten and the TCP or UDP checksum
+ * is adjusted to match.  'key' itself is not modified.
+ *
+ * Returns the skb to continue with, or NULL if it could not be made
+ * writable. */
+static struct sk_buff *
+set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
+	    const struct odp_action_tp_port *a,
+	    gfp_t gfp)
+{
+	int check_ofs;
+
+	if (key->dl_type != htons(ETH_P_IP))
+		return skb;
+
+	/* Offset of the checksum field within the transport header. */
+	if (key->nw_proto == IPPROTO_TCP)
+		check_ofs = offsetof(struct tcphdr, check);
+	else if (key->nw_proto == IPPROTO_UDP)
+		check_ofs = offsetof(struct udphdr, check);
+	else
+		return skb;
+
+	skb = make_writable(skb, gfp);
+	if (skb) {
+		/* TCP and UDP both start with the source and destination
+		 * ports, so the UDP layout serves for either protocol. */
+		struct udphdr *th = udp_hdr(skb);
+		u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest;
+		u16 old = *f;
+		u16 new = a->tp_port;
+
+		/* The checksum field lives at 'check_ofs' from the start of
+		 * the transport header, not from skb->data.  The ports are
+		 * not part of the pseudo-header, so pseudohdr is 0. */
+		update_csum((u16*)((u8*)th + check_ofs),
+			    skb, old, new, 0);
+		*f = new;
+	}
+	return skb;
+}
+
+/* Returns the length of 'skb' minus its Ethernet header, and minus the VLAN
+ * header too when skb->protocol says the frame is 802.1Q tagged. */
+static inline unsigned packet_length(const struct sk_buff *skb)
+{
+	unsigned hdr_len = ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		hdr_len += VLAN_HLEN;
+	return skb->len - hdr_len;
+}
+
+#ifdef SUPPORT_SNAT
+/* Final transmit step for the SNAT build.  Called directly by dp_xmit_skb()
+ * and also handed to ip_fragment() as its output callback.
+ *
+ * Returns 0 after queueing 'skb' for transmission, or a negative errno after
+ * freeing it. */
+static int
+dp_xmit_skb_finish(struct sk_buff *skb)
+{
+ /* Copy back the Ethernet header that was stowed earlier. */
+ if (skb->protocol == htons(ETH_P_IP) && snat_copy_header(skb)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ skb_reset_mac_header(skb);
+
+ /* Oversized packets are dropped unless they are GSO packets. */
+ if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
+ printk("dropped over-mtu packet: %d > %d\n",
+ packet_length(skb), skb->dev->mtu);
+ kfree_skb(skb);
+ return -E2BIG;
+ }
+
+ /* Undo the skb_pull(ETH_HLEN) done in dp_xmit_skb(). */
+ skb_push(skb, ETH_HLEN);
+ dev_queue_xmit(skb);
+
+ return 0;
+}
+
+/* Transmits 'skb' on skb->dev, fragmenting oversized non-GSO IP packets.
+ *
+ * Returns the length 'skb' had on entry if successful, otherwise a negative
+ * errno. */
+int dp_xmit_skb(struct sk_buff *skb)
+{
+ int len = skb->len;
+ int err;
+
+ skb_pull(skb, ETH_HLEN);
+
+ /* The ip_fragment function does not copy the Ethernet header into
+ * the newly generated frames, so stow the original. */
+ if (skb->protocol == htons(ETH_P_IP))
+ snat_save_header(skb);
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ skb->len > skb->dev->mtu &&
+ !skb_is_gso(skb)) {
+ err = ip_fragment(skb, dp_xmit_skb_finish);
+ } else {
+ err = dp_xmit_skb_finish(skb);
+ }
+ if (err)
+ return err;
+
+ return len;
+}
+#else
+/* Transmits 'skb' on skb->dev.
+ *
+ * An oversized packet that is not a GSO packet is logged, freed and reported
+ * as -E2BIG.  Otherwise the packet is handed to dev_queue_xmit(), whose
+ * result is ignored, and the length 'skb' had on entry is returned. */
+int dp_xmit_skb(struct sk_buff *skb)
+{
+ struct datapath *dp = skb->dev->br_port->dp;
+ int len = skb->len;
+
+ if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
+ printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
+ dp->netdev->name, packet_length(skb), skb->dev->mtu);
+ kfree_skb(skb);
+ return -E2BIG;
+ }
+
+ dev_queue_xmit(skb);
+
+ return len;
+}
+#endif
+
+/* Sends 'skb' to port 'out_port' of 'dp'.  The packet is never handed back to
+ * the caller: it is passed on to dp_dev_recv() or dp_xmit_skb(), or freed
+ * here.
+ *
+ * ODPP_LOCAL delivers the packet to the datapath's own network device; any
+ * other value is used directly as an index into dp->ports[] (it is not
+ * range-checked in this function).
+ *
+ * 'skb' may be null, e.g. the result of a failed skb_clone(); that case, a
+ * missing local device and an empty port slot are all dropped silently. */
+static void
+do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+ struct net_bridge_port *p;
+
+ if (!skb)
+ goto error;
+
+ if (out_port == ODPP_LOCAL) {
+ struct net_device *dev = dp->netdev;
+ if (!dev)
+ goto error;
+#ifdef SUPPORT_SNAT
+ snat_local_in(skb);
+#endif
+ dp_dev_recv(dev, skb);
+ return;
+ }
+
+ p = dp->ports[out_port];
+ if (!p)
+ goto error;
+
+ skb->dev = p->dev;
+ /* The return value (length sent or negative errno) is ignored. */
+ dp_xmit_skb(skb);
+ return;
+
+error:
+ kfree_skb(skb);
+}
+
+/* Sends 'skb' to the ports in port group 'group' of 'dp', skipping empty port
+ * slots and the port whose device is skb->dev.
+ *
+ * To save one clone, the last eligible port is not transmitted to here.
+ * Instead its port number is returned, and the caller is expected to send
+ * 'skb' itself there.  Every earlier eligible port receives a clone allocated
+ * with 'gfp'.
+ *
+ * Returns the port number still to be output to, or -1 if there is none:
+ * the group does not exist, it has no eligible ports, or a clone could not
+ * be allocated (ports already sent to keep their copy; the rest get nothing).
+ *
+ * 'skb' always remains owned by the caller; it is never freed here. */
+static int output_group(struct datapath *dp, __u16 group,
+			struct sk_buff *skb, gfp_t gfp)
+{
+	struct odp_port_group *g = rcu_dereference(dp->groups[group]);
+	int prev_port = -1;
+	int i;
+
+	/* The caller uses the return value as a port number, with -1 as the
+	 * only "nothing to do" marker, so an errno must not be returned. */
+	if (!g)
+		return -1;
+	for (i = 0; i < g->n_ports; i++) {
+		struct net_bridge_port *p = dp->ports[g->ports[i]];
+		if (!p || skb->dev == p->dev)
+			continue;
+		if (prev_port != -1) {
+			struct sk_buff *clone = skb_clone(skb, gfp);
+			/* Do not free 'skb' on failure: the caller still owns
+			 * it and will go on to use and release it. */
+			if (!clone)
+				return -1;
+			do_output(dp, clone, prev_port);
+		}
+		prev_port = p->port_no;
+	}
+	return prev_port;
+}
+
+/* Passes a clone of 'skb', allocated with 'gfp', to dp_output_control()
+ * together with 'arg'.  The caller keeps ownership of 'skb' itself.
+ *
+ * Returns -ENOMEM if the clone cannot be allocated, otherwise whatever
+ * dp_output_control() returns. */
+static int
+output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
+{
+	struct sk_buff *copy = skb_clone(skb, gfp);
+
+	if (copy)
+		return dp_output_control(dp, copy, _ODPL_ACTION_NR, arg);
+	return -ENOMEM;
+}
+
+/* Execute a list of actions against 'skb'.
+ *
+ * The actions in 'actions' are applied in order.  Header-modifying actions
+ * may replace 'skb' with a writable copy and keep 'key' in sync where the
+ * helpers do so; 'gfp' is used for every allocation.
+ *
+ * This function takes ownership of 'skb': on each return path below it has
+ * been passed to do_output(), freed here, or already released by the helper
+ * that returned NULL.
+ *
+ * Returns 0 if successful, -ENOMEM if a modifying action left no skb to
+ * continue with, or the error reported by output_control(). */
+int execute_actions(struct datapath *dp, struct sk_buff *skb,
+ struct odp_flow_key *key,
+ const struct sw_flow_actions *actions,
+ gfp_t gfp)
+{
+ /* Every output action needs a separate clone of 'skb', but the common
+ * case is just a single output action, so that doing a clone and
+ * then freeing the original skbuff is wasteful. So the following code
+ * is slightly obscure just to avoid that. */
+ int prev_port = -1;
+ unsigned int i;
+ int err = 0;
+ for (i = 0; i < actions->n_actions; i++) {
+ const union odp_action *a = &actions->actions[i];
+ WARN_ON_ONCE(skb_shared(skb));
+ /* An output is still pending and more actions follow, so it must
+ * go out on a clone.  A failed clone is passed through as NULL,
+ * which do_output() accepts. */
+ if (prev_port != -1) {
+ do_output(dp, skb_clone(skb, gfp), prev_port);
+ prev_port = -1;
+ }
+
+ switch (a->type) {
+ case ODPAT_OUTPUT:
+ /* Deferred: sent at the top of the next iteration or after
+ * the loop. */
+ prev_port = a->output.port;
+ break;
+
+ case ODPAT_OUTPUT_GROUP:
+ /* The returned value is treated as the port still to be
+ * output to; -1 means none. */
+ prev_port = output_group(dp, a->output_group.group,
+ skb, gfp);
+ break;
+
+ case ODPAT_CONTROLLER:
+ err = output_control(dp, skb, a->controller.arg, gfp);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+ break;
+
+ case ODPAT_SET_VLAN_VID:
+ skb = set_vlan_vid(skb, key, &a->vlan_vid, gfp);
+ break;
+
+ case ODPAT_SET_VLAN_PCP:
+ skb = set_vlan_pcp(skb, key, &a->vlan_pcp, gfp);
+ break;
+
+ case ODPAT_STRIP_VLAN:
+ skb = strip_vlan(skb, key, gfp);
+ break;
+
+ case ODPAT_SET_DL_SRC:
+ case ODPAT_SET_DL_DST:
+ skb = set_dl_addr(skb, &a->dl_addr, gfp);
+ break;
+
+ case ODPAT_SET_NW_SRC:
+ case ODPAT_SET_NW_DST:
+ skb = set_nw_addr(skb, key, &a->nw_addr, gfp);
+ break;
+
+ case ODPAT_SET_TP_SRC:
+ case ODPAT_SET_TP_DST:
+ skb = set_tp_port(skb, key, &a->tp_port, gfp);
+ break;
+
+#ifdef SUPPORT_SNAT
+ case ODPAT_SNAT:
+ snat_skb(dp, skb, a->snat.port, gfp);
+ break;
+#endif
+ }
+ /* A modifying action returned NULL; nothing is left to process. */
+ if (!skb)
+ return -ENOMEM;
+ }
+ /* The final pending output gets the original skb, with no clone. */
+ if (prev_port != -1)
+ do_output(dp, skb, prev_port);
+ else
+ kfree_skb(skb);
+ return err;
+}
--- /dev/null
+/* Interface to the code that executes datapath actions on packets. */
+#ifndef ACTIONS_H
+#define ACTIONS_H 1
+
+#include <linux/gfp.h>
+
+/* Only pointers to these types are used below, so forward declarations
+ * suffice. */
+struct datapath;
+struct sk_buff;
+struct odp_flow_key;
+struct sw_flow_actions;
+
+/* Returns an skb that may be modified (possibly a copy), or NULL on failure.
+ * The skb passed in must not be used afterward. */
+struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp);
+
+/* Transmits the skb on its device.  Returns the number of bytes sent or a
+ * negative errno. */
+int dp_xmit_skb(struct sk_buff *);
+
+/* Applies a list of actions to 'skb', taking ownership of it.  Returns 0 or a
+ * negative errno. */
+int execute_actions(struct datapath *dp, struct sk_buff *skb,
+ struct odp_flow_key *key,
+ const struct sw_flow_actions *, gfp_t gfp);
+
+#endif /* actions.h */
int i, index = 0;
rcu_read_lock();
- for (i=0; i < DP_MAX && index < num; i++) {
- struct datapath *dp = dp_get_by_idx(i);
+ for (i=0; i < ODP_MAX && index < num; i++) {
+ struct datapath *dp = get_dp(i);
if (!dp)
continue;
indices[index++] = dp->netdev->ifindex;
case BRCTL_GET_BRIDGE_INFO: {
struct __bridge_info b;
- uint64_t id = 0;
+ u64 id = 0;
int i;
memset(&b, 0, sizeof(struct __bridge_info));
for (i=0; i<ETH_ALEN; i++)
- id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
+ id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
b.bridge_id = cpu_to_be64(id);
b.stp_enabled = 0;
return -EINVAL;
if (num == 0)
num = 256;
- if (num > OFPP_MAX)
- num = OFPP_MAX;
+ if (num > DP_MAX_PORTS)
+ num = DP_MAX_PORTS;
indices = kcalloc(num, sizeof(int), GFP_KERNEL);
if (indices == NULL)
printk("OpenFlow Bridge Compatibility, built "__DATE__" "__TIME__"\n");
rcu_read_lock();
- for (i=0; i<DP_MAX; i++) {
- if (dp_get_by_idx(i)) {
+ for (i=0; i<ODP_MAX; i++) {
+ if (get_dp(i)) {
rcu_read_unlock();
printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
return -EEXIST;
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-#include "chain.h"
-#include "datapath.h"
-#include "flow.h"
-#include "table.h"
-#include <linux/module.h>
-#include <linux/rcupdate.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-static struct sw_table *(*create_hw_table_hook)(void);
-static struct module *hw_table_owner;
-static DEFINE_SPINLOCK(hook_lock);
-
-/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or
- * negative error. If 'table' is null it is assumed that table creation failed
- * due to out-of-memory. */
-static int add_table(struct sw_chain *chain, struct sw_table *table)
-{
- if (table == NULL)
- return -ENOMEM;
- if (chain->n_tables >= CHAIN_MAX_TABLES) {
- printk(KERN_EMERG "%s: too many tables in chain\n",
- chain->dp->netdev->name);
- table->destroy(table);
- return -ENOBUFS;
- }
- chain->tables[chain->n_tables++] = table;
- return 0;
-}
-
-/* Creates and returns a new chain associated with 'dp'. Returns NULL if the
- * chain cannot be created. */
-struct sw_chain *chain_create(struct datapath *dp)
-{
- struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL);
- if (chain == NULL)
- goto error;
- chain->dp = dp;
- chain->owner = try_module_get(hw_table_owner) ? hw_table_owner : NULL;
- if (chain->owner && create_hw_table_hook) {
- struct sw_table *hwtable = create_hw_table_hook();
- if (!hwtable || add_table(chain, hwtable))
- goto error;
- }
-
- if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS,
- 0x741B8CD7, TABLE_HASH_MAX_FLOWS))
- || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS)))
- goto error;
- return chain;
-
-error:
- if (chain)
- chain_destroy(chain);
- return NULL;
-}
-
-/* Searches 'chain' for a flow matching 'key', which must not have any wildcard
- * fields. Returns the flow if successful, otherwise a null pointer.
- *
- * Caller must hold rcu_read_lock or dp_mutex. */
-struct sw_flow *chain_lookup(struct sw_chain *chain,
- const struct sw_flow_key *key)
-{
- int i;
-
- BUG_ON(key->wildcards);
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- struct sw_flow *flow = t->lookup(t, key);
- t->n_lookup++;
- if (flow) {
- t->n_matched++;
- return flow;
- }
- }
- return NULL;
-}
-
-/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if
- * successful or a negative error.
- *
- * If successful, 'flow' becomes owned by the chain, otherwise it is retained
- * by the caller.
- *
- * Caller must hold dp_mutex. */
-int chain_insert(struct sw_chain *chain, struct sw_flow *flow)
-{
- int i;
-
- might_sleep();
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- if (t->insert(t, flow))
- return 0;
- }
-
- return -ENOBUFS;
-}
-
-/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards
- * and priority must match. Returns the number of flows that were modified.
- *
- * Expensive in the general case as currently implemented, since it requires
- * iterating through the entire contents of each table for keys that contain
- * wildcards. Relatively cheap for fully specified keys. */
-int
-chain_modify(struct sw_chain *chain, const struct sw_flow_key *key,
- uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- count += t->modify(t, key, priority, strict, actions, actions_len);
- }
-
- return count;
-}
-
-/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port'
- * is not OFPP_NONE, then matching entries must have that port as an
- * argument for an output action. If 'strict" is set, then wildcards and
- * priority must match. Returns the number of flows that were deleted.
- *
- * Expensive in the general case as currently implemented, since it requires
- * iterating through the entire contents of each table for keys that contain
- * wildcards. Relatively cheap for fully specified keys.
- *
- * Caller must hold dp_mutex. */
-int chain_delete(struct sw_chain *chain, const struct sw_flow_key *key,
- uint16_t out_port, uint16_t priority, int strict)
-{
- int count = 0;
- int i;
-
- might_sleep();
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- count += t->delete(chain->dp, t, key, out_port, priority, strict);
- }
-
- return count;
-}
-
-/* Performs timeout processing on all the tables in 'chain'. Returns the
- * number of flow entries deleted through expiration.
- *
- * Expensive as currently implemented, since it iterates through the entire
- * contents of each table.
- *
- * Caller must not hold dp_mutex, because individual tables take and release it
- * as necessary. */
-int chain_timeout(struct sw_chain *chain)
-{
- int count = 0;
- int i;
-
- might_sleep();
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- count += t->timeout(chain->dp, t);
- }
- return count;
-}
-
-/* Destroys 'chain', which must not have any users. */
-void chain_destroy(struct sw_chain *chain)
-{
- int i;
-
- synchronize_rcu();
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- if (t->destroy)
- t->destroy(t);
- }
- module_put(chain->owner);
- kfree(chain);
-}
-
-int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void),
- struct module *owner)
-{
- int retval = -EBUSY;
-
- spin_lock(&hook_lock);
- if (!create_hw_table_hook) {
- create_hw_table_hook = create_hw_table;
- hw_table_owner = owner;
- retval = 0;
- }
- spin_unlock(&hook_lock);
-
- return retval;
-}
-EXPORT_SYMBOL(chain_set_hw_hook);
-
-void chain_clear_hw_hook(void)
-{
- create_hw_table_hook = NULL;
- hw_table_owner = NULL;
-}
-EXPORT_SYMBOL(chain_clear_hw_hook);
+++ /dev/null
-#ifndef CHAIN_H
-#define CHAIN_H 1
-
-#include <linux/types.h>
-
-struct sw_flow;
-struct sw_flow_key;
-struct ofp_action_header;
-struct datapath;
-
-
-#define TABLE_LINEAR_MAX_FLOWS 100
-#define TABLE_HASH_MAX_FLOWS 65536
-
-/* Set of tables chained together in sequence from cheap to expensive. */
-#define CHAIN_MAX_TABLES 4
-struct sw_chain {
- int n_tables;
- struct sw_table *tables[CHAIN_MAX_TABLES];
-
- struct datapath *dp;
- struct module *owner;
-};
-
-struct sw_chain *chain_create(struct datapath *);
-struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *);
-int chain_insert(struct sw_chain *, struct sw_flow *);
-int chain_modify(struct sw_chain *, const struct sw_flow_key *,
- uint16_t, int, const struct ofp_action_header *, size_t);
-int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t,
- uint16_t, int);
-int chain_timeout(struct sw_chain *);
-void chain_destroy(struct sw_chain *);
-
-int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void),
- struct module *owner);
-void chain_clear_hw_hook(void);
-
-#endif /* chain.h */
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-#include "crc32.h"
-
-void crc32_init(struct crc32 *crc, unsigned int polynomial)
-{
- int i;
-
- for (i = 0; i < CRC32_TABLE_SIZE; ++i) {
- unsigned int reg = i << 24;
- int j;
- for (j = 0; j < CRC32_TABLE_BITS; j++) {
- int topBit = (reg & 0x80000000) != 0;
- reg <<= 1;
- if (topBit)
- reg ^= polynomial;
- }
- crc->table[i] = reg;
- }
-}
-
-unsigned int crc32_calculate(const struct crc32 *crc,
- const void *data_, size_t n_bytes)
-{
- // FIXME: this can be optimized by unrolling, see linux-2.6/lib/crc32.c.
- const uint8_t *data = data_;
- unsigned int result = 0;
- size_t i;
-
- for (i = 0; i < n_bytes; i++) {
- unsigned int top = result >> 24;
- top ^= data[i];
- result = (result << 8) ^ crc->table[top];
- }
- return result;
-}
+++ /dev/null
-#ifndef CRC32_H
-#define CRC32_H 1
-
-#include <linux/types.h>
-#ifndef __KERNEL__
-#include <stdint.h>
-#endif
-#include <stddef.h>
-
-#define CRC32_TABLE_BITS 8
-#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS)
-
-struct crc32 {
- unsigned int table[CRC32_TABLE_SIZE];
-};
-
-void crc32_init(struct crc32 *, unsigned int polynomial);
-unsigned int crc32_calculate(const struct crc32 *,
- const void *data_, size_t n_bytes);
-
-
-#endif /* crc32.h */
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/fs.h>
#include <linux/if_arp.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
-#include <net/genetlink.h>
#include <linux/ip.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
-#include <linux/rtnetlink.h>
+#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/random.h>
+#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <linux/netfilter_bridge.h>
#include <linux/workqueue.h>
#include <linux/dmi.h>
-#include "openflow/nicira-ext.h"
-#include "openflow/openflow-netlink.h"
+#include "openflow/datapath-protocol.h"
#include "datapath.h"
-#include "nx_act_snat.h"
-#include "table.h"
-#include "chain.h"
+#include "snat.h"
+#include "actions.h"
#include "dp_dev.h"
-#include "forward.h"
#include "flow.h"
#include "compat.h"
-/* Strings to describe the manufacturer, hardware, and software. This data
- * is queriable through the switch description stats message. */
-static char mfr_desc[DESC_STR_LEN] = "Nicira Networks, Inc.";
-static char hw_desc[DESC_STR_LEN] = "Reference Linux Kernel Module";
-static char sw_desc[DESC_STR_LEN] = VERSION BUILDNR;
-static char serial_num[SERIAL_NUM_LEN] = "None";
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-module_param_string(mfr_desc, mfr_desc, sizeof mfr_desc, 0444);
-module_param_string(hw_desc, hw_desc, sizeof hw_desc, 0444);
-module_param_string(sw_desc, sw_desc, sizeof sw_desc, 0444);
-module_param_string(serial_num, serial_num, sizeof serial_num, 0444);
-#else
-MODULE_PARM(mfr_desc, "s");
-MODULE_PARM(hw_desc, "s");
-MODULE_PARM(sw_desc, "s");
-MODULE_PARM(serial_num, "s");
-#endif
-
-
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);
int (*dp_del_if_hook)(struct net_bridge_port *p);
EXPORT_SYMBOL(dp_del_if_hook);
-/* Number of milliseconds between runs of the maintenance thread. */
-#define MAINT_SLEEP_MSECS 1000
-
-#define UINT32_MAX 4294967295U
-#define UINT16_MAX 65535
-#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
-
-static struct genl_family dp_genl_family;
-
-/*
- * Datapath multicast groups.
- *
- * Really we want one multicast group per in-use datapath (or even more than
- * one). Locking issues, however, mean that we can't allocate a multicast
- * group at the point in the code where we we actually create a datapath[*], so
- * we have to pre-allocate them. It's massive overkill to allocate DP_MAX of
- * them in advance, since we will hardly ever actually create DP_MAX datapaths,
- * so instead we allocate a few multicast groups at startup and choose one for
- * each datapath by hashing its datapath index.
- *
- * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
- * mutex, and genl_register_mc_group, called to acquire a new multicast
- * group ID, also acquires genl_lock, thus deadlock.
- */
-#define N_MC_GROUPS 16 /* Must be power of 2. */
-static struct genl_multicast_group mc_groups[N_MC_GROUPS];
-
/* Datapaths. Protected on the read side by rcu_read_lock, on the write side
* by dp_mutex. dp_mutex is almost completely redundant with genl_mutex
* maintained by the Generic Netlink code, but the timeout path needs mutual
* It is safe to access the datapath and net_bridge_port structures with just
* dp_mutex.
*/
-static struct datapath *dps[DP_MAX];
+static struct datapath *dps[ODP_MAX];
DEFINE_MUTEX(dp_mutex);
EXPORT_SYMBOL(dp_mutex);
+/* Number of milliseconds between runs of the maintenance thread. */
+#define MAINT_SLEEP_MSECS 1000
+
+#ifdef SUPPORT_SNAT
static int dp_maint_func(void *data);
-static void init_port_status(struct net_bridge_port *p);
-static int dp_genl_openflow_done(struct netlink_callback *);
-static struct net_bridge_port *new_nbp(struct datapath *,
- struct net_device *, int port_no);
-
-/* nla_shrink - reduce amount of space reserved by nla_reserve
- * @skb: socket buffer from which to recover room
- * @nla: netlink attribute to adjust
- * @len: new length of attribute payload
- *
- * Reduces amount of space reserved by a call to nla_reserve.
- *
- * No other attributes may be added between calling nla_reserve and this
- * function, since it will create a hole in the message.
- */
-void nla_shrink(struct sk_buff *skb, struct nlattr *nla, int len)
+#endif
+static int new_nbp(struct datapath *, struct net_device *, int port_no);
+
+/* Must be called with rcu_read_lock or dp_mutex. */
+struct datapath *get_dp(int dp_idx)
{
- int delta = nla_total_size(len) - nla_total_size(nla_len(nla));
- BUG_ON(delta > 0);
- skb->tail += delta;
- skb->len += delta;
- nla->nla_len = nla_attr_size(len);
+ if (dp_idx < 0 || dp_idx >= ODP_MAX)
+ return NULL;
+ return rcu_dereference(dps[dp_idx]);
}
+EXPORT_SYMBOL_GPL(get_dp);
-/* Puts a set of openflow headers for a message of the given 'type' into 'skb'.
- * If 'sender' is nonnull, then it is used as the message's destination. 'dp'
- * must specify the datapath to use.
- *
- * '*max_openflow_len' receives the maximum number of bytes that are available
- * for the embedded OpenFlow message. The caller must call
- * resize_openflow_skb() to set the actual size of the message to this number
- * of bytes or less.
- *
- * Returns the openflow header if successful, otherwise (if 'skb' is too small)
- * an error code. */
-static void *
-put_openflow_headers(struct datapath *dp, struct sk_buff *skb, uint8_t type,
- const struct sender *sender, int *max_openflow_len)
+struct datapath *get_dp_locked(int dp_idx)
{
- struct ofp_header *oh;
- struct nlattr *attr;
- int openflow_len;
-
- /* Assemble the Generic Netlink wrapper. */
- if (!genlmsg_put(skb,
- sender ? sender->pid : 0,
- sender ? sender->seq : 0,
- &dp_genl_family, 0, DP_GENL_C_OPENFLOW))
- return ERR_PTR(-ENOBUFS);
- if (nla_put_u32(skb, DP_GENL_A_DP_IDX, dp->dp_idx) < 0)
- return ERR_PTR(-ENOBUFS);
- openflow_len = (skb_tailroom(skb) - NLA_HDRLEN) & ~(NLA_ALIGNTO - 1);
- if (openflow_len < sizeof *oh)
- return ERR_PTR(-ENOBUFS);
- *max_openflow_len = openflow_len;
- attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, openflow_len);
- BUG_ON(!attr);
-
- /* Fill in the header. The caller is responsible for the length. */
- oh = nla_data(attr);
- oh->version = OFP_VERSION;
- oh->type = type;
- oh->xid = sender ? sender->xid : 0;
-
- return oh;
+ struct datapath *dp;
+
+ mutex_lock(&dp_mutex);
+ dp = get_dp(dp_idx);
+ if (dp)
+ mutex_lock(&dp->mutex);
+ mutex_unlock(&dp_mutex);
+ return dp;
}
-/* Resizes OpenFlow header 'oh', which must be at the tail end of 'skb', to new
- * length 'new_length' (in bytes), adjusting pointers and size values as
- * necessary. */
-static void
-resize_openflow_skb(struct sk_buff *skb,
- struct ofp_header *oh, size_t new_length)
+static inline size_t br_nlmsg_size(void)
{
- struct nlattr *attr = ((void *) oh) - NLA_HDRLEN;
- nla_shrink(skb, attr, new_length);
- oh->length = htons(new_length);
- nlmsg_end(skb, (struct nlmsghdr *) skb->data);
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(1); /* IFLA_OPERSTATE */
}
-/* Allocates a new skb to contain an OpenFlow message 'openflow_len' bytes in
- * length. Returns a null pointer if memory is unavailable, otherwise returns
- * the OpenFlow header and stores a pointer to the skb in '*pskb'.
- *
- * 'type' is the OpenFlow message type. If 'sender' is nonnull, then it is
- * used as the message's destination. 'dp' must specify the datapath to
- * use. */
-static void *
-alloc_openflow_skb(struct datapath *dp, size_t openflow_len, uint8_t type,
- const struct sender *sender, struct sk_buff **pskb)
+static int dp_fill_ifinfo(struct sk_buff *skb,
+ const struct net_bridge_port *port,
+ int event, unsigned int flags)
{
- struct ofp_header *oh;
- size_t genl_len;
- struct sk_buff *skb;
- int max_openflow_len;
-
- if ((openflow_len + sizeof(struct ofp_header)) > UINT16_MAX) {
- if (net_ratelimit())
- printk(KERN_ERR "%s: alloc_openflow_skb: openflow "
- "message too large: %zu\n",
- dp->netdev->name, openflow_len);
- return NULL;
- }
-
- genl_len = nlmsg_total_size(GENL_HDRLEN + dp_genl_family.hdrsize);
- genl_len += nla_total_size(sizeof(uint32_t)); /* DP_GENL_A_DP_IDX */
- genl_len += nla_total_size(openflow_len); /* DP_GENL_A_OPENFLOW */
- skb = *pskb = genlmsg_new(genl_len, GFP_ATOMIC);
- if (!skb) {
- return NULL;
- }
+ const struct datapath *dp = port->dp;
+ const struct net_device *dev = port->dev;
+ struct ifinfomsg *hdr;
+ struct nlmsghdr *nlh;
+ u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
- oh = put_openflow_headers(dp, skb, type, sender, &max_openflow_len);
- BUG_ON(!oh || IS_ERR(oh));
- resize_openflow_skb(skb, oh, openflow_len);
+ nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
- return oh;
-}
+ hdr = nlmsg_data(nlh);
+ hdr->ifi_family = AF_BRIDGE;
+ hdr->__ifi_pad = 0;
+ hdr->ifi_type = dev->type;
+ hdr->ifi_index = dev->ifindex;
+ hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_change = 0;
-/* Returns the ID of the multicast group used by datapath 'dp'. */
-static u32
-dp_mc_group(const struct datapath *dp)
-{
- return mc_groups[dp->dp_idx & (N_MC_GROUPS - 1)].id;
-}
+ NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+ NLA_PUT_U32(skb, IFLA_MASTER, dp->netdev->ifindex);
+ NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_U8(skb, IFLA_OPERSTATE, operstate);
-/* Sends 'skb' to 'sender' if it is nonnull, otherwise multicasts 'skb' to all
- * listeners. */
-static int
-send_openflow_skb(const struct datapath *dp,
- struct sk_buff *skb, const struct sender *sender)
-{
- return (sender
- ? genlmsg_unicast(skb, sender->pid)
- : genlmsg_multicast(skb, 0, dp_mc_group(dp), GFP_ATOMIC));
-}
+ if (dev->addr_len)
+ NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-/* Retrieves the datapath id, which is the MAC address of the "of" device. */
-static
-uint64_t get_datapath_id(struct net_device *dev)
-{
- uint64_t id = 0;
- int i;
+ if (dev->ifindex != dev->iflink)
+ NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
- for (i=0; i<ETH_ALEN; i++)
- id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
+ return nlmsg_end(skb, nlh);
- return id;
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
}
-/* Find the first free datapath index. Return the index or -1 if a free
- * index could not be found. */
-int gen_dp_idx(void)
+static void dp_ifinfo_notify(int event, struct net_bridge_port *port)
{
- int i;
+ struct net *net = dev_net(port->dev);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
- for (i=0; i<DP_MAX; i++) {
- if (!dps[i])
- return i;
- }
+ skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
- return -1;
+ err = dp_fill_ifinfo(skb, port, event, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in br_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}
-/* Creates a new datapath numbered 'dp_idx'. If 'dp_idx' is -1, it
- * allocates the lowest numbered index available. If 'dp_name' is not
- * null, it is used as the device name instead of the default one.
- * Returns 0 for success or a negative error code. */
-static int new_dp(int dp_idx, const char *dp_name)
+/* Creates datapath number 'dp_idx' and publishes it in dps[].
+ *
+ * 'devnamep', if nonnull, is a user-space pointer to the name to give the
+ * datapath's network device (truncated to IFNAMSIZ - 1 bytes); otherwise
+ * the device is named "of<dp_idx>".
+ *
+ * Takes the RTNL lock and dp_mutex itself. Returns 0 on success or a
+ * negative errno value: -EFAULT if the name cannot be read, -ENODEV if
+ * the module reference cannot be taken, -EEXIST if the datapath already
+ * exists, -ENOMEM on allocation failure, or the error reported by
+ * dp_dev_setup(), new_nbp() or kthread_run().
+ *
+ * NOTE(review): 'dp_idx' is not range-checked here before it indexes
+ * dps[]; presumably the caller or get_dp() validates it -- confirm. */
+static int create_dp(int dp_idx, const char __user *devnamep)
{
+ char devname[IFNAMSIZ];
struct datapath *dp;
int err;
+ int i;
+
+ if (devnamep) {
+ err = -EFAULT;
+ if (strncpy_from_user(devname, devnamep, IFNAMSIZ - 1) < 0)
+ goto err;
+ devname[IFNAMSIZ - 1] = '\0';
+ } else {
+ snprintf(devname, sizeof devname, "of%d", dp_idx);
+ }
rtnl_lock();
mutex_lock(&dp_mutex);
- if (dp_idx == -1)
- dp_idx = gen_dp_idx();
-
- err = -EINVAL;
- if (dp_idx < 0 || dp_idx >= DP_MAX)
- goto err_unlock;
-
err = -ENODEV;
if (!try_module_get(THIS_MODULE))
goto err_unlock;
/* Exit early if a datapath with that number already exists. */
err = -EEXIST;
- if (dps[dp_idx])
- goto err_put;
+ if (get_dp(dp_idx))
+ goto err_put_module;
err = -ENOMEM;
dp = kzalloc(sizeof *dp, GFP_KERNEL);
if (dp == NULL)
- goto err_put;
+ goto err_put_module;
+ mutex_init(&dp->mutex);
dp->dp_idx = dp_idx;
+ for (i = 0; i < DP_N_QUEUES; i++)
+ skb_queue_head_init(&dp->queues[i]);
+ init_waitqueue_head(&dp->waitqueue);
/* Setup our datapath device */
- err = dp_dev_setup(dp, dp_name);
+ err = dp_dev_setup(dp, devname);
if (err)
goto err_free_dp;
- dp->chain = chain_create(dp);
- if (dp->chain == NULL)
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
+ if (!dp->table)
goto err_destroy_dp_dev;
INIT_LIST_HEAD(&dp->port_list);
- dp->local_port = new_nbp(dp, dp->netdev, OFPP_LOCAL);
- if (IS_ERR(dp->local_port)) {
- err = PTR_ERR(dp->local_port);
- goto err_destroy_local_port;
- }
+ err = new_nbp(dp, dp->netdev, ODPP_LOCAL);
+ if (err)
+ goto err_destroy_table;
- dp->flags = 0;
- dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+ dp->drop_frags = 0;
+ /* 'err' is 0 here after the successful new_nbp(); reset it so that an
+ * allocation failure is not reported to the caller as success. */
+ err = -ENOMEM;
+ dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ goto err_destroy_local_port;
+#ifdef SUPPORT_SNAT
dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx);
- if (IS_ERR(dp->dp_task))
- goto err_destroy_chain;
+ if (IS_ERR(dp->dp_task)) {
+ /* Propagate the real failure code from kthread_run(). */
+ err = PTR_ERR(dp->dp_task);
+ goto err_destroy_stats_percpu;
+ }
+#endif
- dps[dp_idx] = dp;
+ rcu_assign_pointer(dps[dp_idx], dp);
mutex_unlock(&dp_mutex);
rtnl_unlock();
return 0;
+#ifdef SUPPORT_SNAT
+err_destroy_stats_percpu:
+ free_percpu(dp->stats_percpu);
+#endif
err_destroy_local_port:
- dp_del_switch_port(dp->local_port);
-err_destroy_chain:
- chain_destroy(dp->chain);
+ dp_del_port(dp->ports[ODPP_LOCAL]);
+err_destroy_table:
+ dp_table_destroy(dp->table, 0);
err_destroy_dp_dev:
dp_dev_destroy(dp);
err_free_dp:
kfree(dp);
-err_put:
+err_put_module:
module_put(THIS_MODULE);
err_unlock:
mutex_unlock(&dp_mutex);
rtnl_unlock();
+err:
return err;
}
-/* Find and return a free port number under 'dp'. */
-static int find_portno(struct datapath *dp)
+/* Tears down 'dp' and frees it.
+ *
+ * The only caller in this file, destroy_dp(), holds the RTNL lock and
+ * dp_mutex. 'dp' must not be used after this returns. Note that
+ * dp->netdev is only passed to dp_dev_destroy() here; destroy_dp() calls
+ * free_netdev() on it afterwards. */
+static void do_destroy_dp(struct datapath *dp)
{
+ struct net_bridge_port *p, *n;
int i;
- for (i = 0; i < DP_MAX_PORTS; i++)
- if (dp->ports[i] == NULL)
- return i;
- return -EXFULL;
+
+#ifdef SUPPORT_SNAT
+ /* Stop the maintenance thread started by create_dp(). */
+ send_sig(SIGKILL, dp->dp_task, 0);
+ kthread_stop(dp->dp_task);
+#endif
+
+ /* Drop references to DP. */
+ list_for_each_entry_safe (p, n, &dp->port_list, node)
+ dp_del_port(p);
+
+ if (dp_del_dp_hook)
+ dp_del_dp_hook(dp);
+
+ /* Unpublish the datapath, then wait for existing RCU readers. */
+ rcu_assign_pointer(dps[dp->dp_idx], NULL);
+ synchronize_rcu();
+
+ /* Destroy dp->netdev. (Must follow deleting switch ports since the
+ * ODPP_LOCAL port has a reference to it.) */
+ dp_dev_destroy(dp);
+
+ /* Wait until no longer in use, then destroy it. */
+ synchronize_rcu();
+ dp_table_destroy(dp->table, 1);
+ for (i = 0; i < DP_N_QUEUES; i++)
+ skb_queue_purge(&dp->queues[i]);
+ free_percpu(dp->stats_percpu);
+ kfree(dp);
+ /* Balances the try_module_get() taken in create_dp(). */
+ module_put(THIS_MODULE);
+}
+
+/* Destroys datapath number 'dp_idx'.
+ *
+ * Takes the RTNL lock and dp_mutex itself. Returns 0 on success or
+ * -ENODEV if get_dp() finds no such datapath. */
+static int destroy_dp(int dp_idx)
+{
+ struct net_device *dev = NULL;
+ struct datapath *dp;
+ int err;
+
+ rtnl_lock();
+ mutex_lock(&dp_mutex);
+ dp = get_dp(dp_idx);
+ err = -ENODEV;
+ if (!dp)
+ goto err_unlock;
+
+ /* Save the device pointer first: do_destroy_dp() frees 'dp'. */
+ dev = dp->netdev;
+ do_destroy_dp(dp);
+ err = 0;
+
+err_unlock:
+ mutex_unlock(&dp_mutex);
+ rtnl_unlock();
+ /* The net_device is freed only after both locks are dropped. */
+ if (dev)
+ free_netdev(dev);
+ return err;
}
/* Called with RTNL lock and dp_mutex. */
-static struct net_bridge_port *new_nbp(struct datapath *dp,
- struct net_device *dev, int port_no)
+/* Attaches 'dev' to 'dp' as port number 'port_no'.
+ *
+ * Allocates the port, puts 'dev' into promiscuous mode, takes a reference
+ * on it, records the port in dp->ports[] and dp->port_list, and announces
+ * it with an RTM_NEWLINK notification.
+ *
+ * Returns 0 on success, -EBUSY if 'dev' already has a br_port, or -ENOMEM
+ * if the port cannot be allocated.
+ *
+ * 'port_no' indexes dp->ports[] without a bounds check, so the caller must
+ * pass a valid index (add_port() checks it against DP_MAX_PORTS).
+ * NOTE(review): add_port() reaches this via get_dp_locked(), which appears
+ * to hold dp->mutex rather than dp_mutex -- confirm the locking rule. */
+static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
{
struct net_bridge_port *p;
if (dev->br_port != NULL)
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (p == NULL)
- return ERR_PTR(-ENOMEM);
+ if (!p)
+ return -ENOMEM;
dev_set_promiscuity(dev, 1);
dev_hold(dev);
- p->dp = dp;
- p->dev = dev;
p->port_no = port_no;
spin_lock_init(&p->lock);
- if (port_no != OFPP_LOCAL)
+ p->dp = dp;
+ p->dev = dev;
+ /* The local port's device does not get a br_port back-pointer. */
+ if (port_no != ODPP_LOCAL)
rcu_assign_pointer(dev->br_port, p);
- if (port_no < DP_MAX_PORTS)
- rcu_assign_pointer(dp->ports[port_no], p);
+ rcu_assign_pointer(dp->ports[port_no], p);
list_add_rcu(&p->node, &dp->port_list);
+ dp->n_ports++;
- return p;
+ dp_ifinfo_notify(RTM_NEWLINK, p);
+
+ return 0;
}
-/* Called with RTNL lock and dp_mutex. */
-int add_switch_port(struct datapath *dp, struct net_device *dev)
+/* Adds the network device named in the user-space request 'portp' to
+ * datapath 'dp_idx', at the port number given in the request.
+ *
+ * Returns 0 on success or a negative errno value:
+ *   -EFAULT  the request cannot be copied from user space;
+ *   -EINVAL  the port number is outside [0, DP_MAX_PORTS), or the device
+ *            is a loopback, non-Ethernet, or datapath device;
+ *   -ENODEV  the datapath or the named device does not exist;
+ *   -EEXIST  the port number is already in use;
+ *   or whatever new_nbp() returns. */
+static int add_port(int dp_idx, struct odp_port __user *portp)
{
- struct net_bridge_port *p;
+ struct net_device *dev;
+ struct datapath *dp;
+ struct odp_port port;
int port_no;
+ int err;
- if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER
- || is_dp_dev(dev))
- return -EINVAL;
+ err = -EFAULT;
+ if (copy_from_user(&port, portp, sizeof port))
+ goto out;
+ /* Do not trust user space to have terminated the name. */
+ port.devname[IFNAMSIZ - 1] = '\0';
+ port_no = port.port;
+
+ err = -EINVAL;
+ if (port_no < 0 || port_no >= DP_MAX_PORTS)
+ goto out;
+
+ rtnl_lock();
+ dp = get_dp_locked(dp_idx);
+ err = -ENODEV;
+ if (!dp)
+ goto out_unlock_rtnl;
+
+ err = -ENODEV;
+ dev = dev_get_by_name(&init_net, port.devname);
+ if (!dev)
+ goto out_unlock_dp;
- port_no = find_portno(dp);
- if (port_no < 0)
- return port_no;
+ err = -EINVAL;
+ if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER ||
+ is_dp_dev(dev))
+ goto out_put;
- p = new_nbp(dp, dev, port_no);
- if (IS_ERR(p))
- return PTR_ERR(p);
+ err = -EEXIST;
+ if (dp->ports[port_no])
+ goto out_put;
- init_port_status(p);
+ err = new_nbp(dp, dev, port_no);
+ if (err)
+ goto out_put;
if (dp_add_if_hook)
- dp_add_if_hook(p);
+ dp_add_if_hook(dp->ports[port_no]);
- /* Notify the ctlpath that this port has been added */
- dp_send_port_status(p, OFPPR_ADD);
-
- return 0;
+ /* The success path falls through: the reference taken by
+ * dev_get_by_name() is dropped here, while new_nbp() holds its own
+ * reference via dev_hold(). */
+out_put:
+ dev_put(dev);
+out_unlock_dp:
+ mutex_unlock(&dp->mutex);
+out_unlock_rtnl:
+ rtnl_unlock();
+out:
+ return err;
}
-/* Delete 'p' from switch.
- * Called with RTNL lock and dp_mutex. */
-int dp_del_switch_port(struct net_bridge_port *p)
+/* Frees any SNAT configuration on port 'p', holding p->lock with
+ * interrupts disabled around the call to snat_free_conf(). Does nothing
+ * when SUPPORT_SNAT is not defined. */
+static void free_snat(struct net_bridge_port *p)
{
#ifdef SUPPORT_SNAT
- unsigned long flags;
-#endif
+ unsigned long int flags;
+ spin_lock_irqsave(&p->lock, flags);
+ snat_free_conf(p);
+ spin_unlock_irqrestore(&p->lock, flags);
+#endif /* SUPPORT_SNAT */
+}
+
+int dp_del_port(struct net_bridge_port *p)
+{
+ ASSERT_RTNL();
#if CONFIG_SYSFS
- if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook)
+ if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook)
sysfs_remove_link(&p->dp->ifobj, p->dev->name);
#endif
+ dp_ifinfo_notify(RTM_DELLINK, p);
+
+ p->dp->n_ports--;
/* First drop references to device. */
dev_set_promiscuity(p->dev, -1);
list_del_rcu(&p->node);
- if (p->port_no != OFPP_LOCAL)
- rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+ rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
rcu_assign_pointer(p->dev->br_port, NULL);
/* Then wait until no one is still using it, and destroy it. */
synchronize_rcu();
+ free_snat(p);
-#ifdef SUPPORT_SNAT
- /* Free any SNAT configuration on the port. */
- spin_lock_irqsave(&p->lock, flags);
- snat_free_conf(p);
- spin_unlock_irqrestore(&p->lock, flags);
-#endif
-
- /* Notify the ctlpath that this port no longer exists */
- dp_send_port_status(p, OFPPR_DELETE);
-
- if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook) {
+ if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook) {
dp_del_if_hook(p);
} else {
dev_put(p->dev);
return 0;
}
-static void del_dp(struct datapath *dp)
+/* Removes port 'port_no' from datapath 'dp_idx'.
+ *
+ * The local port (ODPP_LOCAL) cannot be removed this way. Returns 0 on
+ * success or a negative errno value: -EINVAL if 'port_no' is out of range
+ * or is ODPP_LOCAL, -ENODEV if the datapath does not exist, -ENOENT if the
+ * port is not in use, or whatever dp_del_port() returns. */
+static int del_port(int dp_idx, int port_no)
{
- struct net_bridge_port *p, *n;
-
- send_sig(SIGKILL, dp->dp_task, 0);
- kthread_stop(dp->dp_task);
-
- /* Drop references to DP. */
- list_for_each_entry_safe (p, n, &dp->port_list, node)
- dp_del_switch_port(p);
+ struct net_bridge_port *p;
+ struct datapath *dp;
+ int err;
- if (dp_del_dp_hook)
- dp_del_dp_hook(dp);
+ err = -EINVAL;
+ if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL)
+ goto out;
- rcu_assign_pointer(dps[dp->dp_idx], NULL);
+ rtnl_lock();
+ dp = get_dp_locked(dp_idx);
+ err = -ENODEV;
+ if (!dp)
+ goto out_unlock_rtnl;
- /* Kill off local_port dev references from buffered packets that have
- * associated dst entries. */
- synchronize_rcu();
- fwd_discard_all();
+ p = dp->ports[port_no];
+ err = -ENOENT;
+ if (!p)
+ goto out_unlock_dp;
- /* Destroy dp->netdev. (Must follow deleting switch ports since
- * dp->local_port has a reference to it.) */
- dp_dev_destroy(dp);
+ err = dp_del_port(p);
- /* Wait until no longer in use, then destroy it. */
- synchronize_rcu();
- chain_destroy(dp->chain);
- kfree(dp);
- module_put(THIS_MODULE);
+out_unlock_dp:
+ mutex_unlock(&dp->mutex);
+out_unlock_rtnl:
+ rtnl_unlock();
+out:
+ return err;
}
+#ifdef SUPPORT_SNAT
static int dp_maint_func(void *data)
{
struct datapath *dp = (struct datapath *) data;
allow_signal(SIGKILL);
while (!signal_pending(current)) {
-#ifdef SUPPORT_SNAT
struct net_bridge_port *p;
/* Expire old SNAT entries */
list_for_each_entry_rcu (p, &dp->port_list, node)
snat_maint(p);
rcu_read_unlock();
-#endif
-
- /* Timeout old entries */
- chain_timeout(dp->chain);
msleep_interruptible(MAINT_SLEEP_MSECS);
}
while (!kthread_should_stop()) {
}
return 0;
}
+#endif
static void
do_port_input(struct net_bridge_port *p, struct sk_buff *skb)
/* Push the Ethernet header back on. */
skb_push(skb, ETH_HLEN);
skb_reset_mac_header(skb);
- fwd_port_input(p->dp->chain, skb, p);
+ dp_process_received_packet(skb, p);
+}
+
+/* Processes 'skb', received on port 'p', through the flow table of p->dp.
+ *
+ * A flow key is extracted from the packet and looked up in the datapath's
+ * table. On a hit the flow's actions are executed; on a miss the packet
+ * is queued for user space via dp_output_control() with _ODPL_MISS_NR.
+ * The per-CPU n_hit, n_missed and n_frags counters are updated to match.
+ *
+ * NOTE(review): 'p' is dereferenced for p->dp before the
+ * "p ? p->port_no : ODPP_NONE" test below, so a null 'p' would already
+ * have faulted -- confirm whether null is ever a valid argument. */
+void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p)
+{
+ struct datapath *dp = p->dp;
+ struct dp_stats_percpu *stats;
+ struct odp_flow_key key;
+ struct sw_flow *flow;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(skb->destructor);
+
+ /* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
+ stats = percpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ /* A nonzero return from flow_extract() is counted as a fragment; such
+ * packets are dropped only when dp->drop_frags is set. */
+ if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
+ if (dp->drop_frags) {
+ kfree_skb(skb);
+ stats->n_frags++;
+ return;
+ }
+ }
+
+ flow = dp_table_lookup(rcu_dereference(dp->table), &key);
+ if (flow) {
+ flow_used(flow, skb);
+ execute_actions(dp, skb, &key, rcu_dereference(flow->sf_acts),
+ GFP_ATOMIC);
+ stats->n_hit++;
+ } else {
+ stats->n_missed++;
+ dp_output_control(dp, skb, _ODPL_MISS_NR, 0);
+ }
}
/*
}
#endif
-/* Forwarding output path.
- * Based on net/bridge/br_forward.c. */
-
-static inline unsigned packet_length(const struct sk_buff *skb)
+#ifdef CONFIG_XEN
+/* This code is copied verbatim from net/dev/core.c in Xen's
+ * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions
+ * directly because they aren't exported. */
+/* Returns 1 if 'ptr' is below skb->tail, or if __pskb_pull_tail() succeeds
+ * in pulling the bytes up to 'ptr' into the linear area; returns 0 if that
+ * pull fails. */
+static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
{
- unsigned length = skb->len - ETH_HLEN;
- if (skb->protocol == htons(ETH_P_8021Q))
- length -= VLAN_HLEN;
- return length;
+ if (ptr < (void *)skb->tail)
+ return 1;
+ if (__pskb_pull_tail(skb,
+ ptr - (void *)skb->data - skb_headlen(skb))) {
+ return 1;
+ } else {
+ return 0;
+ }
}
-/* Send packets out all the ports except the originating one. If the
- * "flood" argument is set, only send along the minimum spanning tree.
- */
-static int
-output_all(struct datapath *dp, struct sk_buff *skb, int flood)
+inline int skb_checksum_setup(struct sk_buff *skb)
{
- u32 disable = flood ? OFPPC_NO_FLOOD : 0;
- struct net_bridge_port *p;
- int prev_port = -1;
-
- list_for_each_entry_rcu (p, &dp->port_list, node) {
- if (skb->dev == p->dev || p->config & disable)
- continue;
- if (prev_port != -1) {
- struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
- if (!clone) {
- kfree_skb(skb);
- return -ENOMEM;
- }
- dp_output_port(dp, clone, prev_port, 0);
+ if (skb->proto_csum_blank) {
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+ if (!skb_pull_up_to(skb, skb->nh.iph + 1))
+ goto out;
+ skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+ switch (skb->nh.iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ skb->csum = offsetof(struct udphdr, check);
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+ " %d packet", skb->nh.iph->protocol);
+ goto out;
}
- prev_port = p->port_no;
+ if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2))
+ goto out;
+ skb->ip_summed = CHECKSUM_HW;
+ skb->proto_csum_blank = 0;
}
- if (prev_port != -1)
- dp_output_port(dp, skb, prev_port, 0);
- else
- kfree_skb(skb);
-
return 0;
-}
-
-/* Marks 'skb' as having originated from 'in_port' in 'dp'.
- FIXME: how are devices reference counted? */
-void dp_set_origin(struct datapath *dp, uint16_t in_port,
- struct sk_buff *skb)
-{
- struct net_bridge_port *p;
- p = (in_port < DP_MAX_PORTS ? dp->ports[in_port]
- : in_port == OFPP_LOCAL ? dp->local_port
- : NULL);
- if (p)
- skb->dev = p->dev;
- else
- skb->dev = NULL;
-}
-
-#ifdef SUPPORT_SNAT
-static int
-dp_xmit_skb_finish(struct sk_buff *skb)
-{
- /* Copy back the Ethernet header that was stowed earlier. */
- if (skb->protocol == htons(ETH_P_IP) && snat_copy_header(skb)) {
- kfree_skb(skb);
- return -EINVAL;
- }
- skb_reset_mac_header(skb);
-
- if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
- printk("dropped over-mtu packet: %d > %d\n",
- packet_length(skb), skb->dev->mtu);
- kfree_skb(skb);
- return -E2BIG;
- }
-
- skb_push(skb, ETH_HLEN);
- dev_queue_xmit(skb);
-
- return 0;
-}
-
-int
-dp_xmit_skb(struct sk_buff *skb)
-{
- int len = skb->len;
- int err;
-
- skb_pull(skb, ETH_HLEN);
-
- /* The ip_fragment function does not copy the Ethernet header into
- * the newly generated frames, so stow the original. */
- if (skb->protocol == htons(ETH_P_IP))
- snat_save_header(skb);
-
- if (skb->protocol == htons(ETH_P_IP) &&
- skb->len > skb->dev->mtu &&
- !skb_is_gso(skb)) {
- err = ip_fragment(skb, dp_xmit_skb_finish);
- } else {
- err = dp_xmit_skb_finish(skb);
- }
- if (err)
- return err;
-
- return len;
-}
-#else
-int
-dp_xmit_skb(struct sk_buff *skb)
-{
- struct datapath *dp = skb->dev->br_port->dp;
- int len = skb->len;
-
- if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
- printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
- dp->netdev->name, packet_length(skb), skb->dev->mtu);
- kfree_skb(skb);
- return -E2BIG;
- }
-
- dev_queue_xmit(skb);
-
- return len;
-}
-#endif
-
-/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
- */
-int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port,
- int ignore_no_fwd)
-{
- BUG_ON(!skb);
- switch (out_port){
- case OFPP_IN_PORT:
- /* Send it out the port it came in on, which is already set in
- * the skb. */
- if (!skb->dev) {
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: skb device not set "
- "forwarding to in_port\n",
- dp->netdev->name);
- kfree_skb(skb);
- return -ESRCH;
- }
- return dp_xmit_skb(skb);
-
- case OFPP_TABLE: {
- int retval = run_flow_through_tables(dp->chain, skb,
- skb->dev->br_port);
- if (retval)
- kfree_skb(skb);
- return retval;
- }
-
- case OFPP_FLOOD:
- return output_all(dp, skb, 1);
-
- case OFPP_ALL:
- return output_all(dp, skb, 0);
-
- case OFPP_CONTROLLER:
- return dp_output_control(dp, skb, 0, OFPR_ACTION);
-
- case OFPP_LOCAL: {
- struct net_device *dev = dp->netdev;
-#ifdef SUPPORT_SNAT
- snat_local_in(skb);
-#endif
- return dev ? dp_dev_recv(dev, skb) : -ESRCH;
- }
-
- case 0 ... DP_MAX_PORTS - 1: {
- struct net_bridge_port *p = dp->ports[out_port];
- if (p == NULL)
- goto bad_port;
- if (p->dev == skb->dev) {
- /* To send to the input port, must use OFPP_IN_PORT */
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: can't directly "
- "forward to input port\n",
- dp->netdev->name);
- return -EINVAL;
- }
- if (p->config & OFPPC_NO_FWD && !ignore_no_fwd) {
- kfree_skb(skb);
- return 0;
- }
- skb->dev = p->dev;
- return dp_xmit_skb(skb);
- }
-
- default:
- goto bad_port;
- }
-
-bad_port:
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: can't forward to bad port %d\n",
- dp->netdev->name, out_port);
- return -ENOENT;
-}
-
-#ifdef CONFIG_XEN
-/* This code is copied verbatim from net/dev/core.c in Xen's
- * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions
- * directly because they aren't exported. */
-static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
-{
- if (ptr < (void *)skb->tail)
- return 1;
- if (__pskb_pull_tail(skb,
- ptr - (void *)skb->data - skb_headlen(skb))) {
- return 1;
- } else {
- return 0;
- }
-}
-
-inline int skb_checksum_setup(struct sk_buff *skb)
-{
- if (skb->proto_csum_blank) {
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
- if (!skb_pull_up_to(skb, skb->nh.iph + 1))
- goto out;
- skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
- switch (skb->nh.iph->protocol) {
- case IPPROTO_TCP:
- skb->csum = offsetof(struct tcphdr, check);
- break;
- case IPPROTO_UDP:
- skb->csum = offsetof(struct udphdr, check);
- break;
- default:
- if (net_ratelimit())
- printk(KERN_ERR "Attempting to checksum a non-"
- "TCP/UDP packet, dropping a protocol"
- " %d packet", skb->nh.iph->protocol);
- goto out;
- }
- if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2))
- goto out;
- skb->ip_summed = CHECKSUM_HW;
- skb->proto_csum_blank = 0;
- }
- return 0;
-out:
- return -EPROTO;
+out:
+ return -EPROTO;
}
#endif
-/* Takes ownership of 'skb' and transmits it to 'dp''s control path. 'reason'
- * indicates why 'skb' is being sent. 'max_len' sets the maximum number of
- * bytes that the caller wants to be sent; a value of 0 indicates the entire
- * packet should be sent. */
+/* Takes ownership of 'skb' and appends it to queue 'queue_no' of 'dp' for
+ * delivery to user space, then wakes any sleeper on dp->waitqueue.
+ *
+ * 'queue_no' must be _ODPL_MISS_NR or _ODPL_ACTION_NR (enforced with
+ * BUG_ON). A struct odp_msg header is pushed in front of the packet data;
+ * 'arg' is stored in the header's 'arg' field.
+ *
+ * Returns 0 on success. On failure the packet is freed, the per-CPU
+ * n_lost counter is incremented, and a negative errno value is returned:
+ * -ENOBUFS if the queue already holds DP_MAX_QUEUE_LEN packets, otherwise
+ * the error from the checksum helpers or skb_cow(). */
int
-dp_output_control(struct datapath *dp, struct sk_buff *skb,
- size_t max_len, int reason)
+dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
+ u32 arg)
{
- /* FIXME? Can we avoid creating a new skbuff in the case where we
- * forward the whole packet? */
- struct sk_buff *f_skb;
- struct ofp_packet_in *opi;
- size_t fwd_len, opi_len;
- uint32_t buffer_id;
+ struct dp_stats_percpu *stats;
+ struct sk_buff_head *queue;
+ struct odp_msg *header;
int err;
WARN_ON_ONCE(skb_shared(skb));
+ BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR);
+
+ queue = &dp->queues[queue_no];
+ err = -ENOBUFS;
+ if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
+ goto err;
#ifdef CONFIG_XEN
/* If a checksum-deferred packet is forwarded to the controller,
*/
err = skb_checksum_setup(skb);
if (err)
- goto out;
+ goto err;
if (skb->ip_summed == CHECKSUM_HW) {
err = skb_checksum_help(skb, 0);
if (err)
- goto out;
+ goto err;
}
#endif
- buffer_id = fwd_save_skb(skb);
-
- fwd_len = skb->len;
- if ((buffer_id != (uint32_t) -1) && max_len)
- fwd_len = min(fwd_len, max_len);
+ /* Make sure there is private headroom for the header. */
+ err = skb_cow(skb, sizeof *header);
+ if (err)
+ goto err;
+
+ header = (struct odp_msg*)__skb_push(skb, sizeof *header);
+ header->type = queue_no;
+ /* skb->len already includes the header pushed just above. */
+ header->length = skb->len;
+ header->port = (skb->dev && skb->dev->br_port
+ ? skb->dev->br_port->port_no
+ : ODPP_LOCAL);
+ header->reserved = 0;
+ header->arg = arg;
+ skb_queue_tail(queue, skb);
+ wake_up_interruptible(&dp->waitqueue);
+ return 0;
- opi_len = offsetof(struct ofp_packet_in, data) + fwd_len;
- opi = alloc_openflow_skb(dp, opi_len, OFPT_PACKET_IN, NULL, &f_skb);
- if (!opi) {
- err = -ENOMEM;
- goto out;
- }
- opi->buffer_id = htonl(buffer_id);
- opi->total_len = htons(skb->len);
- opi->in_port = htons(skb->dev && skb->dev->br_port
- ? skb->dev->br_port->port_no
- : OFPP_LOCAL);
- opi->reason = reason;
- opi->pad = 0;
- skb_copy_bits(skb, 0, opi->data, fwd_len);
- err = send_openflow_skb(dp, f_skb, NULL);
+err:
+ stats = percpu_ptr(dp->stats_percpu, get_cpu());
+ stats->n_lost++;
+ put_cpu();
-out:
kfree_skb(skb);
return err;
}
-static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
+static int flush_flows(struct datapath *dp)
{
- unsigned long flags;
- desc->port_no = htons(p->port_no);
- strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
- desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
- memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
- desc->curr = 0;
- desc->supported = 0;
- desc->advertised = 0;
- desc->peer = 0;
-
- spin_lock_irqsave(&p->lock, flags);
- desc->config = htonl(p->config);
- desc->state = htonl(p->state);
- spin_unlock_irqrestore(&p->lock, flags);
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
- if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
- struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
-
- if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
- /* Set the supported features */
- if (ecmd.supported & SUPPORTED_10baseT_Half)
- desc->supported |= OFPPF_10MB_HD;
- if (ecmd.supported & SUPPORTED_10baseT_Full)
- desc->supported |= OFPPF_10MB_FD;
- if (ecmd.supported & SUPPORTED_100baseT_Half)
- desc->supported |= OFPPF_100MB_HD;
- if (ecmd.supported & SUPPORTED_100baseT_Full)
- desc->supported |= OFPPF_100MB_FD;
- if (ecmd.supported & SUPPORTED_1000baseT_Half)
- desc->supported |= OFPPF_1GB_HD;
- if (ecmd.supported & SUPPORTED_1000baseT_Full)
- desc->supported |= OFPPF_1GB_FD;
- if (ecmd.supported & SUPPORTED_10000baseT_Full)
- desc->supported |= OFPPF_10GB_FD;
- if (ecmd.supported & SUPPORTED_TP)
- desc->supported |= OFPPF_COPPER;
- if (ecmd.supported & SUPPORTED_FIBRE)
- desc->supported |= OFPPF_FIBER;
- if (ecmd.supported & SUPPORTED_Autoneg)
- desc->supported |= OFPPF_AUTONEG;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
- if (ecmd.supported & SUPPORTED_Pause)
- desc->supported |= OFPPF_PAUSE;
- if (ecmd.supported & SUPPORTED_Asym_Pause)
- desc->supported |= OFPPF_PAUSE_ASYM;
-#endif /* kernel >= 2.6.14 */
-
- /* Set the advertised features */
- if (ecmd.advertising & ADVERTISED_10baseT_Half)
- desc->advertised |= OFPPF_10MB_HD;
- if (ecmd.advertising & ADVERTISED_10baseT_Full)
- desc->advertised |= OFPPF_10MB_FD;
- if (ecmd.advertising & ADVERTISED_100baseT_Half)
- desc->advertised |= OFPPF_100MB_HD;
- if (ecmd.advertising & ADVERTISED_100baseT_Full)
- desc->advertised |= OFPPF_100MB_FD;
- if (ecmd.advertising & ADVERTISED_1000baseT_Half)
- desc->advertised |= OFPPF_1GB_HD;
- if (ecmd.advertising & ADVERTISED_1000baseT_Full)
- desc->advertised |= OFPPF_1GB_FD;
- if (ecmd.advertising & ADVERTISED_10000baseT_Full)
- desc->advertised |= OFPPF_10GB_FD;
- if (ecmd.advertising & ADVERTISED_TP)
- desc->advertised |= OFPPF_COPPER;
- if (ecmd.advertising & ADVERTISED_FIBRE)
- desc->advertised |= OFPPF_FIBER;
- if (ecmd.advertising & ADVERTISED_Autoneg)
- desc->advertised |= OFPPF_AUTONEG;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
- if (ecmd.advertising & ADVERTISED_Pause)
- desc->advertised |= OFPPF_PAUSE;
- if (ecmd.advertising & ADVERTISED_Asym_Pause)
- desc->advertised |= OFPPF_PAUSE_ASYM;
-#endif /* kernel >= 2.6.14 */
-
- /* Set the current features */
- if (ecmd.speed == SPEED_10)
- desc->curr = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD;
- else if (ecmd.speed == SPEED_100)
- desc->curr = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD;
- else if (ecmd.speed == SPEED_1000)
- desc->curr = (ecmd.duplex) ? OFPPF_1GB_FD : OFPPF_1GB_HD;
- else if (ecmd.speed == SPEED_10000)
- desc->curr = OFPPF_10GB_FD;
-
- if (ecmd.port == PORT_TP)
- desc->curr |= OFPPF_COPPER;
- else if (ecmd.port == PORT_FIBRE)
- desc->curr |= OFPPF_FIBER;
-
- if (ecmd.autoneg)
- desc->curr |= OFPPF_AUTONEG;
- }
- }
-#endif
- desc->curr = htonl(desc->curr);
- desc->supported = htonl(desc->supported);
- desc->advertised = htonl(desc->advertised);
- desc->peer = htonl(desc->peer);
+ dp->n_flows = 0;
+ return dp_table_flush(dp);
}
-static int
-fill_features_reply(struct datapath *dp, struct ofp_switch_features *ofr)
+/* Checks that every action in 'actions' is one this datapath can execute.
+ *
+ * Returns 0 if all actions are acceptable, -EINVAL if an SNAT or output
+ * action names a port >= DP_MAX_PORTS or an output-group action names a
+ * group >= DP_MAX_GROUPS, and -EOPNOTSUPP for an action type
+ * >= ODPAT_N_ACTIONS or for SNAT when SUPPORT_SNAT is not compiled in. */
+static int validate_actions(const struct sw_flow_actions *actions)
{
- struct net_bridge_port *p;
- uint64_t dpid = get_datapath_id(dp->netdev);
- int port_count = 0;
+ unsigned int i;
- ofr->datapath_id = cpu_to_be64(dpid);
+ for (i = 0; i < actions->n_actions; i++) {
+ const union odp_action *a = &actions->actions[i];
+ switch (a->type) {
+ case ODPAT_SNAT:
+ if (a->snat.port >= DP_MAX_PORTS)
+ return -EINVAL;
+#ifndef SUPPORT_SNAT
+ if (net_ratelimit())
+ printk(KERN_ERR "SNAT not supported\n");
+ return -EOPNOTSUPP;
+#else
+ /* Stop here: without this the case fell through and
+ * validated a->output.port for an SNAT action. */
+ break;
+#endif
- ofr->n_buffers = htonl(N_PKT_BUFFERS);
- ofr->n_tables = dp->chain->n_tables;
- ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES);
- ofr->actions = htonl(OFP_SUPPORTED_ACTIONS);
- memset(ofr->pad, 0, sizeof ofr->pad);
+ case ODPAT_OUTPUT:
+ if (a->output.port >= DP_MAX_PORTS)
+ return -EINVAL;
+ break;
- list_for_each_entry_rcu (p, &dp->port_list, node) {
- fill_port_desc(p, &ofr->ports[port_count]);
- port_count++;
+ case ODPAT_OUTPUT_GROUP:
+ if (a->output_group.group >= DP_MAX_GROUPS)
+ return -EINVAL;
+ break;
+
+ default:
+ if (a->type >= ODPAT_N_ACTIONS)
+ return -EOPNOTSUPP;
+ break;
+ }
}
- return port_count;
+ return 0;
}
-int
-dp_send_features_reply(struct datapath *dp, const struct sender *sender)
+/* Replaces the actions of an existing flow in 'dp'.
+ *
+ * 'ufp' is a user-space struct odp_flow whose 'key' selects the flow and
+ * whose 'actions'/'n_actions' give the new action list. The old list is
+ * freed only after an RCU grace period.
+ *
+ * Returns 0 on success or a negative errno value: -EFAULT if the request
+ * or its action array cannot be read, -ENOMEM if the new list cannot be
+ * allocated, -ENOENT if no flow matches the key, or the error reported by
+ * validate_actions().
+ *
+ * NOTE(review): uf.n_actions is user-controlled; presumably
+ * flow_actions_alloc() bounds it before the size multiplication below --
+ * confirm. */
+static int set_flow_actions(struct datapath *dp, struct odp_flow __user *ufp)
{
- struct sk_buff *skb;
- struct ofp_switch_features *ofr;
- size_t ofr_len, port_max_len;
- int port_count;
-
- /* Overallocate. */
- port_max_len = sizeof(struct ofp_phy_port) * DP_MAX_PORTS;
- ofr = alloc_openflow_skb(dp, sizeof(*ofr) + port_max_len,
- OFPT_FEATURES_REPLY, sender, &skb);
- if (!ofr)
- return -ENOMEM;
-
- /* Fill. */
- port_count = fill_features_reply(dp, ofr);
-
- /* Shrink to fit. */
- ofr_len = sizeof(*ofr) + (sizeof(struct ofp_phy_port) * port_count);
- resize_openflow_skb(skb, &ofr->header, ofr_len);
- return send_openflow_skb(dp, skb, sender);
-}
+ struct sw_flow_actions *new_acts, *old_acts;
+ struct sw_flow *flow;
+ struct odp_flow uf;
+ int error;
-int
-dp_send_config_reply(struct datapath *dp, const struct sender *sender)
-{
- struct sk_buff *skb;
- struct ofp_switch_config *osc;
+ error = -EFAULT;
+ if (copy_from_user(&uf, ufp, sizeof uf))
+ goto error;
- osc = alloc_openflow_skb(dp, sizeof *osc, OFPT_GET_CONFIG_REPLY, sender,
- &skb);
- if (!osc)
- return -ENOMEM;
+ /* Get actions. */
+ new_acts = flow_actions_alloc(uf.n_actions);
+ error = -ENOMEM;
+ if (!new_acts)
+ goto error;
+ /* A failed user copy is a bad address, not an allocation failure. */
+ error = -EFAULT;
+ if (copy_from_user(new_acts->actions, uf.actions,
+ uf.n_actions * sizeof *uf.actions))
+ goto error_free_actions;
+ error = validate_actions(new_acts);
+ if (error)
+ goto error_free_actions;
+
+ /* Replace actions. */
+ flow = dp_table_lookup(dp->table, &uf.key);
+ error = -ENOENT;
+ if (!flow)
+ goto error_free_actions;
+ old_acts = rcu_dereference(flow->sf_acts);
+ rcu_assign_pointer(flow->sf_acts, new_acts);
+ synchronize_rcu(); /* XXX expensive! */
+ kfree(old_acts);
- osc->flags = htons(dp->flags);
- osc->miss_send_len = htons(dp->miss_send_len);
+ return 0;
- return send_openflow_skb(dp, skb, sender);
+error_free_actions:
+ kfree(new_acts);
+error:
+ return error;
}
-int
-dp_send_hello(struct datapath *dp, const struct sender *sender,
- const struct ofp_header *request)
+/* Copies the statistics of 'flow' (last-used time, packet and byte counts,
+ * IP ToS and TCP flags) into the 'stats' member of the user-space
+ * struct odp_flow at 'ufp'.
+ *
+ * Returns the result of __copy_to_user(): 0 on success, nonzero if the
+ * copy faulted. Callers in this file map any nonzero result to -EFAULT.
+ *
+ * NOTE(review): __copy_to_user() is used, so this assumes the caller has
+ * already validated 'ufp' -- confirm.
+ * NOTE(review): only the fields assigned below are initialized; if
+ * struct odp_flow_stats has padding or further members, uninitialized
+ * stack bytes would reach user space -- check the struct layout. */
+static int put_stats(struct sw_flow *flow, struct __user odp_flow *ufp)
{
- if (request->version < OFP_VERSION) {
- char err[64];
- sprintf(err, "Only version 0x%02x supported", OFP_VERSION);
- dp_send_error_msg(dp, sender, OFPET_HELLO_FAILED,
- OFPHFC_INCOMPATIBLE, err, strlen(err));
- return -EINVAL;
+ struct odp_flow_stats stats;
+ unsigned long flags;
+
+ /* A zero tv_sec is reported as "never used": both fields are zeroed. */
+ if (flow->used.tv_sec) {
+ stats.used_sec = flow->used.tv_sec;
+ stats.used_nsec = flow->used.tv_nsec;
} else {
- struct sk_buff *skb;
- struct ofp_header *reply;
+ stats.used_sec = 0;
+ stats.used_nsec = 0;
+ }
- reply = alloc_openflow_skb(dp, sizeof *reply,
- OFPT_HELLO, sender, &skb);
- if (!reply)
- return -ENOMEM;
+ /* Read the counters together under the flow's lock. */
+ spin_lock_irqsave(&flow->lock, flags);
+ stats.n_packets = flow->packet_count;
+ stats.n_bytes = flow->byte_count;
+ stats.ip_tos = flow->ip_tos;
+ stats.tcp_flags = flow->tcp_flags;
+ spin_unlock_irqrestore(&flow->lock, flags);
- return send_openflow_skb(dp, skb, sender);
- }
+ return __copy_to_user(&ufp->stats, &stats, sizeof ufp->stats);
}
-int
-dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
+/* Adds the flow described by the user-space struct odp_flow at 'ufp' to
+ * the flow table of 'dp', replacing any existing flow with the same key.
+ *
+ * The table is expanded first when dp->n_flows * 4 reaches the bucket
+ * count and the table is still below DP_MAX_BUCKETS. When an existing
+ * flow is replaced, its statistics are copied back to 'ufp' before it is
+ * freed.
+ *
+ * Returns 0 on success or a negative errno value: -EFAULT if user memory
+ * cannot be read or written, -ENOMEM if the flow cannot be allocated,
+ * -EXFULL if no bucket is available, or the error reported by
+ * validate_actions() or dp_table_expand(). */
+static int add_flow(struct datapath *dp, struct odp_flow __user *ufp)
{
- unsigned long int flags;
- int port_no = ntohs(opm->port_no);
- struct net_bridge_port *p;
- p = (port_no < DP_MAX_PORTS ? dp->ports[port_no]
- : port_no == OFPP_LOCAL ? dp->local_port
- : NULL);
+ struct odp_flow uf;
+ struct sw_flow *flow, **bucket;
+ struct dp_table *table;
+ struct sw_flow_actions *sf_acts;
+ int error;
- /* Make sure the port id hasn't changed since this was sent */
- if (!p || memcmp(opm->hw_addr, p->dev->dev_addr, ETH_ALEN))
- return -1;
+ error = -EFAULT;
+ if (copy_from_user(&uf, ufp, sizeof uf))
+ goto error;
- spin_lock_irqsave(&p->lock, flags);
- if (opm->mask) {
- uint32_t config_mask = ntohl(opm->mask);
- p->config &= ~config_mask;
- p->config |= ntohl(opm->config) & config_mask;
+ /* An allocation failure must not be reported as -EFAULT. */
+ error = -ENOMEM;
+ flow = flow_alloc(uf.n_actions);
+ if (flow == NULL)
+ goto error;
+ sf_acts = rcu_dereference(flow->sf_acts);
+
+ /* Initialize flow. */
+ flow->key = uf.key;
+ error = -EFAULT;
+ if (copy_from_user(sf_acts->actions, uf.actions,
+ uf.n_actions * sizeof *uf.actions))
+ goto error_free_flow;
+ error = validate_actions(sf_acts);
+ if (error)
+ goto error_free_flow;
+
+ flow->used.tv_sec = flow->used.tv_nsec = 0;
+ flow->tcp_flags = 0;
+ flow->ip_tos = 0;
+ spin_lock_init(&flow->lock);
+ flow->packet_count = 0;
+ flow->byte_count = 0;
+
+ /* Add to table. */
+ table = rcu_dereference(dp->table);
+ if (dp->n_flows * 4 >= table->n_buckets &&
+ table->n_buckets < DP_MAX_BUCKETS) {
+ error = dp_table_expand(dp);
+ if (error)
+ goto error_free_flow;
+ table = dp->table;
}
- spin_unlock_irqrestore(&p->lock, flags);
- return 0;
+ bucket = dp_table_lookup_for_insert(table, flow);
+ error = -EXFULL;
+ if (!bucket)
+ goto error_free_flow;
+ else if (!*bucket) {
+ error = 0;
+ rcu_assign_pointer(*bucket, flow);
+ dp->n_flows++;
+ } else {
+ /* Replace 'old_flow' by 'flow'. */
+ struct sw_flow *old_flow = *rcu_dereference(bucket);
+ rcu_assign_pointer(*bucket, flow);
+ synchronize_rcu(); /* XXX expensive! */
+ error = put_stats(old_flow, ufp) ? -EFAULT : 0;
+ flow_free(old_flow);
+ }
+
+ return error;
+
+error_free_flow:
+ flow_free(flow);
+error:
+ return error;
}
-/* Initialize the port status field of the bridge port. */
-static void
-init_port_status(struct net_bridge_port *p)
+/* Copies the actions of 'flow' out to user space.
+ *
+ * The destination buffer pointer and its capacity (in actions) are read
+ * from the 'actions' and 'n_actions' members of the user-space
+ * struct odp_flow at 'ufp'. If the capacity is 0 nothing is written.
+ * Otherwise the flow's real action count is stored back into
+ * ufp->n_actions and, if the buffer pointer is nonnull, at most
+ * min(real count, capacity) actions are copied into it.
+ *
+ * Returns 0 on success, -EFAULT if user memory cannot be accessed, or
+ * -EINVAL if the requested capacity would overflow the byte count. */
+static int put_actions(const struct sw_flow *flow, struct odp_flow __user *ufp)
{
- unsigned long int flags;
+ union odp_action __user *actions;
+ struct sw_flow_actions *sf_acts;
+ u32 n_actions;
- spin_lock_irqsave(&p->lock, flags);
+ if (__get_user(actions, &ufp->actions) ||
+ __get_user(n_actions, &ufp->n_actions))
+ return -EFAULT;
- if (p->dev->flags & IFF_UP)
- p->config &= ~OFPPC_PORT_DOWN;
- else
- p->config |= OFPPC_PORT_DOWN;
+ if (!n_actions)
+ return 0;
+ /* Check the copy fetched above; 'ufp' is a user pointer and must
+ * not be dereferenced directly. */
+ if (n_actions > INT_MAX / sizeof(union odp_action))
+ return -EINVAL;
- if (netif_carrier_ok(p->dev))
- p->state &= ~OFPPS_LINK_DOWN;
- else
- p->state |= OFPPS_LINK_DOWN;
+ sf_acts = rcu_dereference(flow->sf_acts);
+ if (__put_user(sf_acts->n_actions, &ufp->n_actions) ||
+ (actions && copy_to_user(actions, sf_acts->actions,
+ sizeof(union odp_action) *
+ min(sf_acts->n_actions, n_actions))))
+ return -EFAULT;
- spin_unlock_irqrestore(&p->lock, flags);
+ return 0;
}
-int
-dp_send_port_status(struct net_bridge_port *p, uint8_t status)
+/* Reports 'flow' to user space: writes its statistics, then its actions,
+ * into the struct odp_flow at 'ufp'.
+ *
+ * Returns -EFAULT if the statistics cannot be copied out; otherwise
+ * returns the result of put_actions(). */
+static int answer_query(struct sw_flow *flow, struct odp_flow __user *ufp)
{
- struct sk_buff *skb;
- struct ofp_port_status *ops;
-
- ops = alloc_openflow_skb(p->dp, sizeof *ops, OFPT_PORT_STATUS, NULL,
- &skb);
- if (!ops)
- return -ENOMEM;
- ops->reason = status;
- memset(ops->pad, 0, sizeof ops->pad);
- fill_port_desc(p, &ops->desc);
-
- return send_openflow_skb(p->dp, skb, NULL);
+ if (put_stats(flow, ufp))
+ return -EFAULT;
+ return put_actions(flow, ufp);
}
-/* Convert jiffies_64 to milliseconds. */
-static u64 inline jiffies_64_to_msecs(const u64 j)
+/* Handles ODP_FLOW_DEL and ODP_FLOW_QUERY for the flow whose key userspace
+ * supplied in 'ufp'.
+ *
+ * In both cases the flow's statistics and actions are written back to 'ufp'
+ * with answer_query().  For ODP_FLOW_DEL the flow is first removed from the
+ * table, dp->n_flows is decremented, and the flow is freed once
+ * synchronize_rcu() has returned.
+ *
+ * Returns 0 on success, -EFAULT if 'ufp' cannot be read, -ENOENT if no flow
+ * has that key, or the error from dp_table_delete() or answer_query(). */
+static int del_or_query_flow(struct datapath *dp,
+			     struct odp_flow __user *ufp,
+			     unsigned int cmd)
{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
-#else
- return (j * MSEC_PER_SEC) / HZ;
-#endif
+	struct dp_table *tbl = rcu_dereference(dp->table);
+	struct odp_flow request;
+	struct sw_flow *match;
+	int retval;
+
+	if (copy_from_user(&request, ufp, sizeof request))
+		return -EFAULT;
+
+	match = dp_table_lookup(tbl, &request.key);
+	if (!match)
+		return -ENOENT;
+
+	/* A plain query only has to report the flow. */
+	if (cmd != ODP_FLOW_DEL)
+		return answer_query(match, ufp);
+
+	/* XXX redundant lookup */
+	retval = dp_table_delete(tbl, match);
+	if (retval)
+		return retval;
+	dp->n_flows--;
+	synchronize_rcu();	/* XXX expensive! */
+
+	/* Report the final statistics before the flow goes away. */
+	retval = answer_query(match, ufp);
+	flow_free(match);
+	return retval;
}
-int
-dp_send_flow_end(struct datapath *dp, struct sw_flow *flow,
- enum nx_flow_end_reason reason)
+/* Answers a batch of flow queries.
+ *
+ * 'flowvec' is a kernel copy of the request; flowvec->flows is a userspace
+ * array of flowvec->n_flows records.  For each record, the flow whose key
+ * matches is looked up in the datapath's table.  If one exists its
+ * statistics and actions are written back into the record with
+ * answer_query(); otherwise the record's 'stats' member is zeroed.
+ *
+ * Returns flowvec->n_flows on success.  Any failure to read or fill in a
+ * record, including any error from answer_query(), is reported as -EFAULT.
+ *
+ * The unchecked __copy_from_user()/__clear_user() forms are used because
+ * do_flowvec_ioctl() runs access_ok() over the array before calling. */
+static int query_multiple_flows(struct datapath *dp,
+				const struct odp_flowvec *flowvec)
{
- struct sk_buff *skb;
- struct nx_flow_end *nfe;
-
- if (!dp->send_flow_end)
- return 0;
+	struct dp_table *table = rcu_dereference(dp->table);
+	int i;
+	for (i = 0; i < flowvec->n_flows; i++) {
+		/* __user qualifies the pointed-to object, so it belongs after
+		 * the type name, as in the other functions of this file. */
+		struct odp_flow __user *ufp = &flowvec->flows[i];
+		struct odp_flow uf;
+		struct sw_flow *flow;
+		int error;
- nfe = alloc_openflow_skb(dp, sizeof *nfe, OFPT_VENDOR, 0, &skb);
- if (!nfe)
- return -ENOMEM;
+		if (__copy_from_user(&uf, ufp, sizeof uf))
+			return -EFAULT;
- nfe->header.vendor = htonl(NX_VENDOR_ID);
- nfe->header.subtype = htonl(NXT_FLOW_END);
+		flow = dp_table_lookup(table, &uf.key);
+		if (!flow)
+			error = __clear_user(&ufp->stats, sizeof ufp->stats);
+		else
+			error = answer_query(flow, ufp);
+		if (error)
+			return -EFAULT;
+	}
+	return flowvec->n_flows;
+}
- flow_fill_match(&nfe->match, &flow->key);
+struct list_flows_cbdata { /* State that list_flows() hands to its per-flow callback list_flow(). */
+ struct odp_flow __user *uflows; /* Userspace array that receives the flows. */
+ int n_flows; /* Number of entries 'uflows' has room for. */
+ int listed_flows; /* Number of entries of 'uflows' claimed so far. */
+};
- nfe->priority = htons(flow->priority);
- nfe->reason = reason;
+/* Per-flow callback used by list_flows().  'cbdata_' is a struct
+ * list_flows_cbdata.
+ *
+ * Claims the next entry of the userspace array, then writes the flow's key
+ * and (through answer_query()) its statistics and actions into it.
+ *
+ * Returns 0 while the array still has room, the number of entries used once
+ * it is full, or a negative errno on failure.  Presumably a nonzero return
+ * makes dp_table_foreach() stop iterating -- confirm. */
+static int list_flow(struct sw_flow *flow, void *cbdata_)
+{
+	struct list_flows_cbdata *state = cbdata_;
+	struct odp_flow __user *slot;
+	int retval;
+
+	/* The entry counts as used even if one of the copies below fails. */
+	slot = state->uflows + state->listed_flows;
+	state->listed_flows++;
- nfe->tcp_flags = flow->tcp_flags;
- nfe->ip_tos = flow->ip_tos;
+	if (__copy_to_user(&slot->key, &flow->key, sizeof flow->key))
+		return -EFAULT;
+	retval = answer_query(flow, slot);
+	if (retval)
+		return retval;
- memset(nfe->pad, 0, sizeof nfe->pad);
+	return state->listed_flows < state->n_flows ? 0 : state->listed_flows;
+}
- nfe->init_time = cpu_to_be64(jiffies_64_to_msecs(flow->created));
- nfe->used_time = cpu_to_be64(jiffies_64_to_msecs(flow->used));
- nfe->end_time = cpu_to_be64(jiffies_64_to_msecs(get_jiffies_64()));
+/* Writes the flows in the datapath's table to the userspace array described
+ * by 'flowvec' (a kernel copy of the request), running list_flow() on each
+ * flow through dp_table_foreach().
+ *
+ * Returns 0 at once if the array has no room.  Otherwise returns whatever
+ * nonzero value dp_table_foreach() produced, or, if that was zero, the
+ * number of array entries used. */
+static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
+{
+	struct list_flows_cbdata state;
+	int retval;
- nfe->packet_count = cpu_to_be64(flow->packet_count);
- nfe->byte_count = cpu_to_be64(flow->byte_count);
+	if (flowvec->n_flows == 0)
+		return 0;
- return send_openflow_skb(dp, skb, NULL);
+	state.uflows = flowvec->flows;
+	state.n_flows = flowvec->n_flows;
+	state.listed_flows = 0;
+
+	retval = dp_table_foreach(rcu_dereference(dp->table), list_flow, &state);
+	if (retval)
+		return retval;
+	return state.listed_flows;
}
-EXPORT_SYMBOL(dp_send_flow_end);
-int
-dp_send_error_msg(struct datapath *dp, const struct sender *sender,
- uint16_t type, uint16_t code, const void *data, size_t len)
+/* Common front end for the ioctls that take a struct odp_flowvec.
+ *
+ * Copies the flowvec header in from the userspace address 'argp', checks
+ * that the flow array it describes is writable, and calls 'function' with
+ * the kernel copy.
+ *
+ * A negative result from 'function' is returned unchanged.  If 'function'
+ * handled exactly n_flows entries, 0 is returned.  Otherwise the number
+ * handled is stored into the userspace n_flows member and the result of
+ * that store is returned.
+ *
+ * Also returns -EFAULT if userspace memory fails an access check, or -EINVAL
+ * if n_flows exceeds INT_MAX / sizeof(struct odp_flow). */
+static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
+			    int (*function)(struct datapath *,
+					    const struct odp_flowvec *))
{
- struct sk_buff *skb;
- struct ofp_error_msg *oem;
+	struct odp_flowvec __user *user_vec = (struct odp_flowvec __user *)argp;
+	struct odp_flowvec vec;
+	int n_done;
+
+	/* The header is written back below, so require write access now. */
+	if (!access_ok(VERIFY_WRITE, user_vec, sizeof *user_vec))
+		return -EFAULT;
+	if (copy_from_user(&vec, user_vec, sizeof vec))
+		return -EFAULT;
- oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR,
- sender, &skb);
- if (!oem)
- return -ENOMEM;
+	/* Bound the count before multiplying it by the entry size. */
+	if (vec.n_flows > INT_MAX / sizeof(struct odp_flow))
+		return -EINVAL;
- oem->type = htons(type);
- oem->code = htons(code);
- memcpy(oem->data, data, len);
+	if (!access_ok(VERIFY_WRITE, vec.flows,
+		       vec.n_flows * sizeof(struct odp_flow)))
+		return -EFAULT;
- return send_openflow_skb(dp, skb, sender);
+	n_done = function(dp, &vec);
+	if (n_done < 0)
+		return n_done;
+	if (n_done == vec.n_flows)
+		return 0;
+	return __put_user(n_done, &user_vec->n_flows);
}
-int
-dp_send_echo_reply(struct datapath *dp, const struct sender *sender,
- const struct ofp_header *rq)
+/* Executes a userspace-supplied list of actions on a userspace-supplied
+ * packet (ODP_EXECUTE).
+ *
+ * 'executep' is a userspace pointer to the request.  The request's actions
+ * are copied in and checked with validate_actions(), the packet data is
+ * copied into a freshly allocated skb, a flow key is extracted from it, and
+ * the actions are run with execute_actions().  If the request's in_port
+ * names an existing port, the skb's device is set to that port's device.
+ *
+ * Returns the result of execute_actions(), or -EFAULT if userspace memory
+ * cannot be read, -EINVAL if the packet length is outside
+ * [ETH_HLEN, 65535], -ENOMEM if an allocation fails, or the error from
+ * validate_actions().
+ *
+ * NOTE(review): the success path never frees 'skb', so execute_actions()
+ * presumably takes ownership of it -- confirm. */
+static int do_execute(struct datapath *dp,
+		      const struct odp_execute __user *executep)
{
+	struct odp_execute execute;
+	struct odp_flow_key key;
struct sk_buff *skb;
- struct ofp_header *reply;
-
- reply = alloc_openflow_skb(dp, ntohs(rq->length), OFPT_ECHO_REPLY,
- sender, &skb);
- if (!reply)
- return -ENOMEM;
+	struct sw_flow_actions *actions;
+	int err;
- memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq);
- return send_openflow_skb(dp, skb, sender);
-}
+	err = -EFAULT;
+	if (copy_from_user(&execute, executep, sizeof execute))
+		goto error;
-/* Generic Netlink interface.
- *
- * See netlink(7) for an introduction to netlink. See
- * http://linux-net.osdl.org/index.php/Netlink for more information and
- * pointers on how to work with netlink and Generic Netlink in the kernel and
- * in userspace. */
-
-static struct genl_family dp_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = DP_GENL_FAMILY_NAME,
- .version = 1,
- .maxattr = DP_GENL_A_MAX,
-};
+	/* The packet must at least hold an Ethernet header. */
+	err = -EINVAL;
+	if (execute.length < ETH_HLEN || execute.length > 65535)
+		goto error;
-/* Attribute policy: what each attribute may contain. */
-static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
- [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
- [DP_GENL_A_DP_NAME] = { .type = NLA_NUL_STRING },
- [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },
- [DP_GENL_A_PORTNAME] = { .type = NLA_NUL_STRING }
-};
+	err = -ENOMEM;
+	actions = flow_actions_alloc(execute.n_actions);
+	if (!actions)
+		goto error;
-static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
-{
- int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ?
- nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1;
- const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ?
- nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL;
+	err = -EFAULT;
+	if (copy_from_user(actions->actions, execute.actions,
+			   execute.n_actions * sizeof *execute.actions))
+		goto error_free_actions;
- if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME]))
- return -EINVAL;
+	err = validate_actions(actions);
+	if (err)
+		goto error_free_actions;
- if ((dp_idx == -1) && (!dp_name))
- return -EINVAL;
+	err = -ENOMEM;
+	skb = alloc_skb(execute.length, GFP_KERNEL);
+	if (!skb)
+		goto error_free_actions;
+	if (execute.in_port < DP_MAX_PORTS) {
+		struct net_bridge_port *p = dp->ports[execute.in_port];
+		if (p)
+			skb->dev = p->dev;
+	}
- return new_dp(dp_idx, dp_name);
-}
+	err = -EFAULT;
+	if (copy_from_user(skb_put(skb, execute.length), execute.data,
+			   execute.length))
+		goto error_free_skb;
-static struct genl_ops dp_genl_ops_add_dp = {
- .cmd = DP_GENL_C_ADD_DP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_policy,
- .doit = dp_genl_add,
- .dumpit = NULL,
-};
+	flow_extract(skb, execute.in_port, &key);
+	err = execute_actions(dp, skb, &key, actions, GFP_KERNEL);
+	kfree(actions);
+	return err;
-/* Must be called with rcu_read_lock or dp_mutex. */
-struct datapath *dp_get_by_idx(int dp_idx)
-{
- if (dp_idx < 0 || dp_idx >= DP_MAX)
- return NULL;
- return rcu_dereference(dps[dp_idx]);
+error_free_skb:
+	kfree_skb(skb);
+error_free_actions:
+	kfree(actions);
+error:
+	return err;
}
-EXPORT_SYMBOL(dp_get_by_idx);
-/* Must be called with rcu_read_lock or dp_mutex. */
-struct datapath *dp_get_by_name(const char *dp_name)
+/* Reports datapath-wide statistics to userspace (ODP_DP_STATS).
+ *
+ * Fills a struct odp_stats with the flow and port counts and limits of 'dp'
+ * and with the n_frags, n_hit, n_missed and n_lost counters summed over
+ * every possible CPU, then copies it to 'statsp'.
+ *
+ * Returns 0 on success or -EFAULT if 'statsp' cannot be written. */
+static int
+get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
{
+	struct odp_stats stats;
int i;
- for (i=0; i<DP_MAX; i++) {
- struct datapath *dp = rcu_dereference(dps[i]);
- if (dp && !strcmp(dp->netdev->name, dp_name))
- return dp;
+
+	/* Clear the whole structure first so that no stale kernel stack
+	 * bytes reach userspace, in case it has padding or members that are
+	 * not assigned below. */
+	memset(&stats, 0, sizeof stats);
+	stats.n_flows = dp->n_flows;
+	stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2;
+	stats.max_capacity = DP_MAX_BUCKETS * 2;
+	stats.n_ports = dp->n_ports;
+	stats.max_ports = DP_MAX_PORTS;
+	stats.max_groups = DP_MAX_GROUPS;
+	stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
+	for_each_possible_cpu(i) {
+		const struct dp_stats_percpu *s;
+		s = percpu_ptr(dp->stats_percpu, i);
+		stats.n_frags += s->n_frags;
+		stats.n_hit += s->n_hit;
+		stats.n_missed += s->n_missed;
+		stats.n_lost += s->n_lost;
}
- return NULL;
+	/* copy_to_user() returns the number of bytes it could NOT copy, which
+	 * is not an errno; translate it rather than returning a positive
+	 * value from the ioctl. */
+	return copy_to_user(statsp, &stats, sizeof stats) ? -EFAULT : 0;
}
-/* Must be called with rcu_read_lock or dp_mutex. */
-static struct datapath *
-lookup_dp(struct genl_info *info)
+/* Describes port 'p' to userspace: writes a struct odp_port holding the
+ * port's device name and port number to 'uop'.
+ *
+ * Returns 0 on success or -EFAULT if 'uop' cannot be written. */
+static int
+put_port(const struct net_bridge_port *p, struct odp_port __user *uop)
{
- int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ?
- nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1;
- const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ?
- nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL;
-
- if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME]))
- return ERR_PTR(-EINVAL);
-
- if (dp_idx != -1) {
- struct datapath *dp = dp_get_by_idx(dp_idx);
- if (!dp)
- return ERR_PTR(-ENOENT);
- else if (dp_name && strcmp(dp->netdev->name, dp_name))
- return ERR_PTR(-EINVAL);
- else
- return dp;
- } else if (dp_name) {
- struct datapath *dp = dp_get_by_name(dp_name);
- return dp ? dp : ERR_PTR(-ENOENT);
- } else {
- return ERR_PTR(-EINVAL);
- }
+	struct odp_port op;
+
+	/* Zero everything so that unset members and padding do not carry
+	 * kernel stack contents out to userspace. */
+	memset(&op, 0, sizeof op);
+	strncpy(op.devname, p->dev->name, sizeof op.devname);
+	op.port = p->port_no;
+
+	/* copy_to_user() returns the number of bytes left uncopied, not an
+	 * errno.  Callers hand this function's result back to userspace, so
+	 * translate it here. */
+	return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0;
}
-static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
+/* Looks up one port of 'dp' for userspace (ODP_PORT_QUERY) and writes its
+ * description back to 'uport' with put_port().
+ *
+ * If the request carries a non-empty device name, the port is found through
+ * that network device, which must be attached to 'dp'.  Otherwise the port
+ * is found by the request's port number.
+ *
+ * Returns the result of put_port(), or -EFAULT if 'uport' cannot be read,
+ * -ENODEV if no device has the given name, -ENOENT if the device or port
+ * number does not correspond to a port of 'dp', or -EINVAL if the port
+ * number is DP_MAX_PORTS or more. */
+static int
+query_port(struct datapath *dp, struct odp_port __user *uport)
{
- struct net_device *dev = NULL;
- struct datapath *dp;
- int err;
+	struct odp_port request;
- rtnl_lock();
- mutex_lock(&dp_mutex);
- dp = lookup_dp(info);
- if (IS_ERR(dp))
- err = PTR_ERR(dp);
- else {
- dev = dp->netdev;
- del_dp(dp);
- err = 0;
- }
- mutex_unlock(&dp_mutex);
- rtnl_unlock();
- if (dev)
- free_netdev(dev);
- return err;
-}
+	if (copy_from_user(&request, uport, sizeof request))
+		return -EFAULT;
+
+	if (request.devname[0] != '\0') {
+		struct net_bridge_port *bp;
+		struct net_device *netdev;
+		int retval = -ENOENT;
-static struct genl_ops dp_genl_ops_del_dp = {
- .cmd = DP_GENL_C_DEL_DP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_policy,
- .doit = dp_genl_del,
- .dumpit = NULL,
-};
+		/* The name came from userspace; force termination. */
+		request.devname[IFNAMSIZ - 1] = '\0';
-/* Queries a datapath for related information. Currently the only relevant
- * information is the datapath's multicast group ID, datapath ID, and
- * datapath device name. */
-static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
-{
- struct datapath *dp;
- struct sk_buff *ans_skb = NULL;
- int err;
+		netdev = dev_get_by_name(&init_net, request.devname);
+		if (netdev == NULL)
+			return -ENODEV;
- rcu_read_lock();
- dp = lookup_dp(info);
- if (IS_ERR(dp))
- err = PTR_ERR(dp);
- else {
- void *data;
- ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
- if (!ans_skb) {
- err = -ENOMEM;
- goto err;
- }
- err = -ENOMEM;
- data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
- 0, DP_GENL_C_QUERY_DP);
- if (data == NULL)
- goto err;
- NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
- NLA_PUT_STRING(ans_skb, DP_GENL_A_DP_NAME, dp->netdev->name);
- NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, dp_mc_group(dp));
+		/* Only answer for a device that is a port of this datapath. */
+		bp = netdev->br_port;
+		if (bp != NULL && bp->dp == dp)
+			retval = put_port(bp, uport);
+		dev_put(netdev);	/* Drop the dev_get_by_name() reference. */
- genlmsg_end(ans_skb, data);
- err = genlmsg_reply(ans_skb, info);
- ans_skb = NULL;
+		return retval;
+	} else {
+		struct net_bridge_port *bp;
+
+		if (request.port >= DP_MAX_PORTS)
+			return -EINVAL;
+		bp = dp->ports[request.port];
+		if (bp == NULL)
+			return -ENOENT;
+		return put_port(bp, uport);
}
-err:
-nla_put_failure:
- kfree_skb(ans_skb);
- rcu_read_unlock();
- return err;
}
-static struct genl_ops dp_genl_ops_query_dp = {
- .cmd = DP_GENL_C_QUERY_DP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_policy,
- .doit = dp_genl_query,
- .dumpit = NULL,
-};
-
-static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
+/* Lists the ports of 'dp' for userspace (ODP_PORT_LIST).
+ *
+ * 'pvp' is a userspace struct odp_portvec whose 'ports' member points to an
+ * array with room for 'n_ports' entries.  Descriptions of up to that many
+ * ports are written to the array with put_port().  On return the userspace
+ * n_ports member holds the number of entries written, or the capacity plus
+ * one if the datapath has at least one more port than fits; a value above
+ * the requested capacity therefore signals truncation.  A capacity of zero
+ * always reports zero.
+ *
+ * Returns 0 on success or -EFAULT if userspace memory cannot be accessed. */
+static int
+list_ports(struct datapath *dp, struct odp_portvec __user *pvp)
{
- struct datapath *dp;
- struct net_device *port;
- int err;
-
- if (!info->attrs[DP_GENL_A_PORTNAME] ||
- VERIFY_NUL_STRING(info->attrs[DP_GENL_A_PORTNAME]))
- return -EINVAL;
-
- rtnl_lock();
- mutex_lock(&dp_mutex);
-
- /* Get datapath. */
- dp = lookup_dp(info);
- if (IS_ERR(dp)) {
- err = PTR_ERR(dp);
- goto out_unlock;
- }
-
- /* Get interface to add/remove. */
- port = dev_get_by_name(&init_net,
- nla_data(info->attrs[DP_GENL_A_PORTNAME]));
- if (!port) {
- err = -ENOENT;
- goto out_unlock;
- }
-
- /* Execute operation. */
- if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
- err = add_switch_port(dp, port);
- else {
- if (port->br_port == NULL || port->br_port->dp != dp) {
- err = -ENOENT;
- goto out_put;
+	struct odp_portvec pv;
+	struct net_bridge_port *p;
+	int idx;
+
+	if (copy_from_user(&pv, pvp, sizeof pv))
+		return -EFAULT;
+
+	idx = 0;
+	if (pv.n_ports) {
+		list_for_each_entry_rcu (p, &dp->port_list, node) {
+			/* Check for room BEFORE writing.  Testing only after
+			 * the write would store one entry past the end of the
+			 * user's array.  The extra port is still counted so
+			 * that the caller can tell the list was cut short. */
+			if (idx >= pv.n_ports) {
+				idx++;
+				break;
+			}
+			if (put_port(p, &pv.ports[idx]))
+				return -EFAULT;
+			idx++;
}
- err = dp_del_switch_port(port->br_port);
}
-
-out_put:
- dev_put(port);
-out_unlock:
- mutex_unlock(&dp_mutex);
- rtnl_unlock();
- return err;
+	return put_user(idx, &pvp->n_ports);
}
-static struct genl_ops dp_genl_ops_add_port = {
- .cmd = DP_GENL_C_ADD_PORT,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_policy,
- .doit = dp_genl_add_del_port,
- .dumpit = NULL,
-};
-
-static struct genl_ops dp_genl_ops_del_port = {
- .cmd = DP_GENL_C_DEL_PORT,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_policy,
- .doit = dp_genl_add_del_port,
- .dumpit = NULL,
-};
-
-static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
+/* Installs a port group in 'dp' (ODP_PORT_GROUP_SET).
+ *
+ * 'upg' is a userspace request giving the group number and an array of
+ * 'n_ports' 16-bit port numbers.  A new group holding a copy of that array
+ * is allocated and published in dp->groups[] with rcu_assign_pointer().
+ * Any group previously in that slot is freed after synchronize_rcu().
+ *
+ * Returns 0 on success, -EFAULT if userspace memory cannot be read, -EINVAL
+ * if n_ports exceeds DP_MAX_PORTS or the group number is DP_MAX_GROUPS or
+ * more, or -ENOMEM if an allocation fails. */
+static int
+set_port_group(struct datapath *dp, const struct odp_port_group __user *upg)
{
- struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
- struct datapath *dp;
- struct ofp_header *oh;
- struct sender sender;
- int err;
-
- if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
- return -EINVAL;
+	struct odp_port_group request;
+	struct odp_port_group *new_group, *old_group;
+	size_t ports_size;
+	int retval;
- dp = dp_get_by_idx(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
- if (!dp)
- return -ENOENT;
-
- if (nla_len(va) < sizeof(struct ofp_header))
- return -EINVAL;
- oh = nla_data(va);
-
- sender.xid = oh->xid;
- sender.pid = info->snd_pid;
- sender.seq = info->snd_seq;
-
- mutex_lock(&dp_mutex);
- err = fwd_control_input(dp->chain, &sender,
- nla_data(va), nla_len(va));
- mutex_unlock(&dp_mutex);
- return err;
-}
-
-static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
- [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
-};
+	/* Nothing is allocated yet, so early failures can return directly. */
+	if (copy_from_user(&request, upg, sizeof request))
+		return -EFAULT;
-static int desc_stats_dump(struct datapath *dp, void *state,
- void *body, int *body_len)
-{
- struct ofp_desc_stats *ods = body;
- int n_bytes = sizeof *ods;
+	if (request.group >= DP_MAX_GROUPS || request.n_ports > DP_MAX_PORTS)
+		return -EINVAL;
- if (n_bytes > *body_len) {
- return -ENOBUFS;
+	ports_size = sizeof(u16) * request.n_ports;
+	new_group = kmalloc(sizeof *new_group, GFP_KERNEL);
+	if (!new_group)
+		return -ENOMEM;
+	new_group->ports = kmalloc(ports_size, GFP_KERNEL);
+	if (!new_group->ports) {
+		retval = -ENOMEM;
+		goto free_group;
+	}
+	new_group->n_ports = request.n_ports;
+
+	if (copy_from_user(new_group->ports, request.ports, ports_size)) {
+		retval = -EFAULT;
+		goto free_ports;
+	}
+
+	/* Publish the new group, then retire the one it replaced. */
+	old_group = rcu_dereference(dp->groups[request.group]);
+	rcu_assign_pointer(dp->groups[request.group], new_group);
+	if (old_group) {
+		synchronize_rcu();	/* XXX expensive! */
+		kfree(old_group->ports);
+		kfree(old_group);
}
- *body_len = n_bytes;
-
- strncpy(ods->mfr_desc, mfr_desc, sizeof ods->mfr_desc);
- strncpy(ods->hw_desc, hw_desc, sizeof ods->hw_desc);
- strncpy(ods->sw_desc, sw_desc, sizeof ods->sw_desc);
- strncpy(ods->serial_num, serial_num, sizeof ods->serial_num);
-
return 0;
-}
-
-struct flow_stats_state {
- int table_idx;
- struct sw_table_position position;
- const struct ofp_flow_stats_request *rq;
- void *body;
- int bytes_used, bytes_allocated;
-};
-
-static int flow_stats_init(struct datapath *dp, const void *body, int body_len,
- void **state)
-{
- const struct ofp_flow_stats_request *fsr = body;
- struct flow_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC);
- if (!s)
- return -ENOMEM;
- s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id;
- memset(&s->position, 0, sizeof s->position);
- s->rq = fsr;
- *state = s;
- return 0;
+free_ports:
+	kfree(new_group->ports);
+free_group:
+	kfree(new_group);
+	return retval;
}
-static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
+/* Reports the contents of a port group to userspace (ODP_PORT_GROUP_GET).
+ *
+ * 'upg' is a userspace request giving the group number and a 'ports' buffer
+ * with room for 'n_ports' 16-bit port numbers.  As many of the group's port
+ * numbers as fit are copied into the buffer, and the userspace n_ports
+ * member is overwritten with the number of ports the group really has (0 if
+ * the group has never been set).
+ *
+ * Returns 0 on success, -EFAULT if userspace memory cannot be accessed, or
+ * -EINVAL if the group number is DP_MAX_GROUPS or more. */
+static int
+get_port_group(struct datapath *dp, struct odp_port_group __user *upg)
{
- struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts);
- struct flow_stats_state *s = private;
- struct ofp_flow_stats *ofs;
- int length;
- uint64_t duration;
-
- length = sizeof *ofs + sf_acts->actions_len;
- if (length + s->bytes_used > s->bytes_allocated)
- return 1;
+	struct odp_port_group pg, *g;
+	u16 n_copy;
- ofs = s->body + s->bytes_used;
- ofs->length = htons(length);
- ofs->table_id = s->table_idx;
- ofs->pad = 0;
- ofs->match.wildcards = htonl(flow->key.wildcards);
- ofs->match.in_port = flow->key.in_port;
- memcpy(ofs->match.dl_src, flow->key.dl_src, ETH_ALEN);
- memcpy(ofs->match.dl_dst, flow->key.dl_dst, ETH_ALEN);
- ofs->match.dl_vlan = flow->key.dl_vlan;
- ofs->match.dl_type = flow->key.dl_type;
- ofs->match.nw_src = flow->key.nw_src;
- ofs->match.nw_dst = flow->key.nw_dst;
- ofs->match.nw_proto = flow->key.nw_proto;
- ofs->match.pad = 0;
- ofs->match.tp_src = flow->key.tp_src;
- ofs->match.tp_dst = flow->key.tp_dst;
-
- /* The kernel doesn't support 64-bit division, so use the 'do_div'
- * macro instead. The first argument is replaced with the quotient,
- * while the remainder is the return value. */
- duration = get_jiffies_64() - flow->created;
- do_div(duration, HZ);
- ofs->duration = htonl(duration);
-
- ofs->priority = htons(flow->priority);
- ofs->idle_timeout = htons(flow->idle_timeout);
- ofs->hard_timeout = htons(flow->hard_timeout);
- memset(ofs->pad2, 0, sizeof ofs->pad2);
- ofs->packet_count = cpu_to_be64(flow->packet_count);
- ofs->byte_count = cpu_to_be64(flow->byte_count);
- memcpy(ofs->actions, sf_acts->actions, sf_acts->actions_len);
-
- s->bytes_used += length;
- return 0;
-}
+	if (copy_from_user(&pg, upg, sizeof pg))
+		return -EFAULT;
-static int flow_stats_dump(struct datapath *dp, void *state,
- void *body, int *body_len)
-{
- struct flow_stats_state *s = state;
- struct sw_flow_key match_key;
- int error = 0;
-
- s->bytes_used = 0;
- s->bytes_allocated = *body_len;
- s->body = body;
-
- flow_extract_match(&match_key, &s->rq->match);
- while (s->table_idx < dp->chain->n_tables
- && (s->rq->table_id == 0xff || s->rq->table_id == s->table_idx))
- {
- struct sw_table *table = dp->chain->tables[s->table_idx];
-
- error = table->iterate(table, &match_key, s->rq->out_port,
- &s->position, flow_stats_dump_callback, s);
- if (error)
- break;
+	if (pg.group >= DP_MAX_GROUPS)
+		return -EINVAL;
- s->table_idx++;
- memset(&s->position, 0, sizeof s->position);
- }
- *body_len = s->bytes_used;
-
- /* If error is 0, we're done.
- * Otherwise, if some bytes were used, there are more flows to come.
- * Otherwise, we were not able to fit even a single flow in the body,
- * which indicates that we have a single flow with too many actions to
- * fit. We won't ever make any progress at that rate, so give up. */
- return !error ? 0 : s->bytes_used ? 1 : -ENOMEM;
-}
+	/* The slot is published with rcu_assign_pointer() in
+	 * set_port_group(), so read it with the matching accessor. */
+	g = rcu_dereference(dp->groups[pg.group]);
+	n_copy = g ? min(g->n_ports, pg.n_ports) : 0;
+	if (n_copy && copy_to_user(pg.ports, g->ports, n_copy * sizeof(u16)))
+		return -EFAULT;
-static void flow_stats_done(void *state)
-{
- kfree(state);
-}
+	/* Report the full size even if the caller's buffer was smaller. */
+	if (put_user(g ? g->n_ports : 0, &upg->n_ports))
+		return -EFAULT;
-static int aggregate_stats_init(struct datapath *dp,
- const void *body, int body_len,
- void **state)
-{
- *state = (void *)body;
return 0;
}
-static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private)
+/* ioctl entry point for a datapath device file.
+ *
+ * The minor number of the file's inode selects the datapath index.  The
+ * commands that create or destroy a datapath or add or delete a port are
+ * dispatched first and return directly.  Every other command looks the
+ * datapath up with get_dp_locked(), is dispatched on 'cmd', and ends by
+ * releasing dp->mutex.
+ *
+ * f->private_data holds this file's listen mask (see ODP_GET_LISTEN_MASK
+ * and ODP_SET_LISTEN_MASK).
+ *
+ * Returns the handler's result, -ENODEV if the datapath does not exist, or
+ * -ENOIOCTLCMD for an unknown command.
+ *
+ * NOTE(review): get_dp_locked() presumably returns with dp->mutex held,
+ * since this function unlocks it on the way out -- confirm. */
+static long openflow_ioctl(struct file *f, unsigned int cmd,
+			   unsigned long argp)
{
- struct ofp_aggregate_stats_reply *rpy = private;
- rpy->packet_count += flow->packet_count;
- rpy->byte_count += flow->byte_count;
- rpy->flow_count++;
- return 0;
-}
+	int dp_idx = iminor(f->f_path.dentry->d_inode);
+	struct datapath *dp;
+	int drop_frags, listeners, port_no;
+#ifdef SUPPORT_SNAT
+	struct odp_snat_config osc;
+#endif
+	int err;
-static int aggregate_stats_dump(struct datapath *dp, void *state,
- void *body, int *body_len)
-{
- struct ofp_aggregate_stats_request *rq = state;
- struct ofp_aggregate_stats_reply *rpy;
- struct sw_table_position position;
- struct sw_flow_key match_key;
- int table_idx;
-
- if (*body_len < sizeof *rpy)
- return -ENOBUFS;
- rpy = body;
- *body_len = sizeof *rpy;
-
- memset(rpy, 0, sizeof *rpy);
-
- flow_extract_match(&match_key, &rq->match);
- table_idx = rq->table_id == 0xff ? 0 : rq->table_id;
- memset(&position, 0, sizeof position);
- while (table_idx < dp->chain->n_tables
- && (rq->table_id == 0xff || rq->table_id == table_idx))
- {
- struct sw_table *table = dp->chain->tables[table_idx];
- int error;
+	/* Handle commands with special locking requirements up front. */
+	switch (cmd) {
+	case ODP_DP_CREATE:
+		return create_dp(dp_idx, (char __user *)argp);
- error = table->iterate(table, &match_key, rq->out_port, &position,
- aggregate_stats_dump_callback, rpy);
- if (error)
- return error;
+	case ODP_DP_DESTROY:
+		return destroy_dp(dp_idx);
+
+	case ODP_PORT_ADD:
+		return add_port(dp_idx, (struct odp_port __user *)argp);
- table_idx++;
- memset(&position, 0, sizeof position);
+	case ODP_PORT_DEL:
+		err = get_user(port_no, (int __user *)argp);
+		/* Return the fault directly.  Leaving this switch with
+		 * 'break' would continue into the locked dispatch below,
+		 * which has no ODP_PORT_DEL case, so the error would be
+		 * replaced by -ENOIOCTLCMD (or by -ENODEV if the datapath
+		 * does not exist). */
+		if (err)
+			return err;
+		return del_port(dp_idx, port_no);
}
- rpy->packet_count = cpu_to_be64(rpy->packet_count);
- rpy->byte_count = cpu_to_be64(rpy->byte_count);
- rpy->flow_count = htonl(rpy->flow_count);
- return 0;
-}
+	dp = get_dp_locked(dp_idx);
+	if (!dp)
+		return -ENODEV;
-static int table_stats_dump(struct datapath *dp, void *state,
- void *body, int *body_len)
-{
- struct ofp_table_stats *ots;
- int n_bytes = dp->chain->n_tables * sizeof *ots;
- int i;
- if (n_bytes > *body_len)
- return -ENOBUFS;
- *body_len = n_bytes;
- for (i = 0, ots = body; i < dp->chain->n_tables; i++, ots++) {
- struct sw_table_stats stats;
- dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
- strncpy(ots->name, stats.name, sizeof ots->name);
- ots->table_id = i;
- ots->wildcards = htonl(stats.wildcards);
- memset(ots->pad, 0, sizeof ots->pad);
- ots->max_entries = htonl(stats.max_flows);
- ots->active_count = htonl(stats.n_flows);
- ots->lookup_count = cpu_to_be64(stats.n_lookup);
- ots->matched_count = cpu_to_be64(stats.n_matched);
- }
- return 0;
-}
+	switch (cmd) {
+	case ODP_DP_STATS:
+		err = get_dp_stats(dp, (struct odp_stats __user *)argp);
+		break;
-struct port_stats_state {
- int port;
-};
+	case ODP_GET_DROP_FRAGS:
+		err = put_user(dp->drop_frags, (int __user *)argp);
+		break;
-static int port_stats_init(struct datapath *dp, const void *body, int body_len,
- void **state)
-{
- struct port_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC);
- if (!s)
- return -ENOMEM;
- s->port = 0;
- *state = s;
- return 0;
-}
+	case ODP_SET_DROP_FRAGS:
+		err = get_user(drop_frags, (int __user *)argp);
+		if (err)
+			break;
+		/* Only 0 and 1 are accepted. */
+		err = -EINVAL;
+		if (drop_frags != 0 && drop_frags != 1)
+			break;
+		dp->drop_frags = drop_frags;
+		err = 0;
+		break;
-static int port_stats_dump(struct datapath *dp, void *state,
- void *body, int *body_len)
-{
- struct port_stats_state *s = state;
- struct ofp_port_stats *ops;
- int n_ports, max_ports;
- int i;
+	case ODP_GET_LISTEN_MASK:
+		err = put_user((int)f->private_data, (int __user *)argp);
+		break;
- max_ports = *body_len / sizeof *ops;
- if (!max_ports)
- return -ENOMEM;
- ops = body;
-
- n_ports = 0;
- for (i = s->port; i < DP_MAX_PORTS && n_ports < max_ports; i++) {
- struct net_bridge_port *p = dp->ports[i];
- struct net_device_stats *stats;
- if (!p)
- continue;
- stats = p->dev->get_stats(p->dev);
- ops->port_no = htons(p->port_no);
- memset(ops->pad, 0, sizeof ops->pad);
- ops->rx_packets = cpu_to_be64(stats->rx_packets);
- ops->tx_packets = cpu_to_be64(stats->tx_packets);
- ops->rx_bytes = cpu_to_be64(stats->rx_bytes);
- ops->tx_bytes = cpu_to_be64(stats->tx_bytes);
- ops->rx_dropped = cpu_to_be64(stats->rx_dropped);
- ops->tx_dropped = cpu_to_be64(stats->tx_dropped);
- ops->rx_errors = cpu_to_be64(stats->rx_errors);
- ops->tx_errors = cpu_to_be64(stats->tx_errors);
- ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors);
- ops->rx_over_err = cpu_to_be64(stats->rx_over_errors);
- ops->rx_crc_err = cpu_to_be64(stats->rx_crc_errors);
- ops->collisions = cpu_to_be64(stats->collisions);
- n_ports++;
- ops++;
- }
- s->port = i;
- *body_len = n_ports * sizeof *ops;
- return n_ports >= max_ports;
-}
+	case ODP_SET_LISTEN_MASK:
+		err = get_user(listeners, (int __user *)argp);
+		if (err)
+			break;
+		/* Reject bits outside ODPL_ALL. */
+		err = -EINVAL;
+		if (listeners & ~ODPL_ALL)
+			break;
+		err = 0;
+		f->private_data = (void*)listeners;
+		break;
-static void port_stats_done(void *state)
-{
- kfree(state);
-}
+	case ODP_PORT_QUERY:
+		err = query_port(dp, (struct odp_port __user *)argp);
+		break;
-struct stats_type {
- /* Minimum and maximum acceptable number of bytes in body member of
- * struct ofp_stats_request. */
- size_t min_body, max_body;
-
- /* Prepares to dump some kind of statistics on 'dp'. 'body' and
- * 'body_len' are the 'body' member of the struct ofp_stats_request.
- * Returns zero if successful, otherwise a negative error code.
- * May initialize '*state' to state information. May be null if no
- * initialization is required.*/
- int (*init)(struct datapath *dp, const void *body, int body_len,
- void **state);
-
- /* Dumps statistics for 'dp' into the '*body_len' bytes at 'body', and
- * modifies '*body_len' to reflect the number of bytes actually used.
- * ('body' will be transmitted as the 'body' member of struct
- * ofp_stats_reply.) */
- int (*dump)(struct datapath *dp, void *state,
- void *body, int *body_len);
-
- /* Cleans any state created by the init or dump functions. May be null
- * if no cleanup is required. */
- void (*done)(void *state);
-};
+	case ODP_PORT_LIST:
+		err = list_ports(dp, (struct odp_portvec __user *)argp);
+		break;
-static const struct stats_type stats[] = {
- [OFPST_DESC] = {
- 0,
- 0,
- NULL,
- desc_stats_dump,
- NULL
- },
- [OFPST_FLOW] = {
- sizeof(struct ofp_flow_stats_request),
- sizeof(struct ofp_flow_stats_request),
- flow_stats_init,
- flow_stats_dump,
- flow_stats_done
- },
- [OFPST_AGGREGATE] = {
- sizeof(struct ofp_aggregate_stats_request),
- sizeof(struct ofp_aggregate_stats_request),
- aggregate_stats_init,
- aggregate_stats_dump,
- NULL
- },
- [OFPST_TABLE] = {
- 0,
- 0,
- NULL,
- table_stats_dump,
- NULL
- },
- [OFPST_PORT] = {
- 0,
- 0,
- port_stats_init,
- port_stats_dump,
- port_stats_done
- },
-};
+	case ODP_PORT_GROUP_SET:
+		err = set_port_group(dp, (struct odp_port_group __user *)argp);
+		break;
-static int
-dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct datapath *dp;
- struct sender sender;
- const struct stats_type *s;
- struct ofp_stats_reply *osr;
- int dp_idx;
- int max_openflow_len, body_len;
- void *body;
- int err;
+	case ODP_PORT_GROUP_GET:
+		err = get_port_group(dp, (struct odp_port_group __user *)argp);
+		break;
- /* Set up the cleanup function for this dump. Linux 2.6.20 and later
- * support setting up cleanup functions via the .doneit member of
- * struct genl_ops. This kluge supports earlier versions also. */
- cb->done = dp_genl_openflow_done;
-
- sender.pid = NETLINK_CB(cb->skb).pid;
- sender.seq = cb->nlh->nlmsg_seq;
- if (!cb->args[0]) {
- struct nlattr *attrs[DP_GENL_A_MAX + 1];
- struct ofp_stats_request *rq;
- struct nlattr *va;
- size_t len, body_len;
- int type;
-
- err = nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, DP_GENL_A_MAX,
- dp_genl_openflow_policy);
- if (err < 0)
- return err;
-
- if (!attrs[DP_GENL_A_DP_IDX])
- return -EINVAL;
- dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]);
- dp = dp_get_by_idx(dp_idx);
- if (!dp)
- return -ENOENT;
+	case ODP_FLOW_FLUSH:
+		err = flush_flows(dp);
+		break;
- va = attrs[DP_GENL_A_OPENFLOW];
- len = nla_len(va);
- if (!va || len < sizeof *rq)
- return -EINVAL;
+	case ODP_FLOW_ADD:
+		err = add_flow(dp, (struct odp_flow __user *)argp);
+		break;
- rq = nla_data(va);
- sender.xid = rq->header.xid;
- type = ntohs(rq->type);
- if (rq->header.version != OFP_VERSION) {
- dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_VERSION, rq, len);
- return -EINVAL;
- }
- if (rq->header.type != OFPT_STATS_REQUEST
- || ntohs(rq->header.length) != len)
- return -EINVAL;
+	case ODP_FLOW_SET_ACTS:
+		err = set_flow_actions(dp, (struct odp_flow __user *)argp);
+		break;
- if (type >= ARRAY_SIZE(stats) || !stats[type].dump) {
- dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_STAT, rq, len);
- return -EINVAL;
- }
+	case ODP_FLOW_DEL:
+	case ODP_FLOW_QUERY:
+		err = del_or_query_flow(dp, (struct odp_flow __user *)argp,
+					cmd);
+		break;
- s = &stats[type];
- body_len = len - offsetof(struct ofp_stats_request, body);
- if (body_len < s->min_body || body_len > s->max_body)
- return -EINVAL;
+	case ODP_FLOW_QUERY_MULTIPLE:
+		err = do_flowvec_ioctl(dp, argp, query_multiple_flows);
+		break;
- cb->args[0] = 1;
- cb->args[1] = dp_idx;
- cb->args[2] = type;
- cb->args[3] = rq->header.xid;
- if (s->init) {
- void *state;
- err = s->init(dp, rq->body, body_len, &state);
- if (err)
- return err;
- cb->args[4] = (long) state;
- }
- } else if (cb->args[0] == 1) {
- sender.xid = cb->args[3];
- dp_idx = cb->args[1];
- s = &stats[cb->args[2]];
+	case ODP_FLOW_LIST:
+		err = do_flowvec_ioctl(dp, argp, list_flows);
+		break;
- dp = dp_get_by_idx(dp_idx);
- if (!dp)
- return -ENOENT;
- } else {
- return 0;
- }
+	case ODP_EXECUTE:
+		err = do_execute(dp, (struct odp_execute __user *)argp);
+		break;
- osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender,
- &max_openflow_len);
- if (IS_ERR(osr))
- return PTR_ERR(osr);
- osr->type = htons(s - stats);
- osr->flags = 0;
- resize_openflow_skb(skb, &osr->header, max_openflow_len);
- body = osr->body;
- body_len = max_openflow_len - offsetof(struct ofp_stats_reply, body);
-
- err = s->dump(dp, (void *) cb->args[4], body, &body_len);
- if (err >= 0) {
- if (!err)
- cb->args[0] = 2;
- else
- osr->flags = ntohs(OFPSF_REPLY_MORE);
- resize_openflow_skb(skb, &osr->header,
- (offsetof(struct ofp_stats_reply, body)
- + body_len));
- err = skb->len;
- }
+#ifdef SUPPORT_SNAT
+	case ODP_SNAT_ADD_PORT:
+		err = -EFAULT;
+		if (copy_from_user(&osc, (struct odp_snat_config __user *)argp,
+				   sizeof osc))
+			break;
+		err = snat_add_port(dp, &osc);
+		break;
+	case ODP_SNAT_DEL_PORT:
+		err = get_user(port_no, (int __user *)argp);
+		if (err)
+			break;
+		err = snat_del_port(dp, port_no);
+		break;
+#endif
+
+	default:
+		err = -ENOIOCTLCMD;
+		break;
+	}
+	/* Every path out of the second switch drops the datapath mutex. */
+	mutex_unlock(&dp->mutex);
return err;
}
-static int
-dp_genl_openflow_done(struct netlink_callback *cb)
+/* Returns 1 if any queue of 'dp' whose bit is set in the 'listeners' mask
+ * holds at least one packet, otherwise 0.  Bit i of the mask corresponds to
+ * dp->queues[i]. */
+static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
{
- if (cb->args[0]) {
- const struct stats_type *s = &stats[cb->args[2]];
- if (s->done)
- s->done((void *) cb->args[4]);
+	int q;
+
+	for (q = 0; q < DP_N_QUEUES; q++) {
+		/* Skip queues the caller is not listening on. */
+		if (!(listeners & (1 << q)))
+			continue;
+		if (!skb_queue_empty(&dp->queues[q]))
+			return 1;
}
return 0;
}
-static struct genl_ops dp_genl_ops_openflow = {
- .cmd = DP_GENL_C_OPENFLOW,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_openflow_policy,
- .doit = dp_genl_openflow,
- .dumpit = dp_genl_openflow_dumpit,
-};
+/* read() handler for the datapath character device.
+ *
+ * 'f->private_data' holds a bitmask selecting which of dp->queues[] this file
+ * listens on. Dequeues one skb from the lowest-numbered selected queue that
+ * has one and copies at most 'nbytes' of it to 'buf'; the skb is then freed,
+ * so any bytes beyond 'nbytes' are dropped.
+ *
+ * If no selected queue has a packet, fails with -EAGAIN for O_NONBLOCK files;
+ * otherwise sleeps on dp->waitqueue until a packet of interest is queued, or
+ * fails with -ERESTARTSYS if a signal is pending.
+ *
+ * Returns the number of bytes copied, 0 if 'nbytes' is 0 or no queue is
+ * selected, -ENODEV if get_dp() returns NULL for the file's minor number, or
+ * the nonzero result of skb_copy_datagram_iovec() if the copy fails. */
+ssize_t openflow_read(struct file *f, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ /* Cast through long: private_data is a pointer, which is wider than
+ * int on 64-bit kernels. */
+ int listeners = (int)(long)f->private_data;
+ int dp_idx = iminor(f->f_path.dentry->d_inode);
+ struct datapath *dp = get_dp(dp_idx);
+ struct sk_buff *skb;
+ /* The iovec itself lives on the kernel stack; only iov_base points
+ * into user space. */
+ struct iovec iov;
+ size_t copy_bytes;
+ int retval;
-static struct genl_ops *dp_genl_all_ops[] = {
- /* Keep this operation first. Generic Netlink dispatching
- * looks up operations with linear search, so we want it at the
- * front. */
- &dp_genl_ops_openflow,
-
- &dp_genl_ops_add_dp,
- &dp_genl_ops_del_dp,
- &dp_genl_ops_query_dp,
- &dp_genl_ops_add_port,
- &dp_genl_ops_del_port,
-};
+ if (!dp)
+ return -ENODEV;
-static int dp_init_netlink(void)
-{
- int err;
- int i;
+ if (nbytes == 0 || !listeners)
+ return 0;
- err = genl_register_family(&dp_genl_family);
- if (err)
- return err;
+ for (;;) {
+ int i;
- for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
- err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
- if (err)
- goto err_unregister;
- }
+ for (i = 0; i < DP_N_QUEUES; i++) {
+ if (listeners & (1 << i)) {
+ skb = skb_dequeue(&dp->queues[i]);
+ if (skb)
+ goto success;
+ }
+ }
- for (i = 0; i < N_MC_GROUPS; i++) {
- snprintf(mc_groups[i].name, sizeof mc_groups[i].name,
- "openflow%d", i);
- err = genl_register_mc_group(&dp_genl_family, &mc_groups[i]);
- if (err < 0)
- goto err_unregister;
- }
+ if (f->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ goto error;
+ }
- return 0;
+ wait_event_interruptible(dp->waitqueue,
+ dp_has_packet_of_interest(dp,
+ listeners));
-err_unregister:
- genl_unregister_family(&dp_genl_family);
- return err;
-}
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ goto error;
+ }
+ }
+success:
+ /* skb->len is unsigned int and nbytes is size_t; min() insists on
+ * identical types, so name the type explicitly. */
+ copy_bytes = min_t(size_t, skb->len, nbytes);
+ iov.iov_base = buf;
+ iov.iov_len = copy_bytes;
+ retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len);
+ if (!retval)
+ retval = copy_bytes;
+ kfree_skb(skb);
-static void dp_uninit_netlink(void)
-{
- genl_unregister_family(&dp_genl_family);
+error:
+ return retval;
}
-/* Set the description strings if appropriate values are available from
- * the DMI. */
-static void set_desc(void)
+/* poll() handler for the datapath character device.
+ *
+ * Registers the caller on dp->waitqueue and reports POLLIN | POLLRDNORM when
+ * a queue selected by the bitmask in 'file->private_data' holds a packet.
+ * If get_dp() returns NULL for the file's minor number, reports the file as
+ * readable and hung up (presumably so that pollers wake and observe the
+ * error from read() instead of waiting forever -- TODO confirm intent). */
+static unsigned int openflow_poll(struct file *file, poll_table *wait)
{
- const char *uuid = dmi_get_system_info(DMI_PRODUCT_UUID);
- const char *vendor = dmi_get_system_info(DMI_SYS_VENDOR);
- const char *name = dmi_get_system_info(DMI_PRODUCT_NAME);
- const char *version = dmi_get_system_info(DMI_PRODUCT_VERSION);
- const char *serial = dmi_get_system_info(DMI_PRODUCT_SERIAL);
- const char *uptr;
-
- if (!uuid || *uuid == '\0' || strlen(uuid) != 36)
- return;
+ int dp_idx = iminor(file->f_path.dentry->d_inode);
+ struct datapath *dp = get_dp(dp_idx);
+ unsigned int mask;
- /* We are only interested version 1 UUIDs, since the last six bytes
- * are an IEEE 802 MAC address. */
- if (uuid[14] != '1')
- return;
+ if (dp) {
+ mask = 0;
+ poll_wait(file, &dp->waitqueue, wait);
+ /* Cast through long: private_data is a pointer, which is
+ * wider than int on 64-bit kernels. */
+ if (dp_has_packet_of_interest(dp, (int)(long)file->private_data))
+ mask |= POLLIN | POLLRDNORM;
+ } else {
+ mask = POLLIN | POLLRDNORM | POLLHUP;
+ }
+ return mask;
+}
- /* Only set if the UUID is from Nicira. */
- uptr = uuid + 24;
- if (strncmp(uptr, NICIRA_OUI_STR, strlen(NICIRA_OUI_STR)))
- return;
+const struct file_operations openflow_fops = {
+ /* XXX .aio_read = openflow_aio_read, */
+ .read = openflow_read,
+ .poll = openflow_poll,
+ .unlocked_ioctl = openflow_ioctl,
+ /* XXX .fasync = openflow_fasync, */
+};
- if (vendor)
- strlcpy(mfr_desc, vendor, sizeof(mfr_desc));
- if (name || version)
- snprintf(hw_desc, sizeof(hw_desc), "%s %s",
- name ? name : "",
- version ? version : "");
- if (serial)
- strlcpy(serial_num, serial, sizeof(serial_num));
-}
+static int major;
+/* Module initialization. Among the steps visible here: logs a banner, calls
+ * flow_init(), registers the "openflow" character device with a dynamically
+ * assigned major number (saved in 'major' for dp_cleanup()), and hooks
+ * br_handle_frame_hook. On failure, unwinds through the error_* labels and
+ * returns the negative errno held in 'err'. */
static int __init dp_init(void)
{
int err;
- printk("OpenFlow %s, built "__DATE__" "__TIME__", "
- "protocol 0x%02x\n", VERSION BUILDNR, OFP_VERSION);
+ printk("OpenFlow %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
err = flow_init();
if (err)
if (err)
goto error_flow_exit;
- err = dp_init_netlink();
- if (err)
+ /* register_chrdev() with major 0 returns the allocated major number
+ * on success or a negative errno on failure, so the result must be
+ * checked in 'major' and propagated to 'err' for the error path. */
+ major = register_chrdev(0, "openflow", &openflow_fops);
+ if (major < 0) {
+ err = major;
goto error_unreg_notifier;
+ }
- dp_ioctl_hook = NULL;
- dp_add_dp_hook = NULL;
- dp_del_dp_hook = NULL;
- dp_add_if_hook = NULL;
- dp_del_if_hook = NULL;
-
- /* Check if better descriptions of the switch are available than the
- * defaults. */
- set_desc();
-
/* Hook into callback used by the bridge to intercept packets.
* Parasites we are. */
if (br_handle_frame_hook)
error_flow_exit:
flow_exit();
error:
- printk(KERN_EMERG "openflow: failed to install!");
return err;
}
static void dp_cleanup(void)
{
- fwd_exit();
- dp_uninit_netlink();
+ unregister_chrdev(major, "openflow");
unregister_netdevice_notifier(&dp_device_notifier);
flow_exit();
br_handle_frame_hook = NULL;
module_exit(dp_cleanup);
MODULE_DESCRIPTION("OpenFlow switching datapath");
-MODULE_AUTHOR("Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University");
MODULE_LICENSE("GPL");
#ifndef DATAPATH_H
#define DATAPATH_H 1
+#include <asm/page.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/netlink.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
-#include "openflow/openflow.h"
-#include "openflow/nicira-ext.h"
#include "flow.h"
+struct sk_buff;
-#define NL_FLOWS_PER_MESSAGE 100
+#define DP_MAX_PORTS 256
+#define DP_MAX_GROUPS 16
-/* Capabilities supported by this implementation. */
-#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \
- | OFPC_TABLE_STATS \
- | OFPC_PORT_STATS \
- | OFPC_MULTI_PHY_TX )
+#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*)))
+#define DP_L2_SIZE (1 << DP_L2_BITS)
+#define DP_L2_SHIFT 0
-/* Actions supported by this implementation. */
-#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \
- | (1 << OFPAT_SET_VLAN_VID) \
- | (1 << OFPAT_SET_VLAN_PCP) \
- | (1 << OFPAT_STRIP_VLAN) \
- | (1 << OFPAT_SET_DL_SRC) \
- | (1 << OFPAT_SET_DL_DST) \
- | (1 << OFPAT_SET_NW_SRC) \
- | (1 << OFPAT_SET_NW_DST) \
- | (1 << OFPAT_SET_TP_SRC) \
- | (1 << OFPAT_SET_TP_DST) )
+#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**)))
+#define DP_L1_SIZE (1 << DP_L1_BITS)
+#define DP_L1_SHIFT DP_L2_BITS
-struct sk_buff;
+#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
+
+struct dp_table {
+ unsigned int n_buckets;
+ struct sw_flow ***flows[2];
+};
+
+#define DP_N_QUEUES 2
+#define DP_MAX_QUEUE_LEN 100
-#define DP_MAX_PORTS 255
+struct dp_stats_percpu {
+ u64 n_frags;
+ u64 n_hit;
+ u64 n_missed;
+ u64 n_lost;
+};
struct datapath {
+ struct mutex mutex;
int dp_idx;
- struct timer_list timer; /* Expiration timer. */
- struct sw_chain *chain; /* Forwarding rules. */
+#ifdef SUPPORT_SNAT
struct task_struct *dp_task; /* Kernel thread for maintenance. */
+#endif
- /* Data related to the "of" device of this datapath */
- struct net_device *netdev;
+ struct net_device *netdev; /* ofX network device. */
- /* Configuration set from controller */
- uint16_t flags;
- uint16_t miss_send_len;
+ struct kobject ifobj;
- /* Flag controlling whether Flow End messages are generated. */
- uint8_t send_flow_end;
+ int drop_frags;
- struct kobject ifobj;
+ /* Queued data. */
+ struct sk_buff_head queues[DP_N_QUEUES];
+ wait_queue_head_t waitqueue;
+
+ /* Flow table. */
+ unsigned int n_flows;
+ struct dp_table *table;
+
+ /* Port groups. */
+ struct odp_port_group *groups[DP_MAX_GROUPS];
/* Switch ports. */
+ unsigned int n_ports;
struct net_bridge_port *ports[DP_MAX_PORTS];
- struct net_bridge_port *local_port; /* OFPP_LOCAL port. */
struct list_head port_list; /* All ports, including local_port. */
-};
-/* Information necessary to reply to the sender of an OpenFlow message. */
-struct sender {
- uint32_t xid; /* OpenFlow transaction ID of request. */
- uint32_t pid; /* Netlink process ID of sending socket. */
- uint32_t seq; /* Netlink sequence ID of request. */
+ /* Stats. */
+ struct dp_stats_percpu *stats_percpu;
};
struct net_bridge_port {
- u16 port_no;
- u32 config; /* Some subset of OFPPC_* flags. */
- u32 state; /* Some subset of OFPPS_* flags. */
+ u16 port_no;
spinlock_t lock;
struct datapath *dp;
struct net_device *dev;
extern int (*dp_add_if_hook)(struct net_bridge_port *p);
extern int (*dp_del_if_hook)(struct net_bridge_port *p);
-int dp_del_switch_port(struct net_bridge_port *);
-int dp_xmit_skb(struct sk_buff *skb);
+/* Flow table. */
+struct dp_table *dp_table_create(unsigned int n_buckets);
+void dp_table_destroy(struct dp_table *, int free_flows);
+struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
+struct sw_flow **dp_table_lookup_for_insert(struct dp_table *table,
+ struct sw_flow *target);
+int dp_table_delete(struct dp_table *, struct sw_flow *);
+int dp_table_expand(struct datapath *);
+int dp_table_flush(struct datapath *);
+int dp_table_foreach(struct dp_table *table,
+ int (*callback)(struct sw_flow *flow, void *aux),
+ void *aux);
+
+void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *);
+int dp_del_port(struct net_bridge_port *);
int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
int ignore_no_fwd);
-int dp_output_control(struct datapath *, struct sk_buff *, size_t, int);
-void dp_set_origin(struct datapath *, uint16_t, struct sk_buff *);
-int dp_send_features_reply(struct datapath *, const struct sender *);
-int dp_send_config_reply(struct datapath *, const struct sender *);
-int dp_send_port_status(struct net_bridge_port *p, uint8_t status);
-int dp_send_flow_end(struct datapath *, struct sw_flow *,
- enum nx_flow_end_reason);
-int dp_send_error_msg(struct datapath *, const struct sender *,
- uint16_t, uint16_t, const void *, size_t);
-int dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm);
-int dp_send_echo_reply(struct datapath *, const struct sender *,
- const struct ofp_header *);
-int dp_send_hello(struct datapath *, const struct sender *,
- const struct ofp_header *);
+int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
+void dp_set_origin(struct datapath *, u16, struct sk_buff *);
/* Should hold at least RCU read lock when calling */
-struct datapath *dp_get_by_idx(int dp_idx);
-struct datapath *dp_get_by_name(const char *dp_name);
+struct datapath *get_dp(int dp_idx);
#endif /* datapath.h */
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-/* Functions for executing OpenFlow actions. */
-
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/in6.h>
-#include <linux/if_vlan.h>
-#include <net/checksum.h>
-#include "forward.h"
-#include "dp_act.h"
-#include "openflow/nicira-ext.h"
-#include "nx_act.h"
-
-
-static uint16_t
-validate_output(struct datapath *dp, const struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_output *oa = (struct ofp_action_output *)ah;
-
- if (oa->port == htons(OFPP_NONE) ||
- (!(key->wildcards & OFPFW_IN_PORT) && oa->port == key->in_port))
- return OFPBAC_BAD_OUT_PORT;
-
- return ACT_VALIDATION_OK;
-}
-
-static int
-do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len,
- int out_port, int ignore_no_fwd)
-{
- if (!skb)
- return -ENOMEM;
- return (likely(out_port != OFPP_CONTROLLER)
- ? dp_output_port(dp, skb, out_port, ignore_no_fwd)
- : dp_output_control(dp, skb, max_len, OFPR_ACTION));
-}
-
-
-static struct sk_buff *
-vlan_pull_tag(struct sk_buff *skb)
-{
- struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
- struct ethhdr *eh;
-
-
- /* Verify we were given a vlan packet */
- if (vh->h_vlan_proto != htons(ETH_P_8021Q))
- return skb;
-
- memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
-
- eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
-
- skb->protocol = eh->h_proto;
- skb->mac_header += VLAN_HLEN;
-
- return skb;
-}
-
-
-static struct sk_buff *
-modify_vlan_tci(struct sk_buff *skb, struct sw_flow_key *key,
- uint16_t tci, uint16_t mask)
-{
- struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
-
- if (key->dl_vlan != htons(OFP_VLAN_NONE)) {
- /* Modify vlan id, but maintain other TCI values */
- vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci);
- } else {
- /* Add vlan header */
-
- /* xxx The vlan_put_tag function, doesn't seem to work
- * xxx reliably when it attempts to use the hardware-accelerated
- * xxx version. We'll directly use the software version
- * xxx until the problem can be diagnosed.
- */
- skb = __vlan_put_tag(skb, tci);
- vh = vlan_eth_hdr(skb);
- }
- key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
-
- return skb;
-}
-
-static struct sk_buff *
-set_vlan_vid(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah;
- uint16_t tci = ntohs(va->vlan_vid);
-
- return modify_vlan_tci(skb, key, tci, VLAN_VID_MASK);
-}
-
-/* Mask for the priority bits in a vlan header. The kernel doesn't
- * define this like it does for VID. */
-#define VLAN_PCP_MASK 0xe000
-
-static struct sk_buff *
-set_vlan_pcp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah;
- uint16_t tci = (uint16_t)va->vlan_pcp << 13;
-
- return modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK);
-}
-
-static struct sk_buff *
-strip_vlan(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- vlan_pull_tag(skb);
- key->dl_vlan = htons(OFP_VLAN_NONE);
-
- return skb;
-}
-
-static struct sk_buff *
-set_dl_addr(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah;
- struct ethhdr *eh = eth_hdr(skb);
-
- if (da->type == htons(OFPAT_SET_DL_SRC))
- memcpy(eh->h_source, da->dl_addr, sizeof eh->h_source);
- else
- memcpy(eh->h_dest, da->dl_addr, sizeof eh->h_dest);
-
- return skb;
-}
-
-/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
- * covered by the sum has been changed from 'from' to 'to'. If set,
- * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
- * Based on nf_proto_csum_replace4. */
-static void update_csum(__sum16 *sum, struct sk_buff *skb,
- __be32 from, __be32 to, int pseudohdr)
-{
- __be32 diff[] = { ~from, to };
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- *sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
- ~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial((char *)diff, sizeof(diff),
- ~skb->csum);
- } else if (pseudohdr)
- *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
- csum_unfold(*sum)));
-}
-
-static struct sk_buff *
-set_nw_addr(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah;
- uint16_t eth_proto = ntohs(key->dl_type);
-
- if (eth_proto == ETH_P_IP) {
- struct iphdr *nh = ip_hdr(skb);
- uint32_t new, *field;
-
- new = na->nw_addr;
-
- if (ah->type == htons(OFPAT_SET_NW_SRC))
- field = &nh->saddr;
- else
- field = &nh->daddr;
-
- if (key->nw_proto == IPPROTO_TCP) {
- struct tcphdr *th = tcp_hdr(skb);
- update_csum(&th->check, skb, *field, new, 1);
- } else if (key->nw_proto == IPPROTO_UDP) {
- struct udphdr *th = udp_hdr(skb);
- update_csum(&th->check, skb, *field, new, 1);
- }
- update_csum(&nh->check, skb, *field, new, 0);
- *field = new;
- }
-
- return skb;
-}
-
-static struct sk_buff *
-set_tp_port(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah;
- uint16_t eth_proto = ntohs(key->dl_type);
-
- if (eth_proto == ETH_P_IP) {
- uint16_t new, *field;
-
- new = ta->tp_port;
-
- if (key->nw_proto == IPPROTO_TCP) {
- struct tcphdr *th = tcp_hdr(skb);
-
- if (ah->type == htons(OFPAT_SET_TP_SRC))
- field = &th->source;
- else
- field = &th->dest;
-
- update_csum(&th->check, skb, *field, new, 1);
- *field = new;
- } else if (key->nw_proto == IPPROTO_UDP) {
- struct udphdr *th = udp_hdr(skb);
-
- if (ah->type == htons(OFPAT_SET_TP_SRC))
- field = &th->source;
- else
- field = &th->dest;
-
- update_csum(&th->check, skb, *field, new, 1);
- *field = new;
- }
- }
-
- return skb;
-}
-
-struct openflow_action {
- size_t min_size;
- size_t max_size;
- uint16_t (*validate)(struct datapath *dp,
- const struct sw_flow_key *key,
- const struct ofp_action_header *ah);
- struct sk_buff *(*execute)(struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct ofp_action_header *ah);
-};
-
-static const struct openflow_action of_actions[] = {
- [OFPAT_OUTPUT] = {
- sizeof(struct ofp_action_output),
- sizeof(struct ofp_action_output),
- validate_output,
- NULL /* This is optimized into execute_actions */
- },
- [OFPAT_SET_VLAN_VID] = {
- sizeof(struct ofp_action_vlan_vid),
- sizeof(struct ofp_action_vlan_vid),
- NULL,
- set_vlan_vid
- },
- [OFPAT_SET_VLAN_PCP] = {
- sizeof(struct ofp_action_vlan_pcp),
- sizeof(struct ofp_action_vlan_pcp),
- NULL,
- set_vlan_pcp
- },
- [OFPAT_STRIP_VLAN] = {
- sizeof(struct ofp_action_header),
- sizeof(struct ofp_action_header),
- NULL,
- strip_vlan
- },
- [OFPAT_SET_DL_SRC] = {
- sizeof(struct ofp_action_dl_addr),
- sizeof(struct ofp_action_dl_addr),
- NULL,
- set_dl_addr
- },
- [OFPAT_SET_DL_DST] = {
- sizeof(struct ofp_action_dl_addr),
- sizeof(struct ofp_action_dl_addr),
- NULL,
- set_dl_addr
- },
- [OFPAT_SET_NW_SRC] = {
- sizeof(struct ofp_action_nw_addr),
- sizeof(struct ofp_action_nw_addr),
- NULL,
- set_nw_addr
- },
- [OFPAT_SET_NW_DST] = {
- sizeof(struct ofp_action_nw_addr),
- sizeof(struct ofp_action_nw_addr),
- NULL,
- set_nw_addr
- },
- [OFPAT_SET_TP_SRC] = {
- sizeof(struct ofp_action_tp_port),
- sizeof(struct ofp_action_tp_port),
- NULL,
- set_tp_port
- },
- [OFPAT_SET_TP_DST] = {
- sizeof(struct ofp_action_tp_port),
- sizeof(struct ofp_action_tp_port),
- NULL,
- set_tp_port
- }
- /* OFPAT_VENDOR is not here, since it would blow up the array size. */
-};
-
-/* Validate built-in OpenFlow actions. Either returns ACT_VALIDATION_OK
- * or an OFPET_BAD_ACTION error code. */
-static uint16_t
-validate_ofpat(struct datapath *dp, const struct sw_flow_key *key,
- const struct ofp_action_header *ah, uint16_t type, uint16_t len)
-{
- uint16_t ret = ACT_VALIDATION_OK;
- const struct openflow_action *act = &of_actions[type];
-
- if ((len < act->min_size) || (len > act->max_size))
- return OFPBAC_BAD_LEN;
-
- if (act->validate)
- ret = act->validate(dp, key, ah);
-
- return ret;
-}
-
-/* Validate vendor-defined actions. Either returns ACT_VALIDATION_OK
- * or an OFPET_BAD_ACTION error code. */
-static uint16_t
-validate_vendor(struct datapath *dp, const struct sw_flow_key *key,
- const struct ofp_action_header *ah, uint16_t len)
-{
- struct ofp_action_vendor_header *avh;
- int ret = ACT_VALIDATION_OK;
-
- if (len < sizeof(struct ofp_action_vendor_header))
- return OFPBAC_BAD_LEN;
-
- avh = (struct ofp_action_vendor_header *)ah;
-
- switch(ntohl(avh->vendor)) {
- case NX_VENDOR_ID:
- ret = nx_validate_act(dp, key, (struct nx_action_header *)avh, len);
- break;
-
- default:
- return OFPBAC_BAD_VENDOR;
- }
-
- return ret;
-}
-
-/* Validates a list of actions. If a problem is found, a code for the
- * OFPET_BAD_ACTION error type is returned. If the action list validates,
- * ACT_VALIDATION_OK is returned. */
-uint16_t
-validate_actions(struct datapath *dp, const struct sw_flow_key *key,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- uint8_t *p = (uint8_t *)actions;
- int err;
-
- while (actions_len >= sizeof(struct ofp_action_header)) {
- struct ofp_action_header *ah = (struct ofp_action_header *)p;
- size_t len = ntohs(ah->len);
- uint16_t type;
-
- /* Make there's enough remaining data for the specified length
- * and that the action length is a multiple of 64 bits. */
- if (!len || (actions_len < len) || (len % 8) != 0)
- return OFPBAC_BAD_LEN;
-
- type = ntohs(ah->type);
- if (type < ARRAY_SIZE(of_actions)) {
- err = validate_ofpat(dp, key, ah, type, len);
- if (err != ACT_VALIDATION_OK)
- return err;
- } else if (type == OFPAT_VENDOR) {
- err = validate_vendor(dp, key, ah, len);
- if (err != ACT_VALIDATION_OK)
- return err;
- } else
- return OFPBAC_BAD_TYPE;
-
- p += len;
- actions_len -= len;
- }
-
- /* Check if there's any trailing garbage. */
- if (actions_len != 0)
- return OFPBAC_BAD_LEN;
-
- return ACT_VALIDATION_OK;
-}
-
-/* Execute a built-in OpenFlow action against 'skb'. */
-static struct sk_buff *
-execute_ofpat(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ofp_action_header *ah, uint16_t type)
-{
- const struct openflow_action *act = &of_actions[type];
- if (act->execute && make_writable(&skb))
- skb = act->execute(skb, key, ah);
- return skb;
-}
-
-/* Execute a vendor-defined action against 'skb'. */
-static struct sk_buff *
-execute_vendor(struct sk_buff *skb, const struct sw_flow_key *key,
- const struct ofp_action_header *ah)
-{
- struct ofp_action_vendor_header *avh
- = (struct ofp_action_vendor_header *)ah;
- struct datapath *dp = skb->dev->br_port->dp;
-
- /* NB: If changes need to be made to the packet, a call should be
- * made to make_writable or its equivalent first. */
-
- switch(ntohl(avh->vendor)) {
- case NX_VENDOR_ID:
- skb = nx_execute_act(skb, key, (struct nx_action_header *)avh);
- break;
-
- default:
- /* This should not be possible due to prior validation. */
- if (net_ratelimit())
- printk(KERN_WARNING "%s: attempt to execute action "
- "with unknown vendor: %#x\n",
- dp->netdev->name, ntohl(avh->vendor));
- break;
- }
-
- return skb;
-}
-
-/* Execute a list of actions against 'skb'. */
-void execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct ofp_action_header *actions, size_t actions_len,
- int ignore_no_fwd)
-{
- /* Every output action needs a separate clone of 'skb', but the common
- * case is just a single output action, so that doing a clone and
- * then freeing the original skbuff is wasteful. So the following code
- * is slightly obscure just to avoid that. */
- int prev_port;
- size_t max_len=0; /* Initialze to make compiler happy */
- uint8_t *p = (uint8_t *)actions;
-
- prev_port = -1;
-
- /* The action list was already validated, so we can be a bit looser
- * in our sanity-checking. */
- while (actions_len > 0) {
- struct ofp_action_header *ah = (struct ofp_action_header *)p;
- size_t len = htons(ah->len);
-
- WARN_ON_ONCE(skb_shared(skb));
- if (prev_port != -1) {
- do_output(dp, skb_clone(skb, GFP_ATOMIC),
- max_len, prev_port, ignore_no_fwd);
- prev_port = -1;
- }
-
- if (likely(ah->type == htons(OFPAT_OUTPUT))) {
- struct ofp_action_output *oa = (struct ofp_action_output *)p;
- prev_port = ntohs(oa->port);
- max_len = ntohs(oa->max_len);
- } else {
- uint16_t type = ntohs(ah->type);
-
- if (type < ARRAY_SIZE(of_actions))
- skb = execute_ofpat(skb, key, ah, type);
- else if (type == OFPAT_VENDOR)
- skb = execute_vendor(skb, key, ah);
-
- if (!skb) {
- if (net_ratelimit())
- printk(KERN_WARNING "%s: "
- "execute_actions lost skb\n",
- dp->netdev->name);
- return;
- }
- }
-
- p += len;
- actions_len -= len;
- }
- if (prev_port != -1)
- do_output(dp, skb, max_len, prev_port, ignore_no_fwd);
- else
- kfree_skb(skb);
-}
-
-/* Utility functions. */
-
-/* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to
- * the copy.
- * Returns 1 if successful, 0 on failure. */
-int
-make_writable(struct sk_buff **pskb)
-{
- struct sk_buff *skb = *pskb;
- if (skb_shared(skb) || skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
- if (!nskb)
- return 0;
- kfree_skb(skb);
- *pskb = nskb;
- return 1;
- } else {
- unsigned int hdr_len = (skb_transport_offset(skb)
- + sizeof(struct tcphdr));
- return pskb_may_pull(skb, min(hdr_len, skb->len));
- }
-}
+++ /dev/null
-#ifndef DP_ACT_H
-#define DP_ACT_H 1
-
-#include "datapath.h"
-
-#define ACT_VALIDATION_OK ((uint16_t)-1)
-
-uint16_t validate_actions(struct datapath *, const struct sw_flow_key *,
- const struct ofp_action_header *, size_t);
-void execute_actions(struct datapath *, struct sk_buff *,
- struct sw_flow_key *, const struct ofp_action_header *,
- size_t action_len, int ignore_no_fwd);
-int make_writable(struct sk_buff **pskb);
-
-#endif /* dp_act.h */
#include "datapath.h"
#include "dp_dev.h"
-#include "forward.h"
static struct dp_dev *dp_dev_priv(struct net_device *netdev)
while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
skb_reset_mac_header(skb);
rcu_read_lock();
- fwd_port_input(dp->chain, skb, dp->local_port);
+ dp_process_received_packet(skb, dp->ports[ODPP_LOCAL]);
rcu_read_unlock();
}
netif_wake_queue(dp->netdev);
return 0;
}
-static void dp_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+static void dp_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
{
+ struct dp_dev *dp_dev = dp_dev_priv(netdev);
strcpy(info->driver, "openflow");
- sprintf(info->version, "0x%d", OFP_VERSION);
- strcpy(info->fw_version, "N/A");
- strcpy(info->bus_info, "N/A");
+ sprintf(info->bus_info, "%d", dp_dev->dp->dp_idx);
}
static struct ethtool_ops dp_ethtool_ops = {
{
struct net_device *dev = ptr;
struct net_bridge_port *p = dev->br_port;
- unsigned long int flags;
-
-
- /* Check if monitored port */
- if (!p)
- return NOTIFY_DONE;
-
- spin_lock_irqsave(&p->lock, flags);
- switch (event) {
- case NETDEV_UNREGISTER:
- spin_unlock_irqrestore(&p->lock, flags);
- mutex_lock(&dp_mutex);
- dp_del_switch_port(p);
- mutex_unlock(&dp_mutex);
- return NOTIFY_DONE;
- break;
+ if (event == NETDEV_UNREGISTER && p) {
+ struct datapath *dp = p->dp;
+ mutex_lock(&dp->mutex);
+ dp_del_port(p);
+ mutex_unlock(&dp->mutex);
}
- spin_unlock_irqrestore(&p->lock, flags);
-
return NOTIFY_DONE;
}
/*
* Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
+ * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
* Stanford Junior University
*/
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
-#include "openflow/openflow.h"
-#include "openflow/nicira-ext.h"
#include "compat.h"
struct kmem_cache *flow_cache;
-/* Internal function used to compare fields in flow. */
-static inline
-int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b,
- uint32_t w, uint32_t src_mask, uint32_t dst_mask)
+/* Returns nonzero if 'skb' carries a usable IP header at its network offset:
+ * skb->len covers at least sizeof(struct iphdr) bytes there, the header
+ * length reported by ip_hdrlen() is at least sizeof(struct iphdr), and
+ * pskb_may_pull() succeeds for the whole header. Returns 0 otherwise. */
+static inline int iphdr_ok(struct sk_buff *skb)
{
- return ((w & OFPFW_IN_PORT || a->in_port == b->in_port)
- && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan)
- && (w & OFPFW_DL_SRC || !memcmp(a->dl_src, b->dl_src, ETH_ALEN))
- && (w & OFPFW_DL_DST || !memcmp(a->dl_dst, b->dl_dst, ETH_ALEN))
- && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type)
- && !((a->nw_src ^ b->nw_src) & src_mask)
- && !((a->nw_dst ^ b->nw_dst) & dst_mask)
- && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto)
- && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src)
- && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst));
+ int nh_ofs = skb_network_offset(skb);
+ if (skb->len >= nh_ofs + sizeof(struct iphdr)) {
+ int ip_len = ip_hdrlen(skb);
+ return (ip_len >= sizeof(struct iphdr)
+ && pskb_may_pull(skb, nh_ofs + ip_len));
+ }
+ return 0;
}
-/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
- * modulo wildcards in 'b', zero otherwise. */
-int flow_matches_1wild(const struct sw_flow_key *a,
- const struct sw_flow_key *b)
+/* Returns nonzero if 'skb' carries a usable TCP header at its transport
+ * offset: pskb_may_pull() succeeds for sizeof(struct tcphdr) bytes there,
+ * the header length reported by tcp_hdrlen() is at least
+ * sizeof(struct tcphdr), and skb->len covers that full header length.
+ * Returns 0 otherwise. */
+static inline int tcphdr_ok(struct sk_buff *skb)
{
- return flow_fields_match(a, b, b->wildcards,
- b->nw_src_mask, b->nw_dst_mask);
+ int th_ofs = skb_transport_offset(skb);
+ if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) {
+ int tcp_len = tcp_hdrlen(skb);
+ return (tcp_len >= sizeof(struct tcphdr)
+ && skb->len >= th_ofs + tcp_len);
+ }
+ return 0;
}
-EXPORT_SYMBOL(flow_matches_1wild);
-/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
- * modulo wildcards in 'a' or 'b', zero otherwise. */
-int flow_matches_2wild(const struct sw_flow_key *a,
- const struct sw_flow_key *b)
+static inline int udphdr_ok(struct sk_buff *skb)
{
- return flow_fields_match(a, b,
- a->wildcards | b->wildcards,
- a->nw_src_mask & b->nw_src_mask,
- a->nw_dst_mask & b->nw_dst_mask);
+ int th_ofs = skb_transport_offset(skb);
+ return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr));
}
-EXPORT_SYMBOL(flow_matches_2wild);
-
-/* Returns nonzero if 't' (the table entry's key) and 'd' (the key
- * describing the match) match, that is, if their fields are
- * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the
- * wildcards must match in both 't_key' and 'd_key'. Note that the
- * table's wildcards are ignored unless 'strict' is set. */
-int flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d,
- int strict)
+
+static inline int icmphdr_ok(struct sk_buff *skb)
{
- if (strict && d->wildcards != t->wildcards)
- return 0;
- return flow_matches_1wild(t, d);
+ int th_ofs = skb_transport_offset(skb);
+ return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr));
}
-EXPORT_SYMBOL(flow_matches_desc);
-static uint32_t make_nw_mask(int n_wild_bits)
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
+
+static inline struct ofp_tcphdr *ofp_tcp_hdr(const struct sk_buff *skb)
{
- n_wild_bits &= (1u << OFPFW_NW_SRC_BITS) - 1;
- return n_wild_bits < 32 ? htonl(~((1u << n_wild_bits) - 1)) : 0;
+ return (struct ofp_tcphdr *)skb_transport_header(skb);
}
-void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from)
+void flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
- to->wildcards = ntohl(from->wildcards) & OFPFW_ALL;
- to->pad = 0;
- to->in_port = from->in_port;
- to->dl_vlan = from->dl_vlan;
- memcpy(to->dl_src, from->dl_src, ETH_ALEN);
- memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
- to->dl_type = from->dl_type;
-
- to->nw_src = to->nw_dst = to->nw_proto = 0;
- to->tp_src = to->tp_dst = 0;
-
-#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST)
-#define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO)
- if (to->wildcards & OFPFW_DL_TYPE) {
- /* Can't sensibly match on network or transport headers if the
- * data link type is unknown. */
- to->wildcards |= OFPFW_NW | OFPFW_TP;
- } else if (from->dl_type == htons(ETH_P_IP)) {
- to->nw_src = from->nw_src;
- to->nw_dst = from->nw_dst;
- to->nw_proto = from->nw_proto;
-
- if (to->wildcards & OFPFW_NW_PROTO) {
- /* Can't sensibly match on transport headers if the
- * network protocol is unknown. */
- to->wildcards |= OFPFW_TP;
- } else if (from->nw_proto == IPPROTO_TCP
- || from->nw_proto == IPPROTO_UDP
- || from->nw_proto == IPPROTO_ICMP) {
- to->tp_src = from->tp_src;
- to->tp_dst = from->tp_dst;
- } else {
- /* Transport layer fields are undefined. Mark them as
- * exact-match to allow such flows to reside in
- * table-hash, instead of falling into table-linear. */
- to->wildcards &= ~OFPFW_TP;
+ unsigned long flags;
+ u8 tcp_flags = 0;
+
+ if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
+ struct iphdr *nh = ip_hdr(skb);
+ flow->ip_tos = nh->tos;
+ if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) {
+ u8 *tcp = (u8 *)tcp_hdr(skb);
+ tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
}
- } else {
- /* Network and transport layer fields are undefined. Mark them
- * as exact-match to allow such flows to reside in table-hash,
- * instead of falling into table-linear. */
- to->wildcards &= ~(OFPFW_NW | OFPFW_TP);
}
- /* We set these late because code above adjusts to->wildcards. */
- to->nw_src_mask = make_nw_mask(to->wildcards >> OFPFW_NW_SRC_SHIFT);
- to->nw_dst_mask = make_nw_mask(to->wildcards >> OFPFW_NW_DST_SHIFT);
-}
-
-void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from)
-{
- to->wildcards = htonl(from->wildcards);
- to->in_port = from->in_port;
- to->dl_vlan = from->dl_vlan;
- memcpy(to->dl_src, from->dl_src, ETH_ALEN);
- memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
- to->dl_type = from->dl_type;
- to->nw_src = from->nw_src;
- to->nw_dst = from->nw_dst;
- to->nw_proto = from->nw_proto;
- to->tp_src = from->tp_src;
- to->tp_dst = from->tp_dst;
- to->pad = 0;
+ spin_lock_irqsave(&flow->lock, flags);
+ getnstimeofday(&flow->used);
+ flow->packet_count++;
+ flow->byte_count += skb->len;
+ flow->tcp_flags |= tcp_flags;
+ spin_unlock_irqrestore(&flow->lock, flags);
}
-int flow_timeout(struct sw_flow *flow)
+/* Allocates a struct sw_flow_actions with room for 'n_actions' union
+ * odp_action entries after the header and stores 'n_actions' in it; the
+ * action entries themselves are left uninitialized.
+ *
+ * Returns NULL if the header plus 'n_actions' entries would not fit in
+ * PAGE_SIZE bytes (which also keeps the size computation from overflowing)
+ * or if kmalloc() fails. Allocates with GFP_KERNEL, so the caller must be
+ * in a context that may sleep. */
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
{
- if (flow->idle_timeout != OFP_FLOW_PERMANENT
- && time_after64(get_jiffies_64(), flow->used + flow->idle_timeout * HZ))
- return NXFER_IDLE_TIMEOUT;
- else if (flow->hard_timeout != OFP_FLOW_PERMANENT
- && time_after64(get_jiffies_64(),
- flow->created + flow->hard_timeout * HZ))
- return NXFER_HARD_TIMEOUT;
- else
- return -1;
-}
-EXPORT_SYMBOL(flow_timeout);
-
-/* Returns nonzero if 'flow' contains an output action to 'out_port' or
- * has the value OFPP_NONE. 'out_port' is in network-byte order. */
-int flow_has_out_port(struct sw_flow *flow, uint16_t out_port)
-{
- struct sw_flow_actions *sf_acts;
- size_t actions_len;
- uint8_t *p;
-
- if (out_port == htons(OFPP_NONE))
- return 1;
-
- sf_acts = rcu_dereference(flow->sf_acts);
-
- actions_len = sf_acts->actions_len;
- p = (uint8_t *)sf_acts->actions;
-
- while (actions_len > 0) {
- struct ofp_action_header *ah = (struct ofp_action_header *)p;
- size_t len = ntohs(ah->len);
-
- if (ah->type == htons(OFPAT_OUTPUT)) {
- struct ofp_action_output *oa = (struct ofp_action_output *)p;
- if (oa->port == out_port)
- return 1;
- }
+ struct sw_flow_actions *sfa;
- p += len;
- actions_len -= len;
- }
+ if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action))
+ return NULL;
- return 0;
+ sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action),
+ GFP_KERNEL);
+ if (sfa)
+ sfa->n_actions = n_actions;
+ return sfa;
}
-EXPORT_SYMBOL(flow_has_out_port);
-/* Allocates and returns a new flow with room for 'actions_len' actions,
- * using allocation flags 'flags'. Returns the new flow or a null pointer
- * on failure. */
-struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags)
+
+/* Allocates and returns a new flow with room for 'n_actions' actions. Returns
+ * the new flow or a null pointer on failure.
+ *
+ * The action list comes from flow_actions_alloc(), so any 'n_actions' that it
+ * rejects fails here too. The flow itself is allocated from 'flow_cache'
+ * with GFP_KERNEL. The only member of the flow that this function sets is
+ * 'sf_acts'. */
+struct sw_flow *flow_alloc(size_t n_actions)
{
struct sw_flow_actions *sfa;
- size_t size = sizeof *sfa + actions_len;
- struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags);
- if (unlikely(!flow))
- return NULL;
+ struct sw_flow *flow;
- sfa = kmalloc(size, flags);
- if (unlikely(!sfa)) {
- kmem_cache_free(flow_cache, flow);
+ /* Allocate the actions first: if this fails there is no flow to undo. */
+ sfa = flow_actions_alloc(n_actions);
+ if (!sfa)
return NULL;
- }
- sfa->actions_len = actions_len;
- flow->sf_acts = sfa;
+
+ /* Hand the actions to the new flow, or release them if there is no flow
+ * to own them. */
+ flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ if (flow)
+ rcu_assign_pointer(flow->sf_acts, sfa);
+ else
+ kfree(sfa);
return flow;
}
}
EXPORT_SYMBOL(flow_deferred_free_acts);
-/* Copies 'actions' into a newly allocated structure for use by 'flow'
- * and safely frees the structure that defined the previous actions. */
-void flow_replace_acts(struct sw_flow *flow,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_flow_actions *sfa;
- struct sw_flow_actions *orig_sfa = flow->sf_acts;
- size_t size = sizeof *sfa + actions_len;
-
- sfa = kmalloc(size, GFP_ATOMIC);
- if (unlikely(!sfa))
- return;
-
- sfa->actions_len = actions_len;
- memcpy(sfa->actions, actions, actions_len);
-
- rcu_assign_pointer(flow->sf_acts, sfa);
- flow_deferred_free_acts(orig_sfa);
-
- return;
-}
-EXPORT_SYMBOL(flow_replace_acts);
-
-/* Prints a representation of 'key' to the kernel log. */
-void print_flow(const struct sw_flow_key *key)
-{
- printk("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x"
- "->%02x:%02x:%02x:%02x:%02x:%02x "
- "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n",
- key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan),
- key->dl_src[0], key->dl_src[1], key->dl_src[2],
- key->dl_src[3], key->dl_src[4], key->dl_src[5],
- key->dl_dst[0], key->dl_dst[1], key->dl_dst[2],
- key->dl_dst[3], key->dl_dst[4], key->dl_dst[5],
- ntohs(key->dl_type),
- ((unsigned char *)&key->nw_src)[0],
- ((unsigned char *)&key->nw_src)[1],
- ((unsigned char *)&key->nw_src)[2],
- ((unsigned char *)&key->nw_src)[3],
- ((unsigned char *)&key->nw_dst)[0],
- ((unsigned char *)&key->nw_dst)[1],
- ((unsigned char *)&key->nw_dst)[2],
- ((unsigned char *)&key->nw_dst)[3],
- ntohs(key->tp_src), ntohs(key->tp_dst));
-}
-EXPORT_SYMBOL(print_flow);
-
#define SNAP_OUI_LEN 3
+/* Layout of an Ethernet header followed by the LLC (dsap, ssap, ctrl) and
+ * SNAP (oui, ethertype) fields. The structure is packed so that it can be
+ * overlaid directly on frame data. */
struct eth_snap_hdr
{
struct ethhdr eth;
- uint8_t dsap; /* Always 0xAA */
- uint8_t ssap; /* Always 0xAA */
- uint8_t ctrl;
- uint8_t oui[SNAP_OUI_LEN];
- uint16_t ethertype;
+ u8 dsap; /* Always 0xAA */
+ u8 ssap; /* Always 0xAA */
+ u8 ctrl;
+ u8 oui[SNAP_OUI_LEN];
+ u16 ethertype; /* Copied unconverted into the key's dl_type by flow_extract(). */
} __attribute__ ((packed));
static int is_snap(const struct eth_snap_hdr *esh)
/* Parses the Ethernet frame in 'skb', which was received on 'in_port',
* and initializes 'key' to match. Returns 1 if 'skb' contains an IP
* fragment, 0 otherwise. */
-int flow_extract(struct sk_buff *skb, uint16_t in_port,
- struct sw_flow_key *key)
+int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
{
struct ethhdr *eth;
struct eth_snap_hdr *esh;
int nh_ofs;
memset(key, 0, sizeof *key);
- key->dl_vlan = htons(OFP_VLAN_NONE);
- key->in_port = htons(in_port);
+ key->dl_vlan = htons(ODP_VLAN_NONE);
+ key->in_port = in_port;
if (skb->len < sizeof *eth)
return 0;
eth = eth_hdr(skb);
esh = (struct eth_snap_hdr *) eth;
nh_ofs = sizeof *eth;
- if (likely(ntohs(eth->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF))
+ if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF))
key->dl_type = eth->h_proto;
else if (skb->len >= sizeof *esh && is_snap(esh)) {
key->dl_type = esh->ethertype;
nh_ofs = sizeof *esh;
} else {
- key->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE);
+ key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE);
if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
nh_ofs += sizeof(struct llc_pdu_un);
}
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store them
* in 16-bit network byte order. */
- key->icmp_type = htons(icmp->type);
- key->icmp_code = htons(icmp->code);
+ key->tp_src = htons(icmp->type);
+ key->tp_dst = htons(icmp->code);
} else {
/* Avoid tricking other code into
* thinking that this packet has an L4
kmem_cache_destroy(flow_cache);
}
+/* Logs a one-line summary of 'key' with printk(): input port, VLAN,
+ * Ethernet source and destination addresses, Ethernet type, IP protocol,
+ * IP source and destination addresses, and transport ports.
+ *
+ * 'in_port' is printed as stored. The other multi-byte fields are converted
+ * from network to host byte order before printing, so the output is the same
+ * on every CPU. NOTE(review): 'nw_src' and 'nw_dst' are treated as network
+ * byte order like the rest of the key -- confirm against the definition of
+ * struct odp_flow_key. */
+void print_flow(const struct odp_flow_key *key)
+{
+#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5]
+ printk("port%04x:vlan%d mac"MAC_FMT"->"MAC_FMT" "
+ "type%04x proto%d ip%x->%x port%d->%d\n",
+ key->in_port, ntohs(key->dl_vlan),
+ MAC_ARG(key->dl_src), MAC_ARG(key->dl_dst),
+ ntohs(key->dl_type), key->nw_proto,
+ ntohl(key->nw_src), ntohl(key->nw_dst),
+ ntohs(key->tp_src), ntohs(key->tp_dst));
+ /* The formatting helpers are private to this function. */
+#undef MAC_ARG
+#undef MAC_FMT
+}
#include <linux/kernel.h>
#include <linux/spinlock.h>
-#include <linux/list.h>
#include <linux/types.h>
-#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/gfp.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/ip.h>
-#include "openflow/openflow.h"
+#include "openflow/datapath-protocol.h"
struct sk_buff;
-struct ofp_flow_mod;
-/* Identification data for a flow.
- * Network byte order except for the "wildcards" field.
- * Ordered to make bytewise comparisons (e.g. with memcmp()) fail quickly and
- * to keep the amount of padding to a minimum.
- * If you change the ordering of fields here, change flow_keys_equal() to
- * compare the proper fields.
- */
-struct sw_flow_key {
- uint32_t nw_src; /* IP source address. */
- uint32_t nw_dst; /* IP destination address. */
- uint16_t in_port; /* Input switch port */
- uint16_t dl_vlan; /* Input VLAN. */
- uint16_t dl_type; /* Ethernet frame type. */
- uint16_t tp_src; /* TCP/UDP source port. */
- uint16_t tp_dst; /* TCP/UDP destination port. */
- uint8_t dl_src[ETH_ALEN]; /* Ethernet source address. */
- uint8_t dl_dst[ETH_ALEN]; /* Ethernet destination address. */
- uint8_t nw_proto; /* IP protocol. */
- uint8_t pad; /* Pad to 32-bit alignment. */
- uint32_t wildcards; /* Wildcard fields (host byte order). */
- uint32_t nw_src_mask; /* 1-bit in each significant nw_src bit. */
- uint32_t nw_dst_mask; /* 1-bit in each significant nw_dst bit. */
-};
-
-/* The match fields for ICMP type and code use the transport source and
- * destination port fields, respectively. */
-#define icmp_type tp_src
-#define icmp_code tp_dst
-
-/* Compare two sw_flow_keys and return true if they are the same flow, false
- * otherwise. Wildcards and netmasks are not considered. */
-static inline int flow_keys_equal(const struct sw_flow_key *a,
- const struct sw_flow_key *b)
-{
- return !memcmp(a, b, offsetof(struct sw_flow_key, wildcards));
-}
-
-/* We need to manually make sure that the structure is 32-bit aligned,
- * since we don't want garbage values in compiler-generated pads from
- * messing up hash matches.
- */
-static inline void check_key_align(void)
-{
- BUILD_BUG_ON(sizeof(struct sw_flow_key) != 44);
-}
-
-/* We keep actions as a separate structure because we need to be able to
- * swap them out atomically when the modify command comes from a Flow
- * Modify message. */
+/* The actions for a flow, kept in their own allocation and reached through
+ * the flow's 'sf_acts' pointer, which flow_alloc() sets with
+ * rcu_assign_pointer(). 'n_actions' is the number of elements in
+ * 'actions'. NOTE(review): 'rcu' is presumably used by
+ * flow_deferred_free_acts() to free the list after a grace period --
+ * confirm. */
struct sw_flow_actions {
- size_t actions_len;
struct rcu_head rcu;
-
- struct ofp_action_header actions[0];
+ unsigned int n_actions;
+ union odp_action actions[];
};
-/* Locking:
- *
- * - Readers must take rcu_read_lock and hold it the entire time that the flow
- * must continue to exist.
- *
- * - Writers must hold dp_mutex.
- */
+/* A flow: the key that packets are matched against, the actions to apply,
+ * and usage statistics.
+ *
+ * The members from 'lock' onward are the ones that 'lock' protects.
+ * NOTE(review): the rules for accessing 'used' and 'ip_tos', which sit above
+ * 'lock', are not visible here -- confirm with flow_used(). */
struct sw_flow {
- struct sw_flow_key key;
-
- uint16_t priority; /* Only used on entries with wildcards. */
- uint16_t idle_timeout; /* Idle time before discarding (seconds). */
- uint16_t hard_timeout; /* Hard expiration time (seconds) */
- uint64_t used; /* Last used time (in jiffies). */
-
+ struct rcu_head rcu; /* NOTE(review): presumably for flow_deferred_free() -- confirm. */
+ struct odp_flow_key key; /* Fields this flow matches on. */
+ /* Action list; flow_alloc() sets it with rcu_assign_pointer(). */
struct sw_flow_actions *sf_acts;
- /* For use by table implementation. */
- struct list_head node;
- struct list_head iter_node;
- unsigned long serial;
- void *private;
+ struct timespec used; /* Last used time. */
- spinlock_t lock; /* Lock this entry...mostly for stat updates */
- uint64_t created; /* When the flow was created (in jiffies_64). */
- uint64_t packet_count; /* Number of packets associated with this entry */
- uint64_t byte_count; /* Number of bytes associated with this entry */
+ u8 ip_tos; /* IP TOS value. */
- uint8_t tcp_flags; /* Union of seen TCP flags. */
- uint8_t ip_tos; /* IP TOS value. */
-
- struct rcu_head rcu;
+ spinlock_t lock; /* Lock for values below. */
+ u64 packet_count; /* Number of packets matched. */
+ u64 byte_count; /* Number of bytes matched. */
+ u8 tcp_flags; /* Union of seen TCP flags. */
};
-int flow_matches_1wild(const struct sw_flow_key *, const struct sw_flow_key *);
-int flow_matches_2wild(const struct sw_flow_key *, const struct sw_flow_key *);
-int flow_matches_desc(const struct sw_flow_key *, const struct sw_flow_key *,
- int);
-int flow_has_out_port(struct sw_flow *, uint16_t);
-struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags);
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
+struct sw_flow *flow_alloc(size_t n_actions);
void flow_free(struct sw_flow *);
void flow_deferred_free(struct sw_flow *);
void flow_deferred_free_acts(struct sw_flow_actions *);
-void flow_replace_acts(struct sw_flow *, const struct ofp_action_header *,
- size_t);
-int flow_extract(struct sk_buff *, uint16_t in_port, struct sw_flow_key *);
-void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from);
-void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from);
-int flow_timeout(struct sw_flow *);
-
-void print_flow(const struct sw_flow_key *);
-
-static inline int iphdr_ok(struct sk_buff *skb)
-{
- int nh_ofs = skb_network_offset(skb);
- if (skb->len >= nh_ofs + sizeof(struct iphdr)) {
- int ip_len = ip_hdrlen(skb);
- return (ip_len >= sizeof(struct iphdr)
- && pskb_may_pull(skb, nh_ofs + ip_len));
- }
- return 0;
-}
-
-static inline int tcphdr_ok(struct sk_buff *skb)
-{
- int th_ofs = skb_transport_offset(skb);
- if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) {
- int tcp_len = tcp_hdrlen(skb);
- return (tcp_len >= sizeof(struct tcphdr)
- && skb->len >= th_ofs + tcp_len);
- }
- return 0;
-}
-
-static inline int udphdr_ok(struct sk_buff *skb)
-{
- int th_ofs = skb_transport_offset(skb);
- return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr));
-}
-
-static inline int icmphdr_ok(struct sk_buff *skb)
-{
- int th_ofs = skb_transport_offset(skb);
- return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr));
-}
-
-#define TCP_FLAGS_OFFSET 13
-#define TCP_FLAG_MASK 0x3f
-
-static inline struct ofp_tcphdr *ofp_tcp_hdr(const struct sk_buff *skb)
-{
- return (struct ofp_tcphdr *)skb_transport_header(skb);
-}
-
-static inline void flow_used(struct sw_flow *flow, struct sk_buff *skb)
-{
- unsigned long flags;
-
- flow->used = get_jiffies_64();
-
- spin_lock_irqsave(&flow->lock, flags);
- if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
- struct iphdr *nh = ip_hdr(skb);
- flow->ip_tos = nh->tos;
-
- if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) {
- uint8_t *tcp = (uint8_t *)tcp_hdr(skb);
- flow->tcp_flags |= *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
- }
- }
-
- flow->packet_count++;
- flow->byte_count += skb->len;
- spin_unlock_irqrestore(&flow->lock, flags);
-}
+int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
+void flow_used(struct sw_flow *, struct sk_buff *);
-extern struct kmem_cache *flow_cache;
+void print_flow(const struct odp_flow_key *);
int flow_init(void);
void flow_exit(void);
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include "forward.h"
-#include "datapath.h"
-#include "openflow/nicira-ext.h"
-#include "dp_act.h"
-#include "nx_msg.h"
-#include "chain.h"
-#include "flow.h"
-
-/* FIXME: do we need to use GFP_ATOMIC everywhere here? */
-
-
-static struct sk_buff *retrieve_skb(uint32_t id);
-static void discard_skb(uint32_t id);
-
-/* 'skb' was received on port 'p', which may be a physical switch port, the
- * local port, or a null pointer. Process it according to 'chain'. Returns 0
- * if successful, in which case 'skb' is destroyed, or -ESRCH if there is no
- * matching flow, in which case 'skb' still belongs to the caller. */
-int run_flow_through_tables(struct sw_chain *chain, struct sk_buff *skb,
- struct net_bridge_port *p)
-{
- /* Ethernet address used as the destination for STP frames. */
- static const uint8_t stp_eth_addr[ETH_ALEN]
- = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 };
- struct sw_flow_key key;
- struct sw_flow *flow;
-
- if (flow_extract(skb, p ? p->port_no : OFPP_NONE, &key)
- && (chain->dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) {
- /* Drop fragment. */
- kfree_skb(skb);
- return 0;
- }
- if (p && p->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
- p->config & (compare_ether_addr(key.dl_dst, stp_eth_addr)
- ? OFPPC_NO_RECV : OFPPC_NO_RECV_STP)) {
- kfree_skb(skb);
- return 0;
- }
-
- flow = chain_lookup(chain, &key);
- if (likely(flow != NULL)) {
- struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts);
- flow_used(flow, skb);
- execute_actions(chain->dp, skb, &key,
- sf_acts->actions, sf_acts->actions_len, 0);
- return 0;
- } else {
- return -ESRCH;
- }
-}
-
-/* 'skb' was received on port 'p', which may be a physical switch port, the
- * local port, or a null pointer. Process it according to 'chain', sending it
- * up to the controller if no flow matches. Takes ownership of 'skb'. */
-void fwd_port_input(struct sw_chain *chain, struct sk_buff *skb,
- struct net_bridge_port *p)
-{
- WARN_ON_ONCE(skb_shared(skb));
- WARN_ON_ONCE(skb->destructor);
- if (run_flow_through_tables(chain, skb, p))
- dp_output_control(chain->dp, skb, chain->dp->miss_send_len,
- OFPR_NO_MATCH);
-}
-
-static int
-recv_hello(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- return dp_send_hello(chain->dp, sender, msg);
-}
-
-static int
-recv_features_request(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- return dp_send_features_reply(chain->dp, sender);
-}
-
-static int
-recv_get_config_request(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- return dp_send_config_reply(chain->dp, sender);
-}
-
-static int
-recv_set_config(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_switch_config *osc = msg;
- int flags;
-
- flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK);
- if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL
- && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) {
- flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP;
- }
- chain->dp->flags = flags;
-
- chain->dp->miss_send_len = ntohs(osc->miss_send_len);
-
- return 0;
-}
-
-static int
-recv_packet_out(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_packet_out *opo = msg;
- struct sk_buff *skb;
- uint16_t v_code;
- struct sw_flow_key key;
- size_t actions_len = ntohs(opo->actions_len);
-
- if (actions_len > (ntohs(opo->header.length) - sizeof *opo)) {
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: message too short for number "
- "of actions\n", chain->dp->netdev->name);
- return -EINVAL;
- }
-
- if (ntohl(opo->buffer_id) == (uint32_t) -1) {
- int data_len = ntohs(opo->header.length) - sizeof *opo - actions_len;
-
- /* FIXME: there is likely a way to reuse the data in msg. */
- skb = alloc_skb(data_len, GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
-
- /* FIXME? We don't reserve NET_IP_ALIGN or NET_SKB_PAD since
- * we're just transmitting this raw without examining anything
- * at those layers. */
- skb_put(skb, data_len);
- skb_copy_to_linear_data(skb,
- (uint8_t *)opo->actions + actions_len,
- data_len);
- skb_reset_mac_header(skb);
- } else {
- skb = retrieve_skb(ntohl(opo->buffer_id));
- if (!skb)
- return -ESRCH;
- }
-
- dp_set_origin(chain->dp, ntohs(opo->in_port), skb);
-
- flow_extract(skb, ntohs(opo->in_port), &key);
-
- v_code = validate_actions(chain->dp, &key, opo->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
- msg, ntohs(opo->header.length));
- goto error;
- }
-
- execute_actions(chain->dp, skb, &key, opo->actions, actions_len, 1);
-
- return 0;
-
-error:
- kfree_skb(skb);
- return -EINVAL;
-}
-
-static int
-recv_port_mod(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_port_mod *opm = msg;
-
- dp_update_port_flags(chain->dp, opm);
-
- return 0;
-}
-
-static int
-recv_echo_request(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- return dp_send_echo_reply(chain->dp, sender, msg);
-}
-
-static int
-recv_echo_reply(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- return 0;
-}
-
-static int
-add_flow(struct sw_chain *chain, const struct sender *sender,
- const struct ofp_flow_mod *ofm)
-{
- int error = -ENOMEM;
- uint16_t v_code;
- struct sw_flow *flow;
- size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm;
-
- /* Allocate memory. */
- flow = flow_alloc(actions_len, GFP_ATOMIC);
- if (flow == NULL)
- goto error;
-
- flow_extract_match(&flow->key, &ofm->match);
-
- v_code = validate_actions(chain->dp, &flow->key, ofm->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
- ofm, ntohs(ofm->header.length));
- goto error_free_flow;
- }
-
- /* Fill out flow. */
- flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1;
- flow->idle_timeout = ntohs(ofm->idle_timeout);
- flow->hard_timeout = ntohs(ofm->hard_timeout);
- flow->used = flow->created = get_jiffies_64();
- flow->byte_count = 0;
- flow->packet_count = 0;
- flow->tcp_flags = 0;
- flow->ip_tos = 0;
- spin_lock_init(&flow->lock);
- memcpy(flow->sf_acts->actions, ofm->actions, actions_len);
-
- /* Act. */
- error = chain_insert(chain, flow);
- if (error == -ENOBUFS) {
- dp_send_error_msg(chain->dp, sender, OFPET_FLOW_MOD_FAILED,
- OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length));
- goto error_free_flow;
- } else if (error)
- goto error_free_flow;
- error = 0;
- if (ntohl(ofm->buffer_id) != (uint32_t) -1) {
- struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id));
- if (skb) {
- struct sw_flow_key key;
- flow_used(flow, skb);
- dp_set_origin(chain->dp, ntohs(ofm->match.in_port), skb);
- flow_extract(skb, ntohs(ofm->match.in_port), &key);
- execute_actions(chain->dp, skb, &key, ofm->actions, actions_len, 0);
- }
- else
- error = -ESRCH;
- }
- return error;
-
-error_free_flow:
- flow_free(flow);
-error:
- if (ntohl(ofm->buffer_id) != (uint32_t) -1)
- discard_skb(ntohl(ofm->buffer_id));
- return error;
-}
-
-static int
-mod_flow(struct sw_chain *chain, const struct sender *sender,
- const struct ofp_flow_mod *ofm)
-{
- int error = -ENOMEM;
- uint16_t v_code;
- size_t actions_len;
- struct sw_flow_key key;
- uint16_t priority;
- int strict;
-
- flow_extract_match(&key, &ofm->match);
-
- actions_len = ntohs(ofm->header.length) - sizeof *ofm;
-
- v_code = validate_actions(chain->dp, &key, ofm->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
- ofm, ntohs(ofm->header.length));
- goto error;
- }
-
- priority = key.wildcards ? ntohs(ofm->priority) : -1;
- strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 1 : 0;
- chain_modify(chain, &key, priority, strict, ofm->actions, actions_len);
-
- if (ntohl(ofm->buffer_id) != (uint32_t) -1) {
- struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id));
- if (skb) {
- struct sw_flow_key skb_key;
- flow_extract(skb, ntohs(ofm->match.in_port), &skb_key);
- execute_actions(chain->dp, skb, &skb_key,
- ofm->actions, actions_len, 0);
- }
- else
- error = -ESRCH;
- }
- return error;
-
-error:
- if (ntohl(ofm->buffer_id) != (uint32_t) -1)
- discard_skb(ntohl(ofm->buffer_id));
- return error;
-}
-
-static int
-recv_flow(struct sw_chain *chain, const struct sender *sender, const void *msg)
-{
- const struct ofp_flow_mod *ofm = msg;
- uint16_t command = ntohs(ofm->command);
-
- if (command == OFPFC_ADD) {
- return add_flow(chain, sender, ofm);
- } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) {
- return mod_flow(chain, sender, ofm);
- } else if (command == OFPFC_DELETE) {
- struct sw_flow_key key;
- flow_extract_match(&key, &ofm->match);
- return chain_delete(chain, &key, ofm->out_port, 0, 0) ? 0 : -ESRCH;
- } else if (command == OFPFC_DELETE_STRICT) {
- struct sw_flow_key key;
- uint16_t priority;
- flow_extract_match(&key, &ofm->match);
- priority = key.wildcards ? ntohs(ofm->priority) : -1;
- return chain_delete(chain, &key, ofm->out_port,
- priority, 1) ? 0 : -ESRCH;
- } else {
- return -ENOTSUPP;
- }
-}
-
-static int
-recv_vendor(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_vendor_header *ovh = msg;
-
- switch(ntohl(ovh->vendor))
- {
- case NX_VENDOR_ID:
- return nx_recv_msg(chain, sender, msg);
- default:
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: unknown vendor: 0x%x\n",
- chain->dp->netdev->name, ntohl(ovh->vendor));
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_VENDOR, msg, ntohs(ovh->header.length));
- return -EINVAL;
- }
-}
-
-/* 'msg', which is 'length' bytes long, was received across Netlink from
- * 'sender'. Apply it to 'chain'. */
-int
-fwd_control_input(struct sw_chain *chain, const struct sender *sender,
- const void *msg, size_t length)
-{
-
- struct openflow_packet {
- size_t min_size;
- int (*handler)(struct sw_chain *, const struct sender *,
- const void *);
- };
-
- static const struct openflow_packet packets[] = {
- [OFPT_HELLO] = {
- sizeof (struct ofp_header),
- recv_hello,
- },
- [OFPT_ECHO_REQUEST] = {
- sizeof (struct ofp_header),
- recv_echo_request,
- },
- [OFPT_ECHO_REPLY] = {
- sizeof (struct ofp_header),
- recv_echo_reply,
- },
- [OFPT_VENDOR] = {
- sizeof (struct ofp_vendor_header),
- recv_vendor,
- },
- [OFPT_FEATURES_REQUEST] = {
- sizeof (struct ofp_header),
- recv_features_request,
- },
- [OFPT_GET_CONFIG_REQUEST] = {
- sizeof (struct ofp_header),
- recv_get_config_request,
- },
- [OFPT_SET_CONFIG] = {
- sizeof (struct ofp_switch_config),
- recv_set_config,
- },
- [OFPT_PACKET_OUT] = {
- sizeof (struct ofp_packet_out),
- recv_packet_out,
- },
- [OFPT_FLOW_MOD] = {
- sizeof (struct ofp_flow_mod),
- recv_flow,
- },
- [OFPT_PORT_MOD] = {
- sizeof (struct ofp_port_mod),
- recv_port_mod,
- }
- };
-
- struct ofp_header *oh;
-
- oh = (struct ofp_header *) msg;
- if (oh->version != OFP_VERSION
- && oh->type != OFPT_HELLO
- && oh->type != OFPT_ERROR
- && oh->type != OFPT_ECHO_REQUEST
- && oh->type != OFPT_ECHO_REPLY
- && oh->type != OFPT_VENDOR)
- {
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_VERSION, msg, length);
- return -EINVAL;
- }
- if (ntohs(oh->length) != length) {
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: received message length "
- "wrong: %d/%d\n", chain->dp->netdev->name,
- ntohs(oh->length), length);
- return -EINVAL;
- }
-
- if (oh->type < ARRAY_SIZE(packets)) {
- const struct openflow_packet *pkt = &packets[oh->type];
- if (pkt->handler) {
- if (length < pkt->min_size)
- return -EFAULT;
- return pkt->handler(chain, sender, msg);
- }
- }
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_TYPE, msg, length);
- return -EINVAL;
-}
-
-/* Packet buffering. */
-
-#define OVERWRITE_SECS 1
-#define OVERWRITE_JIFFIES (OVERWRITE_SECS * HZ)
-
-struct packet_buffer {
- struct sk_buff *skb;
- uint32_t cookie;
- unsigned long exp_jiffies;
-};
-
-static struct packet_buffer buffers[N_PKT_BUFFERS];
-static unsigned int buffer_idx;
-static DEFINE_SPINLOCK(buffer_lock);
-
-uint32_t fwd_save_skb(struct sk_buff *skb)
-{
- struct sk_buff *old_skb = NULL;
- struct packet_buffer *p;
- unsigned long int flags;
- uint32_t id;
-
- /* FIXME: Probably just need a skb_clone() here. */
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb)
- return -1;
-
- spin_lock_irqsave(&buffer_lock, flags);
- buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK;
- p = &buffers[buffer_idx];
- if (p->skb) {
- /* Don't buffer packet if existing entry is less than
- * OVERWRITE_SECS old. */
- if (time_before(jiffies, p->exp_jiffies)) {
- spin_unlock_irqrestore(&buffer_lock, flags);
- kfree_skb(skb);
- return -1;
- } else {
- /* Defer kfree_skb() until interrupts re-enabled.
- * FIXME: we only need to do that if it has a
- * destructor, but it never should since we orphan
- * sk_buffs on entry. */
- old_skb = p->skb;
- }
- }
- /* Don't use maximum cookie value since the all-bits-1 id is
- * special. */
- if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1)
- p->cookie = 0;
- p->skb = skb;
- p->exp_jiffies = jiffies + OVERWRITE_JIFFIES;
- id = buffer_idx | (p->cookie << PKT_BUFFER_BITS);
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- if (old_skb)
- kfree_skb(old_skb);
-
- return id;
-}
-
-static struct sk_buff *retrieve_skb(uint32_t id)
-{
- unsigned long int flags;
- struct sk_buff *skb = NULL;
- struct packet_buffer *p;
-
- spin_lock_irqsave(&buffer_lock, flags);
- p = &buffers[id & PKT_BUFFER_MASK];
- if (p->cookie == id >> PKT_BUFFER_BITS) {
- skb = p->skb;
- p->skb = NULL;
- } else {
- if (net_ratelimit())
- printk(KERN_NOTICE "cookie mismatch: %x != %x\n",
- id >> PKT_BUFFER_BITS, p->cookie);
- }
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- return skb;
-}
-
-void fwd_discard_all(void)
-{
- int i;
-
- for (i = 0; i < N_PKT_BUFFERS; i++) {
- struct sk_buff *skb;
- unsigned long int flags;
-
- /* Defer kfree_skb() until interrupts re-enabled. */
- spin_lock_irqsave(&buffer_lock, flags);
- skb = buffers[i].skb;
- buffers[i].skb = NULL;
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- kfree_skb(skb);
- }
-}
-
-static void discard_skb(uint32_t id)
-{
- struct sk_buff *old_skb = NULL;
- unsigned long int flags;
- struct packet_buffer *p;
-
- spin_lock_irqsave(&buffer_lock, flags);
- p = &buffers[id & PKT_BUFFER_MASK];
- if (p->cookie == id >> PKT_BUFFER_BITS) {
- /* Defer kfree_skb() until interrupts re-enabled. */
- old_skb = p->skb;
- p->skb = NULL;
- }
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- if (old_skb)
- kfree_skb(old_skb);
-}
-
-void fwd_exit(void)
-{
- fwd_discard_all();
-}
+++ /dev/null
-#ifndef FORWARD_H
-#define FORWARD_H 1
-
-#include <linux/types.h>
-#include "datapath.h"
-#include "flow.h"
-
-struct sk_buff;
-struct sw_chain;
-struct sender;
-
-/* Buffers are identified to userspace by a 31-bit opaque ID. We divide the ID
- * into a buffer number (low bits) and a cookie (high bits). The buffer number
- * is an index into an array of buffers. The cookie distinguishes between
- * different packets that have occupied a single buffer. Thus, the more
- * buffers we have, the lower-quality the cookie... */
-#define PKT_BUFFER_BITS 8
-#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS)
-#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1)
-
-#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS)
-
-
-void fwd_port_input(struct sw_chain *, struct sk_buff *,
- struct net_bridge_port *);
-int run_flow_through_tables(struct sw_chain *, struct sk_buff *,
- struct net_bridge_port *);
-int fwd_control_input(struct sw_chain *, const struct sender *,
- const void *, size_t);
-
-uint32_t fwd_save_skb(struct sk_buff *skb);
-void fwd_discard_all(void);
-void fwd_exit(void);
-
-#endif /* forward.h */
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
static struct sw_flow *table_dummy_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
+ const struct odp_flow_key *key)
{
struct sw_table_dummy *td = (struct sw_table_dummy *) swt;
struct sw_flow *flow;
}
static int table_dummy_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
+ const struct odp_flow_key *key, uint16_t priority, int strict,
const struct ofp_action_header *actions, size_t actions_len)
{
struct sw_table_dummy *td = (struct sw_table_dummy *) swt;
}
static int table_dummy_delete(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict)
+ const struct odp_flow_key *key, uint16_t priority, int strict)
{
struct sw_table_dummy *td = (struct sw_table_dummy *) swt;
struct sw_flow *flow;
}
static int table_dummy_iterate(struct sw_table *swt,
- const struct sw_flow_key *key,
+ const struct odp_flow_key *key,
struct sw_table_position *position,
int (*callback)(struct sw_flow *, void *),
void *private)
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2008 Nicira Networks
- */
-
-/* Functions for Nicira-extended actions. */
-#include "openflow/nicira-ext.h"
-#include "dp_act.h"
-#include "nx_act.h"
-#include "nx_act_snat.h"
-
-uint16_t
-nx_validate_act(struct datapath *dp, const struct sw_flow_key *key,
- const struct nx_action_header *nah, uint16_t len)
-{
- if (len < sizeof *nah)
- return OFPBAC_BAD_LEN;
-
-#ifdef SUPPORT_SNAT
- if (nah->subtype == ntohs(NXAST_SNAT)) {
- struct nx_action_snat *nas = (struct nx_action_snat *)nah;
- if (len != sizeof(*nas))
- return OFPBAC_BAD_LEN;
- else if (ntohs(nas->port) >= OFPP_MAX)
- return OFPBAC_BAD_ARGUMENT;
-
- return ACT_VALIDATION_OK;
- }
-#endif
- return OFPBAC_BAD_VENDOR_TYPE;
-}
-
-struct sk_buff *
-nx_execute_act(struct sk_buff *skb, const struct sw_flow_key *key,
- const struct nx_action_header *nah)
-{
-#ifdef SUPPORT_SNAT
- if (nah->subtype == ntohs(NXAST_SNAT)) {
- struct nx_action_snat *nas = (struct nx_action_snat *)nah;
- snat_skb(skb->dev->br_port->dp, skb, ntohs(nas->port));
- }
-#endif
-
- return skb;
-}
-
+++ /dev/null
-#ifndef NX_ACT_H
-#define NX_ACT_H 1
-
-#include "datapath.h"
-
-
-uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key,
- const struct nx_action_header *nah, uint16_t len);
-
-struct sk_buff *nx_execute_act(struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct nx_action_header *nah);
-
-#endif /* nx_act.h */
+++ /dev/null
-#ifdef SUPPORT_SNAT
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2008, 2009 Nicira Networks
- */
-
-#include <linux/etherdevice.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_bridge.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/in.h>
-#include <net/ip.h>
-#include <linux/icmp.h>
-#include <linux/if_ether.h>
-#include <net/arp.h>
-#include <net/route.h>
-
-#include "forward.h"
-#include "dp_act.h"
-#include "nx_act_snat.h"
-
-
-/* We need these fake structures to make netfilter happy --
- * lots of places assume that skb->dst != NULL, which isn't
- * all that unreasonable.
- *
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it. Future netfilter modules might
- * require us to fill additional fields. */
-static struct net_device __fake_net_device = {
- .hard_header_len = ETH_HLEN
-};
-
-static struct rtable __fake_rtable = {
- .u = {
- .dst = {
- .__refcnt = ATOMIC_INIT(1),
- .dev = &__fake_net_device,
- .path = &__fake_rtable.u.dst,
- .metrics = {[RTAX_MTU - 1] = 1500},
- .flags = DST_NOXFRM,
- }
- },
- .rt_flags = 0,
-};
-
-/* Define ARP for IP since the Linux headers don't do it cleanly. */
-struct ip_arphdr {
- uint16_t ar_hrd;
- uint16_t ar_pro;
- uint8_t ar_hln;
- uint8_t ar_pln;
- uint16_t ar_op;
- uint8_t ar_sha[ETH_ALEN];
- uint32_t ar_sip;
- uint8_t ar_tha[ETH_ALEN];
- uint32_t ar_tip;
-} __attribute__((packed));
-OFP_ASSERT(sizeof(struct ip_arphdr) == 28);
-
-static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
-{
- skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
- if (likely(skb->nf_bridge))
- atomic_set(&(skb->nf_bridge->use), 1);
-
- return skb->nf_bridge;
-}
-
-/* Save a copy of the original Ethernet header. */
-void snat_save_header(struct sk_buff *skb)
-{
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
- if (!skb->nf_bridge)
- return;
-
- skb_copy_from_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
-}
-
-/* Restore a saved Ethernet header. */
-int snat_copy_header(struct sk_buff *skb)
-{
- int err;
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
- if (!skb->nf_bridge)
- return 0;
-
- err = skb_cow_head(skb, header_size);
- if (err)
- return err;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return 0;
-}
-
-/* Push the Ethernet header back on and tranmit the packet. */
-static int
-dp_xmit_skb_push(struct sk_buff *skb)
-{
- skb_push(skb, ETH_HLEN);
- return dp_xmit_skb(skb);
-}
-
-/* Perform maintainence related to a SNAT'd interface. Currently, this only
- * checks whether MAC->IP bindings have expired.
- *
- * Called with the RCU read lock */
-void
-snat_maint(struct net_bridge_port *p)
-{
- struct snat_conf *sc;
- struct snat_mapping *m, *n;
- unsigned long flags;
- unsigned long timeout;
-
- spin_lock_irqsave(&p->lock, flags);
- sc = p->snat;
- if (!sc)
- goto done;
-
- timeout = sc->mac_timeout * HZ;
-
- list_for_each_entry_safe (m, n, &sc->mappings, node) {
- if (time_after(jiffies, m->used + timeout)) {
- list_del(&m->node);
- kfree(m);
- }
- }
-
-done:
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-/* When the packet is bound for a local interface, strip off the fake
- * routing table.
- */
-void snat_local_in(struct sk_buff *skb)
-{
- if (skb->dst == (struct dst_entry *)&__fake_rtable) {
- dst_release(skb->dst);
- skb->dst = NULL;
- }
-}
-
-/* Check whether destination IP's address is in the IP->MAC mappings.
- * If it is, then overwrite the destination MAC with the value from the
- * cache.
- *
- * Returns -1 if there is a problem, otherwise 0. */
-static int
-dnat_mac(struct net_bridge_port *p, struct sk_buff *skb)
-{
- struct snat_conf *sc = p->snat;
- struct iphdr *iph = ip_hdr(skb);
- struct ethhdr *eh = eth_hdr(skb);
- struct snat_mapping *m;
-
- if (skb->protocol != htons(ETH_P_IP))
- return 0;
-
- list_for_each_entry (m, &sc->mappings, node) {
- if (m->ip_addr == iph->daddr){
- /* Found it! */
- if (!make_writable(&skb))
- return -EINVAL;
- m->used = jiffies;
- memcpy(eh->h_dest, m->hw_addr, ETH_ALEN);
- break;
- }
- }
-
- return 0;
-}
-
-static int
-__snat_this_address(struct snat_conf *sc, u32 ip_addr)
-{
- if (sc) {
- u32 h_ip_addr = ntohl(ip_addr);
- return (h_ip_addr >= sc->ip_addr_start &&
- h_ip_addr <= sc->ip_addr_end);
- }
- return 0;
-}
-
-static int
-snat_this_address(struct net_bridge_port *p, u32 ip_addr)
-{
- unsigned long int flags;
- int retval;
-
- spin_lock_irqsave(&p->lock, flags);
- retval = __snat_this_address(p->snat, ip_addr);
- spin_unlock_irqrestore(&p->lock, flags);
-
- return retval;
-}
-
-/* Must hold RCU lock. */
-static struct net_bridge_port *
-get_nbp_by_ip_addr(struct datapath *dp, u32 ip_addr)
-{
- struct net_bridge_port *p;
-
- list_for_each_entry_rcu (p, &dp->port_list, node)
- if (snat_this_address(p, ip_addr))
- return p;
-
- return NULL;
-}
-
-static int
-snat_pre_route_finish(struct sk_buff *skb)
-{
- struct net_bridge_port *p = skb->dev->br_port;
- struct snat_conf *sc;
- struct iphdr *iph = ip_hdr(skb);
- unsigned long flags;
-
- skb->dst = (struct dst_entry *)&__fake_rtable;
- dst_hold(skb->dst);
-
- /* Don't process packets that were not translated due to NAT */
- spin_lock_irqsave(&p->lock, flags);
- sc = p->snat;
- if (!__snat_this_address(sc, iph->daddr)) {
- /* If SNAT is configured for this input device, check the
- * IP->MAC mappings to see if we should update the destination
- * MAC. */
- if (sc)
- dnat_mac(skb->dev->br_port, skb);
-
- }
- spin_unlock_irqrestore(&p->lock, flags);
-
- /* Pass the translated packet as input to the OpenFlow stack, which
- * consumes it. */
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- fwd_port_input(p->dp->chain, skb, p);
-
- return 0;
-}
-
-/* Checks whether 'skb' is an ARP request for an SNAT'd interface. If
- * so, it will generate a response.
- *
- * Returns 0 if the packet was not handled. Otherwise, -1 is returned
- * and the caller is responsible for freeing 'skb'. */
-static int
-handle_arp_snat(struct sk_buff *skb)
-{
- struct net_bridge_port *s_nbp = skb->dev->br_port;
- struct net_bridge_port *nat_nbp;
- struct ip_arphdr *ah;
- uint8_t mac_addr[ETH_ALEN];
-
- if (!pskb_may_pull(skb, sizeof *ah))
- return 0;
-
- ah = (struct ip_arphdr *)arp_hdr(skb);
- if ((ah->ar_op != htons(ARPOP_REQUEST))
- || ah->ar_hln != ETH_ALEN
- || ah->ar_pro != htons(ETH_P_IP)
- || ah->ar_pln != 4)
- return 0;
-
- rcu_read_lock();
- nat_nbp = get_nbp_by_ip_addr(s_nbp->dp, ah->ar_tip);
- if (!nat_nbp) {
- rcu_read_unlock();
- return 0;
- }
- if (s_nbp == nat_nbp)
- memcpy(mac_addr, s_nbp->dp->netdev->dev_addr, sizeof(mac_addr));
- else if (!is_zero_ether_addr(nat_nbp->snat->mac_addr))
- memcpy(mac_addr, nat_nbp->snat->mac_addr, sizeof(mac_addr));
- else {
- rcu_read_unlock();
- return 0;
- }
- rcu_read_unlock();
-
- arp_send(ARPOP_REPLY, ETH_P_ARP, ah->ar_sip, skb->dev, ah->ar_tip,
- ah->ar_sha, mac_addr, ah->ar_sha);
-
- return -1;
-}
-
-/* Checks whether 'skb' is a ping request for an SNAT'd interface. If
- * so, it will generate a response.
- *
- * Returns 0 if the packet was not handled. Otherwise, -1 is returned
- * and the caller is responsible for freeing 'skb'. */
-static int
-handle_icmp_snat(struct sk_buff *skb)
-{
- struct net_bridge_port *p = skb->dev->br_port;
- struct ethhdr *eh;
- struct iphdr *iph;
- struct icmphdr *icmph;
- uint8_t tmp_eth[ETH_ALEN];
- uint32_t tmp_ip;
- struct sk_buff *nskb;
-
- /* We're only interested in addresses we rewrite. */
- iph = ip_hdr(skb);
- if (!snat_this_address(p, iph->daddr)) {
- return 0;
- }
-
- /* Drop fragments and packets not long enough to hold the ICMP
- * header. */
- if ((ntohs(iph->frag_off) & IP_OFFSET) != 0 ||
- !pskb_may_pull(skb, skb_transport_offset(skb) + 4))
- return 0;
-
- /* We only respond to echo requests to our address. Continue
- * processing replies and other ICMP messages since they may be
- * intended for NAT'd hosts. */
- icmph = icmp_hdr(skb);
- if (icmph->type != ICMP_ECHO)
- return 0;
-
- /* Send an echo reply in response */
- nskb = skb_copy(skb, GFP_ATOMIC);
- if (!nskb)
- return -1;
-
- /* Update Ethernet header. */
- eh = eth_hdr(nskb);
- memcpy(tmp_eth, eh->h_dest, ETH_ALEN);
- memcpy(eh->h_dest, eh->h_source, ETH_ALEN);
- memcpy(eh->h_source, tmp_eth, ETH_ALEN);
-
- /* Update IP header.
- * This is kind of busted, at least in that it doesn't check that the
- * echoed IP options make sense. */
- iph = ip_hdr(nskb);
- iph->id = 0;
- iph->frag_off = 0;
- iph->ttl = IPDEFTTL;
- iph->check = 0;
- tmp_ip = iph->daddr;
- iph->daddr = iph->saddr;
- iph->saddr = tmp_ip;
- iph->check = ip_fast_csum((void *)iph, iph->ihl);
-
- /* Update ICMP header. */
- icmph = icmp_hdr(nskb);
- icmph->type = ICMP_ECHOREPLY;
- icmph->checksum = 0;
- icmph->checksum = ip_compute_csum((void *)icmph,
- nskb->tail - skb_transport_header(nskb));
-
- dp_xmit_skb_push(nskb);
-
- return -1;
-}
-
-/* Check if any SNAT maintenance needs to be done on 'skb' before it's
- * checked against the datapath's tables. This includes DNAT
- * modification based on prior SNAT action and responding to ARP and
- * echo requests for the SNAT interface.
- *
- * Returns -1 if the packet was handled and consumed, 0 if the caller
- * should continue to process 'skb'.
- */
-int
-snat_pre_route(struct sk_buff *skb)
-{
- struct iphdr *iph;
- int len;
-
- WARN_ON_ONCE(skb_network_offset(skb));
- if (skb->protocol == htons(ETH_P_ARP)) {
- if (handle_arp_snat(skb))
- goto consume;
- return 0;
- }
- else if (skb->protocol != htons(ETH_P_IP))
- return 0;
-
- if (!pskb_may_pull(skb, sizeof *iph))
- goto consume;
-
- iph = ip_hdr(skb);
- if (iph->ihl < 5 || iph->version != 4)
- goto consume;
-
- if (!pskb_may_pull(skb, ip_hdrlen(skb)))
- goto consume;
- skb_set_transport_header(skb, ip_hdrlen(skb));
-
- /* Check if we need to echo reply for this address */
- iph = ip_hdr(skb);
- if ((iph->protocol == IPPROTO_ICMP) && (handle_icmp_snat(skb)))
- goto consume;
-
- iph = ip_hdr(skb);
- if (unlikely(ip_fast_csum((void *)iph, iph->ihl)))
- goto consume;
-
- len = ntohs(iph->tot_len);
- if ((skb->len < len) || len < (iph->ihl*4))
- goto consume;
-
- if (pskb_trim_rcsum(skb, len))
- goto consume;
-
- nf_bridge_put(skb->nf_bridge);
- if (!nf_bridge_alloc(skb))
- return 0;
-
- NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
- snat_pre_route_finish);
- return -1;
-
-consume:
- kfree_skb(skb);
- return -1;
-}
-
-
-static int
-snat_skb_finish(struct sk_buff *skb)
-{
- NF_HOOK(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
- dp_xmit_skb_push);
-
- return 0;
-}
-
-/* Update the MAC->IP mappings for the private side of the SNAT'd
- * interface. */
-static void
-update_mapping(struct net_bridge_port *p, const struct sk_buff *skb)
-{
- unsigned long flags;
- struct snat_conf *sc;
- const struct iphdr *iph = ip_hdr(skb);
- const struct ethhdr *eh = eth_hdr(skb);
- struct snat_mapping *m;
-
- spin_lock_irqsave(&p->lock, flags);
- sc = p->snat;
- if (!sc)
- goto done;
-
- list_for_each_entry (m, &sc->mappings, node) {
- if (m->ip_addr == iph->saddr){
- memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
- m->used = jiffies;
- goto done;
- }
- }
-
- m = kmalloc(sizeof *m, GFP_ATOMIC);
- if (!m)
- goto done;
- m->ip_addr = iph->saddr;
- memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
- m->used = jiffies;
-
- list_add(&m->node, &sc->mappings);
-
-done:
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-/* Perform SNAT modification on 'skb' and send out 'out_port'. If the
- * port was not configured for SNAT, it will be sent through the interface
- * unmodified. 'skb' is not consumed, so caller will need to free it.
- */
-void
-snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port)
-{
- struct net_bridge_port *p = dp->ports[out_port];
- struct sk_buff *nskb;
-
- if (!p)
- return;
-
- /* FIXME: Expensive. Just need to skb_clone() here?
- * (However, the skb_copy() does linearize and ensure that the headers
- * are accessible.) */
- nskb = skb_copy(skb, GFP_ATOMIC);
- if (!nskb)
- return;
-
- nskb->dev = p->dev;
-
- /* We only SNAT IP, so just send it on its way if not */
- if (skb->protocol != htons(ETH_P_IP)) {
- dp_xmit_skb(nskb);
- return;
- }
-
- /* Set the source MAC to the OF interface */
- memcpy(eth_hdr(nskb)->h_source, dp->netdev->dev_addr, ETH_ALEN);
-
- update_mapping(p, skb);
-
- /* Take the Ethernet header back off for netfilter hooks. */
- skb_pull(nskb, ETH_HLEN);
-
- NF_HOOK(PF_INET, NF_INET_FORWARD, nskb, skb->dev, nskb->dev,
- snat_skb_finish);
-}
-
-/* Remove SNAT configuration on port 'p'.
- *
- * NB: The caller must hold the port's spinlock. */
-int
-snat_free_conf(struct net_bridge_port *p)
-{
- struct snat_conf *sc = p->snat;
-
- if (!sc)
- return -EINVAL;
-
- /* Free existing mapping entries */
- while (!list_empty(&sc->mappings)) {
- struct snat_mapping *m = list_entry(sc->mappings.next,
- struct snat_mapping, node);
- list_del(&m->node);
- kfree(m);
- }
-
- kfree(p->snat);
- p->snat = NULL;
-
- return 0;
-}
-
-/* Remove SNAT configuration from an interface. */
-static int
-snat_del_port(struct datapath *dp, const struct nx_snat_config *nsc)
-{
- unsigned long flags;
- uint16_t port = ntohs(nsc->port);
- struct net_bridge_port *p = dp->ports[port];
-
- if (!p) {
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: attempt to remove snat on "
- "non-existent port: %d\n",
- dp->netdev->name, port);
- return -EINVAL;
- }
-
- spin_lock_irqsave(&p->lock, flags);
- if (snat_free_conf(p)) {
- /* SNAT not configured on this port */
- spin_unlock_irqrestore(&p->lock, flags);
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: attempt to remove snat on "
- "non-snat port: %d\n", dp->netdev->name, port);
- return -EINVAL;
- }
-
- spin_unlock_irqrestore(&p->lock, flags);
-
- return 0;
-}
-
-/* Add SNAT configuration to an interface. */
-static int
-snat_add_port(struct datapath *dp, const struct nx_snat_config *nsc)
-{
- unsigned long flags;
- uint16_t port = ntohs(nsc->port);
- struct net_bridge_port *p = dp->ports[port];
- uint16_t mac_timeout = ntohs(nsc->mac_timeout);
- struct snat_conf *sc;
-
- if (mac_timeout == 0)
- mac_timeout = MAC_TIMEOUT_DEFAULT;
-
- if (!p) {
- if (net_ratelimit())
- printk(KERN_NOTICE "%s: attempt to add snat on "
- "non-existent port: %d\n",
- dp->netdev->name, port);
- return -EINVAL;
- }
-
- /* If SNAT is already configured on the port, check whether the same
- * IP addresses are used. If so, just update the mac timeout
- * configuration. Otherwise, drop all SNAT configuration and
- * reconfigure it. */
- spin_lock_irqsave(&p->lock, flags);
- if (p->snat) {
- if ((p->snat->ip_addr_start == ntohl(nsc->ip_addr_start))
- && (p->snat->ip_addr_end == ntohl(nsc->ip_addr_end))) {
- p->snat->mac_timeout = mac_timeout;
- spin_unlock_irqrestore(&p->lock, flags);
- return 0;
- }
-
- /* Free the existing configuration and mappings. */
- snat_free_conf(p);
- }
-
- sc = kzalloc(sizeof *sc, GFP_ATOMIC);
- if (!sc) {
- spin_unlock_irqrestore(&p->lock, flags);
- return -ENOMEM;
- }
-
- sc->ip_addr_start = ntohl(nsc->ip_addr_start);
- sc->ip_addr_end = ntohl(nsc->ip_addr_end);
- sc->mac_timeout = mac_timeout;
- memcpy(sc->mac_addr, nsc->mac_addr, sizeof(sc->mac_addr));
- INIT_LIST_HEAD(&sc->mappings);
-
- p->snat = sc;
- spin_unlock_irqrestore(&p->lock, flags);
-
- return 0;
-}
-
-/* Handle a SNAT configuration message.
- *
- * Returns 0 if no problems are found. Otherwise, a negative errno. */
-int
-snat_mod_config(struct datapath *dp, const struct nx_act_config *nac)
-{
- int n_entries = (ntohs(nac->header.header.length) - sizeof *nac)
- / sizeof (struct nx_snat_config);
- int ret = 0;
- int i;
-
- for (i=0; i<n_entries; i++) {
- const struct nx_snat_config *nsc = &nac->snat[i];
- int r = 0;
-
- if (nsc->command == NXSC_ADD)
- r = snat_add_port(dp, nsc);
- else
- r = snat_del_port(dp, nsc);
-
- if (r)
- ret = r;
- }
-
- return ret;
-}
-#endif
+++ /dev/null
-#ifdef SUPPORT_SNAT
-#ifndef ACT_SNAT_H
-#define ACT_SNAT_H
-
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/rcupdate.h>
-
-#include "openflow/nicira-ext.h"
-#include "datapath.h"
-
-/* Cache of IP->MAC mappings on the side hidden by the SNAT */
-struct snat_mapping {
- struct list_head node;
- uint32_t ip_addr; /* Stored in network-order */
- uint8_t hw_addr[ETH_ALEN];
- unsigned long used; /* Last used time (in jiffies). */
-
- struct rcu_head rcu;
-};
-
-struct snat_conf {
- uint32_t ip_addr_start; /* Stored in host-order */
- uint32_t ip_addr_end; /* Stored in host-order */
- uint16_t mac_timeout;
-
- uint8_t mac_addr[ETH_ALEN];
-
- struct list_head mappings; /* List of snat_mapping entries */
-};
-
-#define MAC_TIMEOUT_DEFAULT 120
-
-void snat_local_in(struct sk_buff *skb);
-int snat_pre_route(struct sk_buff *skb);
-void snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port);
-void snat_save_header(struct sk_buff *skb);
-int snat_copy_header(struct sk_buff *skb);
-void snat_maint(struct net_bridge_port *p);
-int snat_mod_config(struct datapath *, const struct nx_act_config *);
-int snat_free_conf(struct net_bridge_port *p);
-
-#endif
-#endif
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2008 Nicira Networks
- */
-
-#include "chain.h"
-#include "datapath.h"
-#include "openflow/nicira-ext.h"
-#include "nx_act_snat.h"
-#include "nx_msg.h"
-
-
-int
-nx_recv_msg(struct sw_chain *chain, const struct sender *sender,
- const void *msg)
-{
- const struct nicira_header *nh = msg;
-
- switch (ntohl(nh->subtype)) {
-
- case NXT_FLOW_END_CONFIG: {
- const struct nx_flow_end_config *nfec = msg;
- chain->dp->send_flow_end = nfec->enable;
- return 0;
- }
-
-#ifdef SUPPORT_SNAT
- case NXT_ACT_SET_CONFIG: {
- const struct nx_act_config *nac = msg;
- if (ntohs(nh->header.length) < sizeof(*nac))
- return -EINVAL;
-
- if (nac->type == htons(NXAST_SNAT))
- return snat_mod_config(chain->dp, nac);
- else
- return -EINVAL;
- break;
- }
-#endif
-
- default:
- dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_SUBTYPE, msg, ntohs(nh->header.length));
- return -EINVAL;
- }
-
- return -EINVAL;
-}
+++ /dev/null
-#ifndef NX_MSG_H
-#define NX_MSG_H 1
-
-int nx_recv_msg(struct sw_chain *chain, const struct sender *sender,
- const void *msg);
-
-#endif /* nx_msg.h */
--- /dev/null
+#ifdef SUPPORT_SNAT
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2008, 2009 Nicira Networks
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/in.h>
+#include <net/ip.h>
+#include <linux/icmp.h>
+#include <linux/if_ether.h>
+#include <net/arp.h>
+#include <net/route.h>
+
+#include "actions.h"
+#include "snat.h"
+
+/* Cache of IP->MAC mappings on the side hidden by the SNAT.
+ *
+ * One entry per source IP address recorded by update_mapping(). Entries
+ * hang off snat_conf.mappings, are consulted by dnat_mac() to rewrite the
+ * destination MAC, and are expired by snat_maint() once 'used' is older
+ * than the configured MAC timeout.
+ *
+ * NOTE(review): the code visible in this file frees entries with a plain
+ * kfree() and never touches 'rcu'; confirm whether that member is still
+ * needed. */
+struct snat_mapping {
+ struct list_head node;
+ u32 ip_addr; /* Stored in network-order */
+ u8 hw_addr[ETH_ALEN];
+ unsigned long used; /* Last used time (in jiffies). */
+
+ struct rcu_head rcu;
+};
+
+/* Per-port SNAT configuration, stored in net_bridge_port.snat.
+ *
+ * [ip_addr_start, ip_addr_end] is an inclusive range: see
+ * __snat_this_address(). 'mac_timeout' is multiplied by HZ in snat_maint()
+ * to obtain the mapping lifetime in jiffies. 'mac_addr', when it is not
+ * all zeros, is the MAC that handle_arp_snat() advertises for requests
+ * that arrive on a different port. */
+struct snat_conf {
+ u32 ip_addr_start; /* Stored in host-order */
+ u32 ip_addr_end; /* Stored in host-order */
+ u16 mac_timeout;
+
+ u8 mac_addr[ETH_ALEN];
+
+ struct list_head mappings; /* List of snat_mapping entries */
+};
+
+/* NOTE(review): presumably the default for snat_conf.mac_timeout, in
+ * seconds (snat_maint() scales mac_timeout by HZ) -- confirm against the
+ * code that applies it. */
+#define MAC_TIMEOUT_DEFAULT 120
+
+/* We need these fake structures to make netfilter happy --
+ * lots of places assume that skb->dst != NULL, which isn't
+ * all that unreasonable.
+ *
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ *
+ * snat_pre_route_finish() points skb->dst at __fake_rtable and takes a
+ * reference with dst_hold(); snat_local_in() releases that reference and
+ * clears skb->dst again. */
+static struct net_device __fake_net_device = {
+ .hard_header_len = ETH_HLEN
+};
+
+static struct rtable __fake_rtable = {
+ .u = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .dev = &__fake_net_device,
+ .path = &__fake_rtable.u.dst,
+ .metrics = {[RTAX_MTU - 1] = 1500},
+ .flags = DST_NOXFRM,
+ }
+ },
+ .rt_flags = 0,
+};
+
+/* Define ARP for IP since the Linux headers don't do it cleanly.
+ *
+ * The struct is packed and is overlaid directly on packet data by
+ * handle_arp_snat(), which compares 'ar_op' and 'ar_pro' against htons()
+ * values and 'ar_hln'/'ar_pln' against ETH_ALEN and 4. The multi-byte
+ * members therefore hold network byte order even though they are declared
+ * with plain u16/u32 types. */
+struct ip_arphdr {
+ u16 ar_hrd;
+ u16 ar_pro;
+ u8 ar_hln;
+ u8 ar_pln;
+ u16 ar_op;
+ u8 ar_sha[ETH_ALEN];
+ u32 ar_sip;
+ u8 ar_tha[ETH_ALEN];
+ u32 ar_tip;
+} __attribute__((packed));
+
+/* Allocates a zeroed nf_bridge_info with GFP_ATOMIC, stores it in
+ * skb->nf_bridge (NULL if the allocation fails) and, on success, sets its
+ * use count to 1. Any previous skb->nf_bridge value is overwritten, so
+ * the caller must have released it first. Returns the new skb->nf_bridge. */
+static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
+{
+	struct nf_bridge_info *info;
+
+	info = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
+	skb->nf_bridge = info;
+	if (likely(info))
+		atomic_set(&(info->use), 1);
+
+	return info;
+}
+
+/* Save a copy of the original Ethernet header.
+ *
+ * Copies the ETH_HLEN + nf_bridge_encap_header_len() bytes that precede
+ * skb->data into skb->nf_bridge->data. Does nothing when 'skb' carries no
+ * nf_bridge state. */
+void snat_save_header(struct sk_buff *skb)
+{
+	int saved_len = ETH_HLEN + nf_bridge_encap_header_len(skb);
+
+	if (skb->nf_bridge)
+		skb_copy_from_linear_data_offset(skb, -saved_len,
+						 skb->nf_bridge->data,
+						 saved_len);
+}
+
+/* Restore a saved Ethernet header.
+ *
+ * Counterpart of snat_save_header(): copies the bytes stashed in
+ * skb->nf_bridge->data back in front of skb->data, after skb_cow_head()
+ * has succeeded for that many bytes of headroom. Only
+ * nf_bridge_encap_header_len() bytes are then pushed back into the data
+ * area; the Ethernet header itself stays in front of skb->data.
+ *
+ * Returns 0 on success or when 'skb' has no nf_bridge state, otherwise the
+ * error returned by skb_cow_head(). */
+int snat_copy_header(struct sk_buff *skb)
+{
+ int err;
+ int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+
+ if (!skb->nf_bridge)
+ return 0;
+
+ err = skb_cow_head(skb, header_size);
+ if (err)
+ return err;
+
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
+ return 0;
+}
+
+/* Push the Ethernet header back on and transmit the packet.
+ *
+ * Used as the final continuation of the netfilter hooks, which run with
+ * the Ethernet header pulled off. Returns whatever dp_xmit_skb() returns. */
+static int
+dp_xmit_skb_push(struct sk_buff *skb)
+{
+ skb_push(skb, ETH_HLEN);
+ return dp_xmit_skb(skb);
+}
+
+/* Perform maintenance related to a SNAT'd interface. Currently, this only
+ * expires IP->MAC mappings: every entry whose 'used' stamp is more than
+ * mac_timeout * HZ jiffies old is unlinked and freed. Ports without SNAT
+ * configuration are left alone. The port's spinlock is held throughout.
+ *
+ * Called with the RCU read lock */
+void
+snat_maint(struct net_bridge_port *p)
+{
+	struct snat_mapping *entry, *next;
+	struct snat_conf *conf;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&p->lock, irq_flags);
+	conf = p->snat;
+	if (conf) {
+		unsigned long timeout = conf->mac_timeout * HZ;
+
+		list_for_each_entry_safe (entry, next, &conf->mappings, node) {
+			if (!time_after(jiffies, entry->used + timeout))
+				continue;
+
+			list_del(&entry->node);
+			kfree(entry);
+		}
+	}
+	spin_unlock_irqrestore(&p->lock, irq_flags);
+}
+
+/* Strip the fake routing entry from a packet that is bound for a local
+ * interface: if skb->dst is still __fake_rtable, release that reference
+ * and clear skb->dst. Any other dst is left untouched.
+ */
+void snat_local_in(struct sk_buff *skb)
+{
+	struct dst_entry *fake_dst = (struct dst_entry *)&__fake_rtable;
+
+	if (skb->dst != fake_dst)
+		return;
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+}
+
+/* Check whether destination IP's address is in the IP->MAC mappings.
+ * If it is, then overwrite the destination MAC with the value from the
+ * cache and refresh the mapping's 'used' stamp.
+ *
+ * Reads p->snat and walks its mapping list without locking, so the caller
+ * must hold the port's spinlock and must have checked that p->snat is set.
+ *
+ * Returns -1 if there is a problem, otherwise 0.
+ *
+ * NOTE(review): make_writable() may hand back a different sk_buff than the
+ * one passed in; this function only updates its local 'skb', so the caller
+ * keeps its original pointer. Confirm against make_writable() that the
+ * caller's pointer stays valid in that case. */
+static int
+dnat_mac(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	struct snat_conf *sc = p->snat;
+	struct iphdr *iph = ip_hdr(skb);
+	struct snat_mapping *m;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		return 0;
+
+	list_for_each_entry (m, &sc->mappings, node) {
+		if (m->ip_addr == iph->daddr){
+			/* Found it! */
+			skb = make_writable(skb, GFP_ATOMIC);
+			if (!skb)
+				return -1;
+			m->used = jiffies;
+			/* Look the Ethernet header up only now: a pointer
+			 * taken before make_writable() may refer to a buffer
+			 * that has since been replaced. */
+			memcpy(eth_hdr(skb)->h_dest, m->hw_addr, ETH_ALEN);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* Returns nonzero if 'ip_addr' (network byte order) lies within the
+ * inclusive range [ip_addr_start, ip_addr_end] of 'sc', and 0 otherwise or
+ * when 'sc' is NULL. Takes no lock itself. */
+static int
+__snat_this_address(struct snat_conf *sc, u32 ip_addr)
+{
+	u32 host_addr;
+
+	if (!sc)
+		return 0;
+
+	host_addr = ntohl(ip_addr);
+	return (sc->ip_addr_start <= host_addr &&
+		host_addr <= sc->ip_addr_end);
+}
+
+/* Locked wrapper around __snat_this_address(): holds the spinlock of port
+ * 'p' (IRQ-safe) while testing 'ip_addr' against the port's SNAT range.
+ * Returns nonzero if the address is in range. */
+static int
+snat_this_address(struct net_bridge_port *p, u32 ip_addr)
+{
+	unsigned long irq_flags;
+	int in_range;
+
+	spin_lock_irqsave(&p->lock, irq_flags);
+	in_range = __snat_this_address(p->snat, ip_addr);
+	spin_unlock_irqrestore(&p->lock, irq_flags);
+
+	return in_range;
+}
+
+/* Returns the first port on the port list of 'dp' whose SNAT range
+ * contains 'ip_addr' (network byte order), or NULL if no port matches.
+ *
+ * Must hold RCU lock. */
+static struct net_bridge_port *
+get_nbp_by_ip_addr(struct datapath *dp, u32 ip_addr)
+{
+	struct net_bridge_port *port;
+
+	list_for_each_entry_rcu (port, &dp->port_list, node) {
+		if (snat_this_address(port, ip_addr))
+			return port;
+	}
+
+	return NULL;
+}
+
+/* Continuation handed to NF_HOOK() for NF_INET_PRE_ROUTING by
+ * snat_pre_route().
+ *
+ * Attaches the fake routing entry to 'skb' (taking a reference), and, when
+ * the destination IP is not one of the port's SNAT addresses but the port
+ * has SNAT configured, rewrites the destination MAC from the IP->MAC cache.
+ * Finally pushes the Ethernet header back on and passes the packet to
+ * dp_process_received_packet(). Always returns 0.
+ *
+ * NOTE(review): the return value of dnat_mac() is ignored and dnat_mac()
+ * may replace its own 'skb' through make_writable(); confirm that the
+ * 'skb' used below is still valid when that happens. */
+static int
+snat_pre_route_finish(struct sk_buff *skb)
+{
+ struct net_bridge_port *p = skb->dev->br_port;
+ struct snat_conf *sc;
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned long flags;
+
+ skb->dst = (struct dst_entry *)&__fake_rtable;
+ dst_hold(skb->dst);
+
+ /* Don't process packets that were not translated due to NAT */
+ spin_lock_irqsave(&p->lock, flags);
+ sc = p->snat;
+ if (!__snat_this_address(sc, iph->daddr)) {
+ /* If SNAT is configured for this input device, check the
+ * IP->MAC mappings to see if we should update the destination
+ * MAC. */
+ if (sc)
+ dnat_mac(skb->dev->br_port, skb);
+
+ }
+ spin_unlock_irqrestore(&p->lock, flags);
+
+ /* Pass the translated packet as input to the OpenFlow stack, which
+ * consumes it. */
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ dp_process_received_packet(skb, p);
+
+ return 0;
+}
+
+/* Checks whether 'skb' is an ARP request for an SNAT'd interface. If
+ * so, it will generate a response.
+ *
+ * Only Ethernet/IPv4 ARP requests are considered. If the request arrived
+ * on the SNAT port itself, the reply advertises the datapath device's own
+ * MAC; otherwise it advertises the MAC configured for the SNAT port, and
+ * the request is left unhandled when that MAC is all zeros.
+ *
+ * NOTE(review): nat_nbp->snat is dereferenced here under the RCU read lock
+ * only, without nat_nbp->lock (which snat_this_address() does take);
+ * confirm that this is safe against concurrent reconfiguration.
+ *
+ * Returns 0 if the packet was not handled. Otherwise, -1 is returned
+ * and the caller is responsible for freeing 'skb'. */
+static int
+handle_arp_snat(struct sk_buff *skb)
+{
+ struct net_bridge_port *s_nbp = skb->dev->br_port;
+ struct net_bridge_port *nat_nbp;
+ struct ip_arphdr *ah;
+ u8 mac_addr[ETH_ALEN];
+
+ if (!pskb_may_pull(skb, sizeof *ah))
+ return 0;
+
+ ah = (struct ip_arphdr *)arp_hdr(skb);
+ if ((ah->ar_op != htons(ARPOP_REQUEST))
+ || ah->ar_hln != ETH_ALEN
+ || ah->ar_pro != htons(ETH_P_IP)
+ || ah->ar_pln != 4)
+ return 0;
+
+ rcu_read_lock();
+ nat_nbp = get_nbp_by_ip_addr(s_nbp->dp, ah->ar_tip);
+ if (!nat_nbp) {
+ rcu_read_unlock();
+ return 0;
+ }
+ if (s_nbp == nat_nbp)
+ memcpy(mac_addr, s_nbp->dp->netdev->dev_addr, sizeof(mac_addr));
+ else if (!is_zero_ether_addr(nat_nbp->snat->mac_addr))
+ memcpy(mac_addr, nat_nbp->snat->mac_addr, sizeof(mac_addr));
+ else {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ arp_send(ARPOP_REPLY, ETH_P_ARP, ah->ar_sip, skb->dev, ah->ar_tip,
+ ah->ar_sha, mac_addr, ah->ar_sha);
+
+ return -1;
+}
+
+/* Checks whether 'skb' is a ping request for an SNAT'd interface. If
+ * so, it will generate a response.
+ *
+ * The reply is built from a private copy of 'skb': Ethernet and IP source
+ * and destination are swapped, the ICMP type becomes ICMP_ECHOREPLY, and
+ * both the IP header checksum and the ICMP checksum are recomputed. The
+ * copy is sent with dp_xmit_skb_push().
+ *
+ * Returns 0 if the packet was not handled. Otherwise, -1 is returned
+ * and the caller is responsible for freeing 'skb'. -1 is also returned
+ * when the copy for the reply cannot be allocated, in which case the
+ * request is simply dropped. */
+static int
+handle_icmp_snat(struct sk_buff *skb)
+{
+	struct net_bridge_port *p = skb->dev->br_port;
+	struct ethhdr *eh;
+	struct iphdr *iph;
+	struct icmphdr *icmph;
+	u8 tmp_eth[ETH_ALEN];
+	u32 tmp_ip;
+	struct sk_buff *nskb;
+
+	/* We're only interested in addresses we rewrite. */
+	iph = ip_hdr(skb);
+	if (!snat_this_address(p, iph->daddr)) {
+		return 0;
+	}
+
+	/* Drop fragments and packets not long enough to hold the ICMP
+	 * header. */
+	if ((ntohs(iph->frag_off) & IP_OFFSET) != 0 ||
+	    !pskb_may_pull(skb, skb_transport_offset(skb) + 4))
+		return 0;
+
+	/* We only respond to echo requests to our address. Continue
+	 * processing replies and other ICMP messages since they may be
+	 * intended for NAT'd hosts. */
+	icmph = icmp_hdr(skb);
+	if (icmph->type != ICMP_ECHO)
+		return 0;
+
+	/* Send an echo reply in response */
+	nskb = skb_copy(skb, GFP_ATOMIC);
+	if (!nskb)
+		return -1;
+
+	/* Update Ethernet header. */
+	eh = eth_hdr(nskb);
+	memcpy(tmp_eth, eh->h_dest, ETH_ALEN);
+	memcpy(eh->h_dest, eh->h_source, ETH_ALEN);
+	memcpy(eh->h_source, tmp_eth, ETH_ALEN);
+
+	/* Update IP header.
+	 * This is kind of busted, at least in that it doesn't check that the
+	 * echoed IP options make sense. */
+	iph = ip_hdr(nskb);
+	iph->id = 0;
+	iph->frag_off = 0;
+	iph->ttl = IPDEFTTL;
+	iph->check = 0;
+	tmp_ip = iph->daddr;
+	iph->daddr = iph->saddr;
+	iph->saddr = tmp_ip;
+	iph->check = ip_fast_csum((void *)iph, iph->ihl);
+
+	/* Update ICMP header. The checksum covers everything from the ICMP
+	 * header to the end of the data. skb_tail_pointer() is used rather
+	 * than nskb->tail because 'tail' is an offset, not a pointer, on
+	 * kernels built with NET_SKBUFF_DATA_USES_OFFSET. */
+	icmph = icmp_hdr(nskb);
+	icmph->type = ICMP_ECHOREPLY;
+	icmph->checksum = 0;
+	icmph->checksum = ip_compute_csum((void *)icmph,
+			skb_tail_pointer(nskb) - skb_transport_header(nskb));
+
+	dp_xmit_skb_push(nskb);
+
+	return -1;
+}
+
+/* Check if any SNAT maintenance needs to be done on 'skb' before it's
+ * checked against the datapath's tables. This includes DNAT
+ * modification based on prior SNAT action and responding to ARP and
+ * echo requests for the SNAT interface.
+ *
+ * Packets that are neither ARP nor IP are left to the caller. IPv4
+ * packets that fail validation (too short, wrong version or header
+ * length, bad header checksum, inconsistent total length) are freed here
+ * and reported as consumed. Valid IP packets are trimmed to their total
+ * length and run through the NF_INET_PRE_ROUTING hook, continuing in
+ * snat_pre_route_finish().
+ *
+ * Returns -1 if the packet was handled and consumed, 0 if the caller
+ * should continue to process 'skb'. 0 is also returned when the
+ * nf_bridge state for the hook cannot be allocated.
+ */
+int
+snat_pre_route(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ int len;
+
+ WARN_ON_ONCE(skb_network_offset(skb));
+ if (skb->protocol == htons(ETH_P_ARP)) {
+ if (handle_arp_snat(skb))
+ goto consume;
+ return 0;
+ }
+ else if (skb->protocol != htons(ETH_P_IP))
+ return 0;
+
+ if (!pskb_may_pull(skb, sizeof *iph))
+ goto consume;
+
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ goto consume;
+
+ if (!pskb_may_pull(skb, ip_hdrlen(skb)))
+ goto consume;
+ skb_set_transport_header(skb, ip_hdrlen(skb));
+
+ /* Check if we need to echo reply for this address */
+ iph = ip_hdr(skb);
+ if ((iph->protocol == IPPROTO_ICMP) && (handle_icmp_snat(skb)))
+ goto consume;
+
+ iph = ip_hdr(skb);
+ if (unlikely(ip_fast_csum((void *)iph, iph->ihl)))
+ goto consume;
+
+ len = ntohs(iph->tot_len);
+ if ((skb->len < len) || len < (iph->ihl*4))
+ goto consume;
+
+ if (pskb_trim_rcsum(skb, len))
+ goto consume;
+
+ nf_bridge_put(skb->nf_bridge);
+ if (!nf_bridge_alloc(skb))
+ return 0;
+
+ NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ snat_pre_route_finish);
+ return -1;
+
+consume:
+ kfree_skb(skb);
+ return -1;
+}
+
+
+/* Continuation handed to NF_HOOK() for NF_INET_FORWARD by snat_skb(): runs
+ * the NF_INET_POST_ROUTING hook on 'skb' with skb->dev as the output
+ * device and dp_xmit_skb_push() as the continuation. Always returns 0. */
+static int
+snat_skb_finish(struct sk_buff *skb)
+{
+ NF_HOOK(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
+ dp_xmit_skb_push);
+
+ return 0;
+}
+
+/* Update the IP->MAC mappings for the private side of the SNAT'd
+ * interface.
+ *
+ * Records the source MAC of 'skb' against its source IP address in the
+ * cache of port 'p': an existing entry for that IP is overwritten and its
+ * 'used' stamp refreshed, otherwise a new entry is allocated with
+ * GFP_ATOMIC and added to the list. Does nothing if 'p' has no SNAT
+ * configuration; an allocation failure is silently ignored. The port's
+ * spinlock is held for the whole operation. */
+static void
+update_mapping(struct net_bridge_port *p, const struct sk_buff *skb)
+{
+ unsigned long flags;
+ struct snat_conf *sc;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct ethhdr *eh = eth_hdr(skb);
+ struct snat_mapping *m;
+
+ spin_lock_irqsave(&p->lock, flags);
+ sc = p->snat;
+ if (!sc)
+ goto done;
+
+ list_for_each_entry (m, &sc->mappings, node) {
+ if (m->ip_addr == iph->saddr){
+ memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
+ m->used = jiffies;
+ goto done;
+ }
+ }
+
+ m = kmalloc(sizeof *m, GFP_ATOMIC);
+ if (!m)
+ goto done;
+ m->ip_addr = iph->saddr;
+ memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
+ m->used = jiffies;
+
+ list_add(&m->node, &sc->mappings);
+
+done:
+ spin_unlock_irqrestore(&p->lock, flags);
+}
+
+/* Sends a copy of 'skb' out port 'out_port' of 'dp', performing SNAT
+ * modification on the way.  If the port was not configured for SNAT, the
+ * packet is sent through the interface unmodified.  'skb' itself is not
+ * consumed, so the caller still has to free it.
+ *
+ * 'gfp' is the allocation mode used for the copy.  The function returns
+ * silently if the port does not exist or the copy cannot be allocated.
+ */
+void
+snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port,
+	 gfp_t gfp)
+{
+	struct net_bridge_port *port = dp->ports[out_port];
+	struct sk_buff *copy;
+
+	if (!port)
+		return;
+
+	/* FIXME: Expensive.  Just need to skb_clone() here?
+	 * (However, the skb_copy() does linearize and ensure that the
+	 * headers are accessible.) */
+	copy = skb_copy(skb, gfp);
+	if (!copy)
+		return;
+	copy->dev = port->dev;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		/* Set the source MAC to the OF interface. */
+		memcpy(eth_hdr(copy)->h_source, dp->netdev->dev_addr,
+		       ETH_ALEN);
+
+		update_mapping(port, skb);
+
+		/* Take the Ethernet header back off for netfilter hooks. */
+		skb_pull(copy, ETH_HLEN);
+
+		NF_HOOK(PF_INET, NF_INET_FORWARD, copy, skb->dev, copy->dev,
+			snat_skb_finish);
+	} else {
+		/* We only SNAT IP, so anything else just goes on its way. */
+		dp_xmit_skb(copy);
+	}
+}
+
+/* Tears down the SNAT configuration of port 'p': frees every mapping
+ * entry, frees the configuration itself and clears 'p->snat'.
+ *
+ * Returns 0 on success, or -EINVAL if the port has no SNAT configuration.
+ *
+ * NB: The caller must hold the port's spinlock. */
+int
+snat_free_conf(struct net_bridge_port *p)
+{
+	struct snat_conf *conf = p->snat;
+	struct snat_mapping *cur, *next;
+
+	if (!conf)
+		return -EINVAL;
+
+	/* Release all mapping entries; the _safe iterator lets us unlink
+	 * and free the current entry while walking the list. */
+	list_for_each_entry_safe (cur, next, &conf->mappings, node) {
+		list_del(&cur->node);
+		kfree(cur);
+	}
+
+	p->snat = NULL;
+	kfree(conf);
+
+	return 0;
+}
+
+/* Removes the SNAT configuration from port number 'port' of 'dp'.
+ *
+ * Returns 0 on success, -EINVAL if 'port' is out of range or the port has
+ * no SNAT configuration, or -ENOENT if no such port exists. */
+int snat_del_port(struct datapath *dp, int port)
+{
+	struct net_bridge_port *p;
+	unsigned long irq_flags;
+	int retval;
+
+	if (!(port >= 0 && port < DP_MAX_PORTS))
+		return -EINVAL;
+
+	p = dp->ports[port];
+	if (p == NULL)
+		return -ENOENT;
+
+	/* snat_free_conf() requires the port lock to be held. */
+	spin_lock_irqsave(&p->lock, irq_flags);
+	retval = snat_free_conf(p);
+	spin_unlock_irqrestore(&p->lock, irq_flags);
+
+	return retval;
+}
+
+/* Installs the SNAT configuration described by 'osc' on a port of 'dp'.
+ *
+ * If SNAT is already configured on the port with the same IP address
+ * range, only the MAC timeout is updated.  Otherwise any existing
+ * configuration (and its mappings) is dropped and replaced.  A zero
+ * 'osc->mac_timeout' selects MAC_TIMEOUT_DEFAULT.
+ *
+ * Returns 0 on success, -EINVAL if 'osc->port' is out of range, -ENOENT if
+ * no such port exists, or -ENOMEM if the configuration cannot be
+ * allocated. */
+int snat_add_port(struct datapath *dp, const struct odp_snat_config *osc)
+{
+	struct net_bridge_port *p;
+	struct snat_conf *fresh;
+	unsigned long irq_flags;
+	u32 range_start, range_end;
+	int timeout;
+
+	if (osc->port < 0 || osc->port >= DP_MAX_PORTS)
+		return -EINVAL;
+
+	p = dp->ports[osc->port];
+	if (!p)
+		return -ENOENT;
+
+	timeout = osc->mac_timeout;
+	if (timeout == 0)
+		timeout = MAC_TIMEOUT_DEFAULT;
+
+	range_start = ntohl(osc->ip_start);
+	range_end = ntohl(osc->ip_end);
+
+	/* Allocate and fill in the new configuration before taking the
+	 * spinlock: GFP_KERNEL allocations may sleep, and the structure is
+	 * private to us until it is published below. */
+	fresh = kzalloc(sizeof *fresh, GFP_KERNEL);
+	if (!fresh)
+		return -ENOMEM;
+
+	fresh->ip_addr_start = range_start;
+	fresh->ip_addr_end = range_end;
+	fresh->mac_timeout = timeout;
+	memcpy(fresh->mac_addr, osc->mac_addr, ETH_ALEN);
+	INIT_LIST_HEAD(&fresh->mappings);
+
+	spin_lock_irqsave(&p->lock, irq_flags);
+	if (p->snat
+	    && p->snat->ip_addr_start == range_start
+	    && p->snat->ip_addr_end == range_end) {
+		/* Same address range: keep the existing mappings and just
+		 * refresh the timeout. */
+		p->snat->mac_timeout = timeout;
+		spin_unlock_irqrestore(&p->lock, irq_flags);
+		kfree(fresh);
+		return 0;
+	}
+
+	/* Different range: free the existing configuration and mappings. */
+	if (p->snat)
+		snat_free_conf(p);
+
+	p->snat = fresh;
+	spin_unlock_irqrestore(&p->lock, irq_flags);
+
+	return 0;
+}
+#endif
--- /dev/null
+#ifdef SUPPORT_SNAT
+#ifndef ACT_SNAT_H
+#define ACT_SNAT_H
+
+/* Interface to the datapath's SNAT support.  The whole header, including
+ * its include guard, is compiled only when SUPPORT_SNAT is defined. */
+
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/rcupdate.h>
+
+#include "datapath.h"
+
+void snat_local_in(struct sk_buff *skb);
+int snat_pre_route(struct sk_buff *skb);
+/* Sends a copy of 'skb' out port 'out_port' of 'dp', allocating the copy
+ * with 'gfp'.  'skb' itself is not consumed. */
+void snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port,
+ gfp_t gfp);
+void snat_save_header(struct sk_buff *skb);
+int snat_copy_header(struct sk_buff *skb);
+void snat_maint(struct net_bridge_port *p);
+/* Install or remove the SNAT configuration of a datapath port.  Both return
+ * 0 on success or a negative errno value: -EINVAL for an out-of-range port
+ * number, -ENOENT if the port does not exist.  snat_add_port() may also
+ * return -ENOMEM; snat_del_port() returns -EINVAL if the port has no SNAT
+ * configuration. */
+int snat_add_port(struct datapath *, const struct odp_snat_config *);
+int snat_del_port(struct datapath *, int port);
+/* Frees the SNAT configuration of 'p' together with its mappings.  The
+ * caller must hold the port's spinlock.  Returns -EINVAL if 'p' has no SNAT
+ * configuration. */
+int snat_free_conf(struct net_bridge_port *p);
+
+#endif
+#endif
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-#include "table.h"
-#include "crc32.h"
-#include "flow.h"
-#include "datapath.h"
-
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <asm/pgtable.h>
-
-static void *kmem_alloc(size_t);
-static void *kmem_zalloc(size_t);
-static void kmem_free(void *, size_t);
-
-struct sw_table_hash {
- struct sw_table swt;
- struct crc32 crc32;
- unsigned int n_flows;
- unsigned int bucket_mask; /* Number of buckets minus 1. */
- struct sw_flow **buckets;
-};
-
-static struct sw_flow **find_bucket(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int crc = crc32_calculate(&th->crc32, key,
- offsetof(struct sw_flow_key, wildcards));
- return &th->buckets[crc & th->bucket_mask];
-}
-
-static struct sw_flow *table_hash_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_flow *flow = *find_bucket(swt, key);
- return flow && flow_keys_equal(&flow->key, key) ? flow : NULL;
-}
-
-static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- struct sw_flow **bucket;
- int retval;
-
- if (flow->key.wildcards != 0)
- return 0;
-
- bucket = find_bucket(swt, &flow->key);
- if (*bucket == NULL) {
- th->n_flows++;
- rcu_assign_pointer(*bucket, flow);
- retval = 1;
- } else {
- struct sw_flow *old_flow = *bucket;
- if (flow_keys_equal(&old_flow->key, &flow->key)) {
- rcu_assign_pointer(*bucket, flow);
- flow_deferred_free(old_flow);
- retval = 1;
- } else {
- retval = 0;
- }
- }
- return retval;
-}
-
-static int table_hash_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int count = 0;
-
- if (key->wildcards == 0) {
- struct sw_flow **bucket = find_bucket(swt, key);
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count = 1;
- }
- } else {
- unsigned int i;
-
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count++;
- }
- }
- }
- return count;
-}
-
-/* Caller must update n_flows. */
-static int do_delete(struct datapath *dp, struct sw_flow **bucket,
- struct sw_flow *flow, enum nx_flow_end_reason reason)
-{
- dp_send_flow_end(dp, flow, reason);
- rcu_assign_pointer(*bucket, NULL);
- flow_deferred_free(flow);
- return 1;
-}
-
-/* Returns number of deleted flows. We can ignore the priority
- * argument, since all exact-match entries are the same (highest)
- * priority. */
-static int table_hash_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- uint16_t priority, int strict)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int count = 0;
-
- if (key->wildcards == 0) {
- struct sw_flow **bucket = find_bucket(swt, key);
- struct sw_flow *flow = *bucket;
- if (flow && flow_keys_equal(&flow->key, key)
- && flow_has_out_port(flow, out_port))
- count = do_delete(dp, bucket, flow, NXFER_DELETE);
- } else {
- unsigned int i;
-
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && flow_has_out_port(flow, out_port))
- count += do_delete(dp, bucket, flow, NXFER_DELETE);
- }
- }
- th->n_flows -= count;
- return count;
-}
-
-static int table_hash_timeout(struct datapath *dp, struct sw_table *swt)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int i;
- int count = 0;
-
- if (mutex_lock_interruptible(&dp_mutex))
- return 0;
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow) {
- int reason = flow_timeout(flow);
- if (reason >= 0) {
- count += do_delete(dp, bucket, flow, reason);
- }
- }
- }
- th->n_flows -= count;
- mutex_unlock(&dp_mutex);
-
- return count;
-}
-
-static void table_hash_destroy(struct sw_table *swt)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int i;
- for (i = 0; i <= th->bucket_mask; i++)
- if (th->buckets[i])
- flow_free(th->buckets[i]);
- kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets);
- kfree(th);
-}
-
-static int table_hash_iterate(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *private),
- void *private)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
-
- if (position->private[0] > th->bucket_mask)
- return 0;
-
- if (key->wildcards == 0) {
- struct sw_flow *flow;
- int error;
-
- flow = table_hash_lookup(swt, key);
- if (!flow || !flow_has_out_port(flow, out_port))
- return 0;
-
- error = callback(flow, private);
- if (!error)
- position->private[0] = -1;
- return error;
- } else {
- int i;
-
- for (i = position->private[0]; i <= th->bucket_mask; i++) {
- struct sw_flow *flow = th->buckets[i];
- if (flow && flow_matches_1wild(&flow->key, key)
- && flow_has_out_port(flow, out_port)) {
- int error = callback(flow, private);
- if (error) {
- position->private[0] = i;
- return error;
- }
- }
- }
- return 0;
- }
-}
-static void table_hash_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- stats->name = "hash";
- stats->wildcards = 0; /* No wildcards are supported. */
- stats->n_flows = th->n_flows;
- stats->max_flows = th->bucket_mask + 1;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-struct sw_table *table_hash_create(unsigned int polynomial,
- unsigned int n_buckets)
-{
- struct sw_table_hash *th;
- struct sw_table *swt;
-
- th = kzalloc(sizeof *th, GFP_KERNEL);
- if (th == NULL)
- return NULL;
-
- BUG_ON(n_buckets & (n_buckets - 1));
- th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets);
- if (th->buckets == NULL) {
- printk(KERN_EMERG "failed to allocate %u buckets\n",
- n_buckets);
- kfree(th);
- return NULL;
- }
- th->bucket_mask = n_buckets - 1;
-
- swt = &th->swt;
- swt->lookup = table_hash_lookup;
- swt->insert = table_hash_insert;
- swt->delete = table_hash_delete;
- swt->timeout = table_hash_timeout;
- swt->destroy = table_hash_destroy;
- swt->iterate = table_hash_iterate;
- swt->stats = table_hash_stats;
-
- crc32_init(&th->crc32, polynomial);
- th->n_flows = 0;
-
- return swt;
-}
-
-/* Double-hashing table. */
-
-struct sw_table_hash2 {
- struct sw_table swt;
- struct sw_table *subtable[2];
-};
-
-static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- int i;
-
- for (i = 0; i < 2; i++) {
- struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
- if (flow && flow_keys_equal(&flow->key, key))
- return flow;
- }
- return NULL;
-}
-
-static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
-
- if (table_hash_insert(t2->subtable[0], flow))
- return 1;
- return table_hash_insert(t2->subtable[1], flow);
-}
-
-static int table_hash2_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- return (table_hash_modify(t2->subtable[0], key, priority, strict,
- actions, actions_len)
- + table_hash_modify(t2->subtable[1], key, priority, strict,
- actions, actions_len));
-}
-
-static int table_hash2_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- uint16_t priority, int strict)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- return (table_hash_delete(dp, t2->subtable[0], key, out_port,
- priority, strict)
- + table_hash_delete(dp, t2->subtable[1], key, out_port,
- priority, strict));
-}
-
-static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- return (table_hash_timeout(dp, t2->subtable[0])
- + table_hash_timeout(dp, t2->subtable[1]));
-}
-
-static void table_hash2_destroy(struct sw_table *swt)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- table_hash_destroy(t2->subtable[0]);
- table_hash_destroy(t2->subtable[1]);
- kfree(t2);
-}
-
-static int table_hash2_iterate(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *),
- void *private)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- int i;
-
- for (i = position->private[1]; i < 2; i++) {
- int error = table_hash_iterate(t2->subtable[i], key, out_port,
- position, callback, private);
- if (error) {
- return error;
- }
- position->private[0] = 0;
- position->private[1]++;
- }
- return 0;
-}
-
-static void table_hash2_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- struct sw_table_stats substats[2];
- int i;
-
- for (i = 0; i < 2; i++)
- table_hash_stats(t2->subtable[i], &substats[i]);
- stats->name = "hash2";
- stats->wildcards = 0; /* No wildcards are supported. */
- stats->n_flows = substats[0].n_flows + substats[1].n_flows;
- stats->max_flows = substats[0].max_flows + substats[1].max_flows;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
- unsigned int poly1, unsigned int buckets1)
-
-{
- struct sw_table_hash2 *t2;
- struct sw_table *swt;
-
- t2 = kzalloc(sizeof *t2, GFP_KERNEL);
- if (t2 == NULL)
- return NULL;
-
- t2->subtable[0] = table_hash_create(poly0, buckets0);
- if (t2->subtable[0] == NULL)
- goto out_free_t2;
-
- t2->subtable[1] = table_hash_create(poly1, buckets1);
- if (t2->subtable[1] == NULL)
- goto out_free_subtable0;
-
- swt = &t2->swt;
- swt->lookup = table_hash2_lookup;
- swt->insert = table_hash2_insert;
- swt->modify = table_hash2_modify;
- swt->delete = table_hash2_delete;
- swt->timeout = table_hash2_timeout;
- swt->destroy = table_hash2_destroy;
- swt->iterate = table_hash2_iterate;
- swt->stats = table_hash2_stats;
-
- return swt;
-
-out_free_subtable0:
- table_hash_destroy(t2->subtable[0]);
-out_free_t2:
- kfree(t2);
- return NULL;
-}
-
-/* From fs/xfs/linux-2.4/kmem.c. */
-
-static void *
-kmem_alloc(size_t size)
-{
- void *ptr;
-
-#ifdef KMALLOC_MAX_SIZE
- if (size > KMALLOC_MAX_SIZE)
- return NULL;
-#endif
- ptr = kmalloc(size, GFP_KERNEL);
- if (!ptr) {
- ptr = vmalloc(size);
- if (ptr)
- printk(KERN_NOTICE "openflow: used vmalloc for %lu "
- "bytes\n", (unsigned long)size);
- }
- return ptr;
-}
-
-static void *
-kmem_zalloc(size_t size)
-{
- void *ptr = kmem_alloc(size);
- if (ptr)
- memset(ptr, 0, size);
- return ptr;
-}
-
-static void
-kmem_free(void *ptr, size_t size)
-{
- if (((unsigned long)ptr < VMALLOC_START) ||
- ((unsigned long)ptr >= VMALLOC_END)) {
- kfree(ptr);
- } else {
- vfree(ptr);
- }
-}
+++ /dev/null
-/*
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
- * Stanford Junior University
- */
-
-#include "table.h"
-#include "flow.h"
-#include "datapath.h"
-
-#include <linux/rcupdate.h>
-#include <linux/slab.h>
-#include <linux/rculist.h>
-
-struct sw_table_linear {
- struct sw_table swt;
-
- unsigned int max_flows;
- unsigned int n_flows;
- struct list_head flows;
- struct list_head iter_flows;
- unsigned long int next_serial;
-};
-
-static struct sw_flow *table_linear_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- list_for_each_entry_rcu (flow, &tl->flows, node) {
- if (flow_matches_1wild(key, &flow->key))
- return flow;
- }
- return NULL;
-}
-
-static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *f;
-
-
- /* Loop through the existing list of entries. New entries will
- * always be placed behind those with equal priority. Just replace
- * any flows that match exactly.
- */
- list_for_each_entry (f, &tl->flows, node) {
- if (f->priority == flow->priority
- && f->key.wildcards == flow->key.wildcards
- && flow_matches_2wild(&f->key, &flow->key)) {
- flow->serial = f->serial;
- list_replace_rcu(&f->node, &flow->node);
- list_replace_rcu(&f->iter_node, &flow->iter_node);
- flow_deferred_free(f);
- return 1;
- }
-
- if (f->priority < flow->priority)
- break;
- }
-
- /* Make sure there's room in the table. */
- if (tl->n_flows >= tl->max_flows) {
- return 0;
- }
- tl->n_flows++;
-
- /* Insert the entry immediately in front of where we're pointing. */
- flow->serial = tl->next_serial++;
- list_add_tail_rcu(&flow->node, &f->node);
- list_add_rcu(&flow->iter_node, &tl->iter_flows);
- return 1;
-}
-
-static int table_linear_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- unsigned int count = 0;
-
- list_for_each_entry (flow, &tl->flows, node) {
- if (flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count++;
- }
- }
- return count;
-}
-
-static int do_delete(struct datapath *dp, struct sw_table *swt,
- struct sw_flow *flow, enum nx_flow_end_reason reason)
-{
- dp_send_flow_end(dp, flow, reason);
- list_del_rcu(&flow->node);
- list_del_rcu(&flow->iter_node);
- flow_deferred_free(flow);
- return 1;
-}
-
-static int table_linear_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- uint16_t priority, int strict)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- unsigned int count = 0;
-
- list_for_each_entry (flow, &tl->flows, node) {
- if (flow_matches_desc(&flow->key, key, strict)
- && flow_has_out_port(flow, out_port)
- && (!strict || (flow->priority == priority)))
- count += do_delete(dp, swt, flow, NXFER_DELETE);
- }
- tl->n_flows -= count;
- return count;
-}
-
-static int table_linear_timeout(struct datapath *dp, struct sw_table *swt)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- int count = 0;
-
- if (mutex_lock_interruptible(&dp_mutex))
- return 0;
- list_for_each_entry (flow, &tl->flows, node) {
- int reason = flow_timeout(flow);
- if (reason >= 0) {
- count += do_delete(dp, swt, flow, reason);
- }
- }
- tl->n_flows -= count;
- mutex_unlock(&dp_mutex);
- return count;
-}
-
-static void table_linear_destroy(struct sw_table *swt)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
-
- while (!list_empty(&tl->flows)) {
- struct sw_flow *flow = list_entry(tl->flows.next,
- struct sw_flow, node);
- list_del(&flow->node);
- flow_free(flow);
- }
- kfree(tl);
-}
-
-static int table_linear_iterate(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *),
- void *private)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- unsigned long start;
-
- start = position->private[0];
- list_for_each_entry (flow, &tl->iter_flows, iter_node) {
- if (flow->serial >= start
- && flow_matches_2wild(key, &flow->key)
- && flow_has_out_port(flow, out_port)) {
- int error = callback(flow, private);
- if (error) {
- position->private[0] = flow->serial;
- return error;
- }
- }
- }
- return 0;
-}
-
-static void table_linear_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- stats->name = "linear";
- stats->wildcards = OFPFW_ALL;
- stats->n_flows = tl->n_flows;
- stats->max_flows = tl->max_flows;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-
-struct sw_table *table_linear_create(unsigned int max_flows)
-{
- struct sw_table_linear *tl;
- struct sw_table *swt;
-
- tl = kzalloc(sizeof *tl, GFP_KERNEL);
- if (tl == NULL)
- return NULL;
-
- swt = &tl->swt;
- swt->lookup = table_linear_lookup;
- swt->insert = table_linear_insert;
- swt->modify = table_linear_modify;
- swt->delete = table_linear_delete;
- swt->timeout = table_linear_timeout;
- swt->destroy = table_linear_destroy;
- swt->iterate = table_linear_iterate;
- swt->stats = table_linear_stats;
-
- tl->max_flows = max_flows;
- tl->n_flows = 0;
- INIT_LIST_HEAD(&tl->flows);
- INIT_LIST_HEAD(&tl->iter_flows);
- tl->next_serial = 0;
-
- return swt;
-}
--- /dev/null
+#include "flow.h"
+#include "datapath.h"
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+/* Frees the two-level bucket array 'flows', which holds
+ * 'n_buckets' >> DP_L1_BITS second-level pages.  If 'free_flows' is
+ * nonzero, every non-null flow found in those pages is released with
+ * flow_free() first; otherwise only the pages and the first-level array
+ * are freed.
+ *
+ * NOTE(review): second-level pages are walked with DP_L1_SIZE entries,
+ * while find_bucket() indexes them with DP_L2_BITS; presumably the two
+ * levels have the same size -- confirm against datapath.h. */
+static void free_table(struct sw_flow ***flows, unsigned int n_buckets,
+		       int free_flows)
+{
+	unsigned int n_pages = n_buckets >> DP_L1_BITS;
+	unsigned int page_idx;
+
+	for (page_idx = 0; page_idx < n_pages; page_idx++) {
+		struct sw_flow **page = flows[page_idx];
+		unsigned int slot;
+
+		for (slot = 0; free_flows && slot < DP_L1_SIZE; slot++) {
+			if (page[slot])
+				flow_free(page[slot]);
+		}
+		free_page((unsigned long)page);
+	}
+	kfree(flows);
+}
+
+/* Allocates a two-level bucket array for 'n_buckets' buckets: a
+ * first-level array of 'n_buckets' >> DP_L1_BITS pointers, each pointing to
+ * a zeroed page of flow pointers.
+ *
+ * Returns the first-level array, or NULL if any allocation fails (in which
+ * case everything allocated so far has been released again). */
+static struct sw_flow ***alloc_table(unsigned int n_buckets)
+{
+	unsigned int n_pages = n_buckets >> DP_L1_BITS;
+	struct sw_flow ***l1;
+	unsigned int idx;
+
+	l1 = kmalloc(n_pages * sizeof *l1, GFP_KERNEL);
+	if (!l1)
+		return NULL;
+
+	for (idx = 0; idx < n_pages; idx++) {
+		l1[idx] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL);
+		if (l1[idx])
+			continue;
+
+		/* Undo the 'idx' pages obtained so far, plus 'l1' itself. */
+		free_table(l1, idx << DP_L1_BITS, 0);
+		return NULL;
+	}
+	return l1;
+}
+
+/* Creates an empty flow table with 'n_buckets' buckets in each of its two
+ * bucket arrays (one per hash function).
+ *
+ * Returns the new table, or NULL if memory could not be allocated; nothing
+ * is leaked on failure. */
+struct dp_table *dp_table_create(unsigned int n_buckets)
+{
+	struct dp_table *table;
+
+	table = kzalloc(sizeof *table, GFP_KERNEL);
+	if (!table)
+		goto err;
+
+	table->n_buckets = n_buckets;
+
+	/* Test the pointer stored in slot 0.  'table[0].flows' would name
+	 * the array member itself, whose address is never null, so an
+	 * allocation failure would go unnoticed. */
+	table->flows[0] = alloc_table(n_buckets);
+	if (!table->flows[0])
+		goto err_free_table;
+
+	table->flows[1] = alloc_table(n_buckets);
+	if (!table->flows[1])
+		goto err_free_flows0;
+
+	return table;
+
+err_free_flows0:
+	free_table(table->flows[0], table->n_buckets, 0);
+err_free_table:
+	kfree(table);
+err:
+	return NULL;
+}
+
+/* Frees 'table' and both of its bucket arrays.  If 'free_flows' is nonzero
+ * the flows still stored in the table are freed as well; otherwise they are
+ * left alone. */
+void dp_table_destroy(struct dp_table *table, int free_flows)
+{
+	unsigned int n_buckets = table->n_buckets;
+
+	free_table(table->flows[0], n_buckets, free_flows);
+	free_table(table->flows[1], n_buckets, free_flows);
+	kfree(table);
+}
+
+/* Returns the bucket in the two-level array 'flows' of 'table' that 'hash'
+ * selects.  The hash is reduced modulo the bucket count (masking with
+ * n_buckets - 1) and shifted by DP_L1_SHIFT to pick the second-level page;
+ * the low DP_L2_BITS bits of the hash pick the slot within that page. */
+static struct sw_flow **find_bucket(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash)
+{
+	unsigned int page_idx = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
+	unsigned int slot_idx = hash & ((1 << DP_L2_BITS) - 1);
+	struct sw_flow **page = flows[page_idx];
+
+	return page + slot_idx;
+}
+
+/* Looks up 'key' in the bucket of 'flows' selected by 'hash'.
+ *
+ * Returns the flow stored in that bucket if its key is byte-for-byte equal
+ * to 'key', otherwise NULL.  The bucket is read with rcu_dereference(). */
+static struct sw_flow *lookup_table(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash,
+				    const struct odp_flow_key *key)
+{
+	struct sw_flow **bucket = find_bucket(table, flows, hash);
+	struct sw_flow *flow = rcu_dereference(*bucket);
+
+	/* Compare the whole key: 'sizeof *key' is the size of the structure,
+	 * whereas 'sizeof key' would only be the size of a pointer and would
+	 * let flows that differ in later fields match. */
+	return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL;
+}
+
+/* First of the two flow-key hash functions: jhash2() over 'key' viewed as
+ * an array of 32-bit words, seeded with 0xaaaaaaaa. */
+static u32 flow_hash0(const struct odp_flow_key *key)
+{
+	const u32 n_words = sizeof *key / sizeof(u32);
+
+	return jhash2((u32*)key, n_words, 0xaaaaaaaa);
+}
+
+/* Second of the two flow-key hash functions: jhash2() over 'key' viewed as
+ * an array of 32-bit words, seeded with 0x55555555. */
+static u32 flow_hash1(const struct odp_flow_key *key)
+{
+	const u32 n_words = sizeof *key / sizeof(u32);
+
+	return jhash2((u32*)key, n_words, 0x55555555);
+}
+
+/* Stores in 'buckets' the two candidate buckets for 'key' in 'table':
+ * buckets[0] from the first bucket array using flow_hash0(), buckets[1]
+ * from the second bucket array using flow_hash1(). */
+static void find_buckets(struct dp_table *table, struct odp_flow_key *key,
+			 struct sw_flow **buckets[2])
+{
+	u32 hash0 = flow_hash0(key);
+	u32 hash1 = flow_hash1(key);
+
+	buckets[0] = find_bucket(table, table->flows[0], hash0);
+	buckets[1] = find_bucket(table, table->flows[1], hash1);
+}
+
+/* Searches 'table' for a flow whose key matches 'key', trying the first
+ * bucket array and then, only if that misses, the second.  Returns the
+ * flow, or NULL if neither candidate bucket holds a match. */
+struct sw_flow *dp_table_lookup(struct dp_table *table,
+				const struct odp_flow_key *key)
+{
+	struct sw_flow *hit;
+
+	hit = lookup_table(table, table->flows[0], flow_hash0(key), key);
+	if (hit)
+		return hit;
+
+	return lookup_table(table, table->flows[1], flow_hash1(key), key);
+}
+
+/* Replaces the flow table of 'dp' with 'new_table' and destroys the old
+ * one.  The old table is destroyed only after synchronize_rcu() has
+ * returned.  'free_flows' is passed on to dp_table_destroy() and says
+ * whether the flows in the old table are freed with it. */
+static void dp_table_swap(struct datapath *dp, struct dp_table *new_table,
+			  int free_flows)
+{
+	struct dp_table *stale = rcu_dereference(dp->table);
+
+	rcu_assign_pointer(dp->table, new_table);
+
+	/* Wait for the RCU grace period before tearing the old table down. */
+	synchronize_rcu();
+	dp_table_destroy(stale, free_flows);
+}
+
+/* Calls 'callback' with 'aux' for every flow in 'table', walking both
+ * bucket arrays in order.  Iteration stops at the first nonzero value
+ * returned by 'callback', which is then returned to the caller; returns 0
+ * if every flow was visited. */
+int dp_table_foreach(struct dp_table *table,
+		     int (*callback)(struct sw_flow *flow, void *aux),
+		     void *aux)
+{
+	unsigned int which, page_idx, slot;
+
+	for (which = 0; which < 2; which++) {
+		for (page_idx = 0;
+		     page_idx < table->n_buckets >> DP_L1_BITS;
+		     page_idx++) {
+			struct sw_flow **page = table->flows[which][page_idx];
+
+			for (slot = 0; slot < DP_L1_SIZE; slot++) {
+				struct sw_flow *flow;
+				int error;
+
+				flow = rcu_dereference(page[slot]);
+				if (!flow)
+					continue;
+
+				error = callback(flow, aux);
+				if (error)
+					return error;
+			}
+		}
+	}
+	return 0;
+}
+
+/* dp_table_foreach() callback used by dp_table_expand(): places 'flow' in
+ * the first empty candidate bucket of the table passed as 'new_table_'.
+ *
+ * Always returns 0 so that iteration continues.  If both candidate buckets
+ * are already occupied, the flow is left out of the new table and a
+ * one-time warning is logged. */
+static int insert_flow(struct sw_flow *flow, void *new_table_)
+{
+	struct dp_table *new_table = new_table_;
+	struct sw_flow **buckets[2];
+	int i;
+
+	find_buckets(new_table, &flow->key, buckets);
+	for (i = 0; i < 2; i++) {
+		if (!*buckets[i]) {
+			rcu_assign_pointer(*buckets[i], flow);
+			return 0;
+		}
+	}
+
+	/* Neither bucket was free. */
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/* Doubles the number of buckets in the flow table of 'dp': builds a new
+ * table twice the size, reinserts every existing flow into it, and swaps
+ * it in.  The flows themselves are kept (the old table is destroyed
+ * without freeing them).
+ *
+ * Returns 0 on success or -ENOMEM if the new table cannot be allocated, in
+ * which case the existing table is left untouched. */
+int dp_table_expand(struct datapath *dp)
+{
+	struct dp_table *current_table = rcu_dereference(dp->table);
+	struct dp_table *bigger;
+
+	bigger = dp_table_create(current_table->n_buckets * 2);
+	if (bigger == NULL)
+		return -ENOMEM;
+
+	dp_table_foreach(current_table, insert_flow, bigger);
+	dp_table_swap(dp, bigger, 0);
+	return 0;
+}
+
+/* Discards every flow in 'dp' by swapping in a fresh, empty table of
+ * DP_L1_SIZE buckets and destroying the old table together with its flows.
+ *
+ * Returns 0 on success or -ENOMEM if the empty table cannot be allocated,
+ * in which case the existing table is left untouched. */
+int dp_table_flush(struct datapath *dp)
+{
+	struct dp_table *empty = dp_table_create(DP_L1_SIZE);
+
+	if (empty == NULL)
+		return -ENOMEM;
+
+	dp_table_swap(dp, empty, 1);
+	return 0;
+}
+
+/* Finds the bucket in 'table' where 'target' should be stored.
+ *
+ * If one of the two candidate buckets already holds a flow with the same
+ * key as 'target', that bucket is returned (checked in order, returning at
+ * the first match).  Otherwise the first empty candidate bucket is
+ * returned, or NULL if both are occupied by flows with other keys. */
+struct sw_flow **
+dp_table_lookup_for_insert(struct dp_table *table, struct sw_flow *target)
+{
+	struct sw_flow **candidates[2];
+	struct sw_flow **vacant = NULL;
+	int idx;
+
+	find_buckets(table, &target->key, candidates);
+	for (idx = 0; idx < 2; idx++) {
+		struct sw_flow *occupant = rcu_dereference(*candidates[idx]);
+
+		if (!occupant) {
+			/* Remember only the first free bucket. */
+			if (!vacant)
+				vacant = candidates[idx];
+			continue;
+		}
+
+		if (memcmp(&occupant->key, &target->key,
+			   sizeof occupant->key) == 0)
+			return candidates[idx];
+	}
+	return vacant;
+}
+
+/* Removes 'target' from 'table' by clearing whichever of its two candidate
+ * buckets currently points to it.  The flow itself is not freed.
+ *
+ * Returns 0 if 'target' was found and unlinked, or -ENOENT if neither
+ * candidate bucket holds that exact flow. */
+int dp_table_delete(struct dp_table *table, struct sw_flow *target)
+{
+	struct sw_flow **candidates[2];
+	int idx;
+
+	find_buckets(table, &target->key, candidates);
+	for (idx = 0; idx < 2; idx++) {
+		struct sw_flow *occupant = rcu_dereference(*candidates[idx]);
+
+		if (occupant != target)
+			continue;
+
+		rcu_assign_pointer(*candidates[idx], NULL);
+		return 0;
+	}
+	return -ENOENT;
+}
+++ /dev/null
-/* Individual switching tables. Generally grouped together in a chain (see
- * chain.h). */
-
-#ifndef TABLE_H
-#define TABLE_H 1
-
-#include <linux/types.h>
-
-struct sw_flow;
-struct sw_flow_key;
-struct ofp_action_header;
-struct datapath;
-
-/* Table statistics. */
-struct sw_table_stats {
- const char *name; /* Human-readable name. */
- uint32_t wildcards; /* Bitmap of OFPFW_* wildcards that are
- supported by the table. */
- unsigned int n_flows; /* Number of active flows. */
- unsigned int max_flows; /* Flow capacity. */
- unsigned long int n_lookup; /* Number of packets looked up. */
- unsigned long int n_matched; /* Number of packets that have hit. */
-};
-
-/* Position within an iteration of a sw_table.
- *
- * The contents are private to the table implementation, except that a position
- * initialized to all-zero-bits represents the start of a table. */
-struct sw_table_position {
- unsigned long private[4];
-};
-
-/* A single table of flows.
- *
- * All functions, except destroy, must be called holding the
- * rcu_read_lock. destroy must be fully serialized.
- */
-struct sw_table {
- /* The number of packets that have been looked up and matched,
- * respecitvely. To make these 100% accurate, they should be atomic.
- * However, we're primarily concerned about speed. */
- unsigned long long n_lookup;
- unsigned long long n_matched;
-
- /* Searches 'table' for a flow matching 'key', which must not have any
- * wildcard fields. Returns the flow if successful, a null pointer
- * otherwise. */
- struct sw_flow *(*lookup)(struct sw_table *table,
- const struct sw_flow_key *key);
-
- /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns
- * 0 if successful or a negative error. Error can be due to an
- * over-capacity table or because the flow is not one of the kind that
- * the table accepts.
- *
- * If successful, 'flow' becomes owned by 'table', otherwise it is
- * retained by the caller. */
- int (*insert)(struct sw_table *table, struct sw_flow *flow);
-
- /* Modifies the actions in 'table' that match 'key'. If 'strict'
- * set, wildcards and priority must match. Returns the number of flows
- * that were modified. */
- int (*modify)(struct sw_table *table, const struct sw_flow_key *key,
- uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len);
-
- /* Deletes from 'table' any and all flows that match 'key' from
- * 'table'. If 'out_port' is not OFPP_NONE, then matching entries
- * must have that port as an argument for an output action. If
- * 'strict' is set, wildcards and priority must match. Returns the
- * number of flows that were deleted. */
- int (*delete)(struct datapath *dp, struct sw_table *table,
- const struct sw_flow_key *key,
- uint16_t out_port, uint16_t priority, int strict);
-
- /* Performs timeout processing on all the flow entries in 'table'.
- * Returns the number of flow entries deleted through expiration. */
- int (*timeout)(struct datapath *dp, struct sw_table *table);
-
- /* Destroys 'table', which must not have any users. */
- void (*destroy)(struct sw_table *table);
-
- /* Iterates through the flow entries in 'table', passing each one
- * matches 'key' and output port 'out_port' to 'callback'. The
- * callback function should return 0 to continue iteration or a
- * nonzero error code to stop. The iterator function returns either
- * 0 if the table iteration completed or the value returned by the
- * callback function otherwise.
- *
- * The iteration starts at 'position', which may be initialized to
- * all-zero-bits to iterate from the beginning of the table. If the
- * iteration terminates due to an error from the callback function,
- * 'position' is updated to a value that can be passed back to the
- * iterator function to continue iteration later from the same position
- * that caused the error (assuming that that flow entry has not been
- * deleted in the meantime). */
- int (*iterate)(struct sw_table *table,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *flow, void *private),
- void *private);
-
- /* Dumps statistics for 'table' into 'stats'. */
- void (*stats)(struct sw_table *table, struct sw_table_stats *stats);
-};
-
-struct sw_table *table_hash_create(unsigned int polynomial,
- unsigned int n_buckets);
-struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
- unsigned int poly1, unsigned int buckets1);
-struct sw_table *table_linear_create(unsigned int max_flows);
-
-#endif /* table.h */
--- /dev/null
+/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+/* Protocol between secchan and datapath. */
+
+#ifndef OPENFLOW_DATAPATH_PROTOCOL_H
+#define OPENFLOW_DATAPATH_PROTOCOL_H 1
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#define ODP_MAX 256 /* Maximum number of datapaths. */
+
+#define ODP_DP_CREATE _IO('O', 0)
+#define ODP_DP_DESTROY _IO('O', 1)
+#define ODP_DP_STATS _IOW('O', 2, struct odp_stats)
+
+#define ODP_GET_DROP_FRAGS _IOW('O', 3, int)
+#define ODP_SET_DROP_FRAGS _IOR('O', 4, int)
+
+#define ODP_GET_LISTEN_MASK _IOW('O', 5, int)
+#define ODP_SET_LISTEN_MASK _IOR('O', 6, int)
+
+#define ODP_PORT_ADD _IOR('O', 7, struct odp_port)
+#define ODP_PORT_DEL _IOR('O', 8, int)
+#define ODP_PORT_QUERY _IOWR('O', 9, struct odp_port)
+#define ODP_PORT_LIST _IOWR('O', 10, struct odp_portvec)
+
+#define ODP_PORT_GROUP_SET _IOR('O', 11, struct odp_port_group)
+#define ODP_PORT_GROUP_GET _IOWR('O', 12, struct odp_port_group)
+
+#define ODP_FLOW_FLUSH _IO('O', 13)
+#define ODP_FLOW_ADD _IOR('O', 14, struct odp_flow)
+#define ODP_FLOW_SET_ACTS _IOR('O', 15, struct odp_flow)
+#define ODP_FLOW_DEL _IOWR('O', 16, struct odp_flow)
+#define ODP_FLOW_QUERY _IOWR('O', 17, struct odp_flow)
+#define ODP_FLOW_QUERY_MULTIPLE _IOWR('O', 18, struct odp_flowvec)
+#define ODP_FLOW_LIST _IOWR('O', 19, struct odp_flowvec)
+
+#define ODP_EXECUTE _IOR('O', 20, struct odp_execute)
+
+#define ODP_SNAT_ADD_PORT _IOR('O', 21, struct odp_snat_config)
+#define ODP_SNAT_DEL_PORT _IOR('O', 22, int)
+
+struct odp_stats {
+ /* Flows. */
+ __u32 n_flows; /* Number of flows in flow table. */
+ __u32 cur_capacity; /* Current flow table capacity. */
+ __u32 max_capacity; /* Maximum expansion of flow table capacity. */
+
+ /* Ports. */
+ __u32 n_ports; /* Current number of ports. */
+ __u32 max_ports; /* Maximum supported number of ports. */
+ __u16 max_groups; /* Maximum number of port groups. */
+ __u16 reserved;
+
+ /* Lookups. */
+ __u64 n_frags; /* Number of dropped IP fragments. */
+ __u64 n_hit; /* Number of flow table matches. */
+ __u64 n_missed; /* Number of flow table misses. */
+ __u64 n_lost; /* Number of misses not sent to userspace. */
+};
+
+/* Logical ports. */
+#define ODPP_LOCAL ((__u16)0)
+#define ODPP_NONE ((__u16)-1)
+
+/* Listening channels.
+ *
+ * The _ODPL_*_NR values are bit numbers (they are also the values carried in
+ * struct odp_msg 'type'); the ODPL_* values are the corresponding bit masks. */
+#define _ODPL_MISS_NR 0 /* Packet missed in flow table. */
+#define ODPL_MISS (1 << _ODPL_MISS_NR)
+#define _ODPL_ACTION_NR 1 /* Packet sent by an ODPAT_CONTROLLER action. */
+#define ODPL_ACTION (1 << _ODPL_ACTION_NR)
+#define ODPL_ALL (ODPL_MISS | ODPL_ACTION)
+
+/* Format of messages read from datapath fd. */
+struct odp_msg {
+ __u32 type; /* _ODPL_MISS_NR or _ODPL_ACTION_NR. */
+ __u32 length; /* Message length, including header. */
+ __u16 port; /* Port on which frame was received. */
+ __u16 reserved;
+ __u32 arg; /* Argument value specified in action. */
+ /* Followed by packet data. */
+};
+
+struct odp_port {
+ char devname[16]; /* IFNAMSIZ */
+ __u16 port;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+struct odp_portvec {
+ struct odp_port *ports;
+ int n_ports;
+};
+
+struct odp_port_group {
+ __u16 *ports;
+ __u16 n_ports; /* Number of ports. */
+ __u16 group; /* Group number. */
+};
+
+struct odp_flow_stats {
+ __u64 n_packets; /* Number of matched packets. */
+ __u64 n_bytes; /* Number of matched bytes. */
+ __u64 used_sec; /* Time last used. */
+ __u32 used_nsec;
+ __u8 tcp_flags;
+ __u8 ip_tos;
+ __u16 reserved;
+};
+
+struct odp_flow_key {
+ __be32 nw_src; /* IP source address. */
+ __be32 nw_dst; /* IP destination address. */
+ __u16 in_port; /* Input switch port. */
+ __be16 dl_vlan; /* Input VLAN. */
+ __be16 dl_type; /* Ethernet frame type. */
+ __be16 tp_src; /* TCP/UDP source port. */
+ __be16 tp_dst; /* TCP/UDP destination port. */
+ __u8 dl_src[ETH_ALEN]; /* Ethernet source address. */
+ __u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */
+ __u8 nw_proto; /* IP protocol. */
+ __u8 reserved; /* Pad to 64 bits. */
+};
+
+struct odp_flow {
+ struct odp_flow_stats stats;
+ struct odp_flow_key key;
+ union odp_action *actions;
+ __u32 n_actions;
+};
+
+struct odp_flowvec {
+ struct odp_flow *flows;
+ int n_flows;
+};
+
+/* The VLAN id is 12 bits, so we can use the entire 16 bits to indicate
+ * special conditions. All ones is used to match that no VLAN id was
+ * set. */
+#define ODP_VLAN_NONE 0xffff
+
+/* Action types. */
+#define ODPAT_OUTPUT 0 /* Output to switch port. */
+#define ODPAT_OUTPUT_GROUP 1 /* Output to all ports in group. */
+#define ODPAT_CONTROLLER 2 /* Send copy to controller. */
+#define ODPAT_SET_VLAN_VID 3 /* Set the 802.1q VLAN id. */
+#define ODPAT_SET_VLAN_PCP 4 /* Set the 802.1q priority. */
+#define ODPAT_STRIP_VLAN 5 /* Strip the 802.1q header. */
+#define ODPAT_SET_DL_SRC 6 /* Ethernet source address. */
+#define ODPAT_SET_DL_DST 7 /* Ethernet destination address. */
+#define ODPAT_SET_NW_SRC 8 /* IP source address. */
+#define ODPAT_SET_NW_DST 9 /* IP destination address. */
+#define ODPAT_SET_TP_SRC 10 /* TCP/UDP source port. */
+#define ODPAT_SET_TP_DST 11 /* TCP/UDP destination port. */
+#define ODPAT_SNAT 12 /* Source NAT. */
+#define ODPAT_N_ACTIONS 13
+
+struct odp_action_output {
+ __u16 type; /* ODPAT_OUTPUT. */
+ __u16 port; /* Output port. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+struct odp_action_output_group {
+ __u16 type; /* ODPAT_OUTPUT_GROUP. */
+ __u16 group; /* Group number. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+/* Action structure for ODPAT_CONTROLLER. */
+struct odp_action_controller {
+ __u16 type; /* ODPAT_CONTROLLER. */
+ __u16 reserved;
+ __u32 arg; /* Copied to struct odp_msg 'arg' member. */
+};
+
+/* Action structure for ODPAT_SET_VLAN_VID. */
+struct odp_action_vlan_vid {
+ __u16 type; /* ODPAT_SET_VLAN_VID. */
+ __be16 vlan_vid; /* VLAN id. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+/* Action structure for ODPAT_SET_VLAN_PCP. */
+struct odp_action_vlan_pcp {
+ __u16 type; /* ODPAT_SET_VLAN_PCP. */
+ __u8 vlan_pcp; /* VLAN priority. */
+ __u8 reserved1;
+ __u16 reserved2;
+ __u16 reserved3;
+};
+
+/* Action structure for ODPAT_SET_DL_SRC/DST. */
+struct odp_action_dl_addr {
+ __u16 type; /* ODPAT_SET_DL_SRC/DST. */
+ __u8 dl_addr[ETH_ALEN]; /* Ethernet address. */
+};
+
+/* Action structure for ODPAT_SET_NW_SRC/DST. */
+struct odp_action_nw_addr {
+ __u16 type; /* ODPAT_SET_NW_SRC/DST. */
+ __u16 reserved;
+ __be32 nw_addr; /* IP address. */
+};
+
+/* Action structure for ODPAT_SET_TP_SRC/DST. */
+struct odp_action_tp_port {
+ __u16 type; /* ODPAT_SET_TP_SRC/DST. */
+ __be16 tp_port; /* TCP/UDP port. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+struct odp_action_snat {
+ __u16 type; /* ODPAT_SNAT. */
+ __u16 port; /* Output port. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+union odp_action {
+ __u16 type;
+ struct odp_action_output output;
+ struct odp_action_output_group output_group;
+ struct odp_action_controller controller;
+ struct odp_action_vlan_vid vlan_vid;
+ struct odp_action_vlan_pcp vlan_pcp;
+ struct odp_action_dl_addr dl_addr;
+ struct odp_action_nw_addr nw_addr;
+ struct odp_action_tp_port tp_port;
+ struct odp_action_snat snat;
+};
+
+struct odp_execute {
+ __u16 in_port;
+ __u16 reserved1;
+ __u32 reserved2;
+
+ union odp_action *actions;
+ __u32 n_actions;
+
+ const void *data;
+ __u32 length;
+};
+
+/* Values below this cutoff are 802.3 packets and the two bytes
+ * following MAC addresses are used as a frame length. Otherwise, the
+ * two bytes are used as the Ethernet type.
+ */
+#define ODP_DL_TYPE_ETH2_CUTOFF 0x0600
+
+/* Value of dl_type to indicate that the frame does not include an
+ * Ethernet type.
+ */
+#define ODP_DL_TYPE_NOT_ETH_TYPE 0x05ff
+
+/* The VLAN id is 12-bits, so we can use the entire 16 bits to indicate
+ * special conditions. All ones indicates that no VLAN id was set.
+ */
+#define ODP_VLAN_NONE 0xffff
+
+/* Configuration for source-NATing */
+struct odp_snat_config {
+ __u16 port;
+
+ /* Time to cache MAC addresses of SNAT'd hosts in seconds (0=default). */
+ __u16 mac_timeout;
+
+ /* Range of IP addresses to impersonate. Set both values to the same to
+ * support a single address. */
+ __be32 ip_start, ip_end;
+
+ /* Range of transport ports that should be used as new source port. A
+ * value of zero lets the kernel choose. */
+ __be16 tcp_start, tcp_end;
+ __be16 udp_start, udp_end;
+
+ /* MAC address to use for ARP requests for a SNAT IP address that comes in
+ * on a different interface than 'port'. A value of all zeros silently
+ * drops those ARP requests. Requests that arrive on 'port' get a response
+ * with the mac address of the datapath device. */
+ __u8 mac_addr[ETH_ALEN];
+ __u16 reserved;
+};
+
+#endif /* openflow/datapath-protocol.h */
/*
* Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2008 Nicira Networks
+ * Copyright (c) 2008, 2009 Nicira Networks
*/
#ifndef OPENFLOW_NICIRA_EXT_H
/* Remote command execution reply, sent when the command's execution
* completes. The reply body is struct nx_command_reply. */
NXT_COMMAND_REPLY,
-
- /* Configure whether Flow End messages should be sent. */
- NXT_FLOW_END_CONFIG,
-
- /* Sent by switch when a flow ends. These messages are turned into
- * ofp_flow_expired and NetFlow messages in user-space. */
- NXT_FLOW_END
};
struct nicira_header {
};
OFP_ASSERT(sizeof(struct nx_command_reply) == 20);
-enum nx_flow_end_reason {
- NXFER_IDLE_TIMEOUT, /* Flow idle time exceeded idle_timeout. */
- NXFER_HARD_TIMEOUT, /* Time exceeded hard_timeout. */
- NXFER_DELETE, /* Flow was removed by delete command. */
- NXFER_EJECT /* Flow was ejected. */
-};
-
-struct nx_flow_end_config {
- struct nicira_header header;
- uint8_t enable; /* Set to 1 to enable Flow End message
- generation. 0 to disable. */
- uint8_t pad[3];
-};
-OFP_ASSERT(sizeof(struct nx_flow_end_config) == 20);
-
-struct nx_flow_end {
- struct nicira_header header;
- struct ofp_match match; /* Description of fields. */
-
- uint16_t priority; /* Priority level of flow entry. */
- uint8_t reason; /* One of NXFER_*. */
-
- uint8_t tcp_flags; /* Union of seen TCP flags. */
- uint8_t ip_tos; /* IP TOS value. */
-
- uint8_t pad[7]; /* Align to 64-bits. */
-
- uint64_t init_time; /* Time flow started in milliseconds. */
- uint64_t used_time; /* Time entry was last used in milliseconds. */
- uint64_t end_time; /* Time flow ended in milliseconds. */
-
- uint64_t packet_count;
- uint64_t byte_count;
-};
-OFP_ASSERT(sizeof(struct nx_flow_end) == 104);
-
#endif /* openflow/nicira-ext.h */
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
DP_GENL_C_MAX = __DP_GENL_C_MAX - 1
};
-/* Maximum number of datapaths. */
-#define DP_MAX 256
-
#endif /* openflow/openflow-netlink.h */
lib/dpif.h \
lib/netlink-protocol.h \
lib/netlink.c \
- lib/netlink.h \
- lib/vconn-netlink.c
+ lib/netlink.h
endif
if HAVE_OPENSSL
const struct cls_field cls_fields[CLS_N_FIELDS + 1] = {
#define CLS_FIELD(WILDCARDS, MEMBER, NAME) \
- { offsetof(struct flow, MEMBER), \
- sizeof ((struct flow *)0)->MEMBER, \
+ { offsetof(flow_t, MEMBER), \
+ sizeof ((flow_t *)0)->MEMBER, \
WILDCARDS, \
- #NAME }, \
+ #NAME },
CLS_FIELDS
#undef CLS_FIELD
- { sizeof(struct flow), 0, 0, "exact" },
+ { sizeof(flow_t), 0, 0, "exact" },
};
-static uint32_t hash_fields(const struct flow *, int table_idx);
-static bool equal_fields(const struct flow *, const struct flow *, int table_idx);
+static uint32_t hash_fields(const flow_t *, int table_idx);
+static bool equal_fields(const flow_t *, const flow_t *, int table_idx);
static int table_idx_from_wildcards(uint32_t wildcards);
static struct cls_rule *table_insert(struct hmap *, struct cls_rule *);
static struct cls_rule *search_table(const struct hmap *table, int field_idx,
const struct cls_rule *);
static struct cls_rule *search_exact_table(const struct classifier *,
- size_t hash, const struct flow *);
+ size_t hash, const flow_t *);
static bool rules_match_1wild(const struct cls_rule *fixed,
const struct cls_rule *wild, int field_idx);
*
* Rules without wildcards always have the maximum priority 65535. */
void
-cls_rule_from_flow(struct cls_rule *rule, const struct flow *flow,
+cls_rule_from_flow(struct cls_rule *rule, const flow_t *flow,
uint32_t wildcards, uint16_t priority)
{
assert(flow->reserved == 0);
* rules added more recently take priority over rules added less recently, but
* this is subject to change and should not be depended upon.) */
struct cls_rule *
-classifier_lookup(const struct classifier *cls, const struct flow *flow)
+classifier_lookup(const struct classifier *cls, const flow_t *flow)
{
struct cls_rule *best = NULL;
if (!hmap_is_empty(&cls->exact_table)) {
struct cls_rule *
classifier_find_rule_exactly(const struct classifier *cls,
- const struct flow *target, uint32_t wildcards,
+ const flow_t *target, uint32_t wildcards,
uint16_t priority)
{
struct cls_bucket *bucket;
}
\f
static struct cls_bucket *create_bucket(struct hmap *, size_t hash,
- const struct flow *fixed);
+ const flow_t *fixed);
static struct cls_rule *bucket_insert(struct cls_bucket *, struct cls_rule *);
static inline bool equal_bytes(const void *, const void *, size_t n);
* (CLS_F_IDX_*) are less than 'table_idx'. (If 'table_idx' is
* CLS_F_IDX_EXACT, hashes all the fields in 'flow'). */
static uint32_t
-hash_fields(const struct flow *flow, int table_idx)
+hash_fields(const flow_t *flow, int table_idx)
{
/* I just know I'm going to hell for writing code this way.
*
*
* Returns true if all the compared fields are equal, false otherwise. */
static bool
-equal_fields(const struct flow *a, const struct flow *b, int table_idx)
+equal_fields(const flow_t *a, const flow_t *b, int table_idx)
{
/* XXX The generated code could be better here. */
#define CLS_FIELD(WILDCARDS, MEMBER, NAME) \
/* Creates a bucket and inserts it in 'table' with the given 'hash' and 'fixed'
* values. Returns the new bucket. */
static struct cls_bucket *
-create_bucket(struct hmap *table, size_t hash, const struct flow *fixed)
+create_bucket(struct hmap *table, size_t hash, const flow_t *fixed)
{
struct cls_bucket *bucket = xmalloc(sizeof *bucket);
list_init(&bucket->rules);
* The compared field is the one with wildcard bit or bits 'field_wc', offset
* 'rule_ofs' within cls_rule's "fields" member, and length 'len', in bytes. */
static inline bool ALWAYS_INLINE
-field_matches(const struct flow *a_, const struct flow *b_,
+field_matches(const flow_t *a_, const flow_t *b_,
uint32_t wildcards, uint32_t nw_src_mask, uint32_t nw_dst_mask,
uint32_t field_wc, int ofs, int len)
{
case CLS_F_IDX_##NAME: \
if (!field_matches(&a->flow, &b->flow, \
wildcards, nw_src_mask, nw_dst_mask, \
- WILDCARDS, offsetof(struct flow, MEMBER), \
+ WILDCARDS, offsetof(flow_t, MEMBER), \
sizeof a->flow.MEMBER)) { \
return false; \
} \
static struct cls_rule *
search_exact_table(const struct classifier *cls, size_t hash,
- const struct flow *target)
+ const flow_t *target)
{
struct cls_rule *rule;
struct cls_bucket {
struct hmap_node hmap_node; /* Within struct classifier 'tables'. */
struct list rules; /* In order from highest to lowest priority. */
- struct flow fixed; /* Values for fixed fields. */
+ flow_t fixed; /* Values for fixed fields. */
};
/* A flow classification rule.
struct list list; /* Within struct cls_bucket 'rules'. */
struct hmap_node hmap; /* Within struct classifier 'exact_table'. */
} node;
- struct flow flow; /* All field values. */
+ flow_t flow; /* All field values. */
struct flow_wildcards wc; /* Wildcards for fields. */
uint16_t priority; /* Larger numbers are higher priorities. */
unsigned short table_idx; /* Index into struct classifier 'tables'. */
};
-void cls_rule_from_flow(struct cls_rule *, const struct flow *,
- uint32_t wildcards, uint16_t priority);
+void cls_rule_from_flow(struct cls_rule *, const flow_t *, uint32_t wildcards,
+ uint16_t priority);
void cls_rule_from_match(struct cls_rule *, const struct ofp_match *,
uint16_t priority);
void cls_rule_print(const struct cls_rule *);
int classifier_count_exact(const struct classifier *);
struct cls_rule *classifier_insert(struct classifier *, struct cls_rule *);
void classifier_remove(struct classifier *, struct cls_rule *);
-struct cls_rule *classifier_lookup(const struct classifier *,
- const struct flow *);
+struct cls_rule *classifier_lookup(const struct classifier *, const flow_t *);
typedef void cls_cb_func(struct cls_rule *, void *aux);
void classifier_for_each(const struct classifier *, cls_cb_func *, void *aux);
int include, cls_cb_func *, void *aux);
struct cls_rule *classifier_find_rule_exactly(const struct classifier *,
- const struct flow *target,
+ const flow_t *target,
uint32_t wildcards,
uint16_t priority);
for (; cli->received < 50; cli->received++) {
const struct ip_header *ip;
const struct dhcp_header *dhcp;
- struct flow flow;
+ flow_t flow;
int error;
ofpbuf_clear(&b);
/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
- *
+ *
* We are making the OpenFlow specification and associated documentation
* (Software) available for public use and benefit with the expectation
* that others will use, modify and enhance the Software and contribute
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
+ *
* The name and trademarks of copyright holder(s) may NOT be used in
* advertising or publicity pertaining to the Software or any
* derivatives without specific, written prior permission.
#include <assert.h>
#include <ctype.h>
#include <errno.h>
+#include <fcntl.h>
#include <inttypes.h>
+#include <net/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+#include "dynamic-string.h"
+#include "flow.h"
#include "netlink.h"
-#include "netlink-protocol.h"
+#include "ofp-print.h"
#include "ofpbuf.h"
-#include "openflow/openflow-netlink.h"
-#include "openflow/openflow.h"
#include "packets.h"
+#include "poll-loop.h"
#include "util.h"
-#include "xtoxll.h"
#include "vlog.h"
#define THIS_MODULE VLM_dpif
-/* Not really much point in logging many dpif errors. */
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60);
+/* Rate limit for individual messages going to or from the datapath, output at
+ * DBG level. This is very high because, if these are enabled, it is because
+ * we really need to see them. */
+static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
-/* The Generic Netlink family number used for OpenFlow. */
-static int openflow_family;
+/* Not really much point in logging many dpif errors. */
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
-static int lookup_openflow_multicast_group(int dp_idx, int *multicast_group);
-static int send_mgmt_command(struct dpif *, int dp_idx, int command,
- const char *netdev);
+static int get_minor_from_name(const char *name, unsigned int *minor);
+static int name_to_minor(const char *name, unsigned int *minor);
+static int lookup_minor(const char *name, unsigned int *minor);
+static int open_by_minor(unsigned int minor, struct dpif *);
+static int make_openflow_device(unsigned int minor, char **fnp);
+static char *odp_actions_to_string(const union odp_action actions[],
+ size_t n_actions);
-/* Opens a socket for a local datapath, initializing 'dp'. If
- * 'subscribe_dp_idx' is nonnegative, listens for asynchronous messages
- * (packet-in, etc.) from the datapath with that number; otherwise, 'dp' will
- * receive only replies to explicitly initiated requests. */
+/* Opens the existing datapath identified by 'name', initializing 'dpif'.
+ *
+ * 'name' is translated to a minor number with name_to_minor() and the
+ * corresponding device is opened with open_by_minor().  The datapath is then
+ * probed with ODP_GET_LISTEN_MASK to check that it has actually been created.
+ *
+ * Returns 0 if successful, otherwise a positive errno value (ENODEV from the
+ * probe if the datapath has not been created).  If the probe fails, 'dpif' is
+ * closed again before returning. */
int
-dpif_open(int subscribe_dp_idx, struct dpif *dp)
+dpif_open(const char *name, struct dpif *dpif)
{
- struct nl_sock *sock;
- int multicast_group = 0;
- int retval;
+ unsigned int minor;
+ int listen_mask;
+ int error;
- retval = nl_lookup_genl_family(DP_GENL_FAMILY_NAME, &openflow_family);
- if (retval) {
- return retval;
- }
+ dpif->fd = -1;
- if (subscribe_dp_idx >= 0) {
- retval = lookup_openflow_multicast_group(subscribe_dp_idx,
- &multicast_group);
- if (retval) {
- return retval;
- }
+ error = name_to_minor(name, &minor);
+ if (error) {
+ return error;
}
- /* Specify a large so_rcvbuf size because we occasionally need to be able
- * to retrieve large collections of flow records. */
- retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0,
- 4 * 1024u * 1024, &sock);
- if (retval) {
- return retval;
+ error = open_by_minor(minor, dpif);
+ if (error) {
+ return error;
}
- dp->sock = sock;
+ /* We can open the device, but that doesn't mean that it's been created.
+ * If it hasn't been, then any command other than ODP_DP_CREATE will
+ * return ENODEV. Try something innocuous. */
+ if (ioctl(dpif->fd, ODP_GET_LISTEN_MASK, &listen_mask)) {
+ error = errno;
+ if (error != ENODEV) {
+ VLOG_WARN("dp%u: probe returned unexpected error: %s",
+ minor, strerror(error));
+ }
+ dpif_close(dpif);
+ return error;
+ }
return 0;
}
-/* Closes 'dp'. */
+/* Closes the file descriptor held by 'dpif' and resets it to -1.  Does
+ * nothing if 'dpif' is a null pointer. */
void
-dpif_close(struct dpif *dp)
+dpif_close(struct dpif *dpif)
{
- if (dp) {
- nl_sock_destroy(dp->sock);
+ if (dpif) {
+ close(dpif->fd);
+ dpif->fd = -1;
}
}
-static const struct nl_policy openflow_policy[] = {
- [DP_GENL_A_DP_IDX] = { .type = NL_A_U32,
- .optional = false },
- [DP_GENL_A_OPENFLOW] = { .type = NL_A_UNSPEC,
- .min_len = sizeof(struct ofp_header),
- .max_len = 65535,
- .optional = false },
-};
+#define IOCTL(DPIF, CMD, ARG) do_ioctl(DPIF, CMD, #CMD, ARG)
+
+/* Issues ioctl 'cmd' with argument 'arg' on the file descriptor held by
+ * 'dpif' and logs the outcome.  'cmd_name' is the human-readable command name
+ * used in log messages (the IOCTL() macro passes the stringified command).
+ *
+ * Returns 0 if the ioctl succeeds, otherwise the positive errno value that
+ * ioctl() reported. */
+static int
+do_ioctl(const struct dpif *dpif, int cmd, const char *cmd_name,
+         const void *arg)
+{
+    if (ioctl(dpif->fd, cmd, arg)) {
+        /* Capture errno immediately: the logging call and strerror() are
+         * free to modify it, and returning a clobbered value (possibly 0)
+         * would misreport the failure to the caller. */
+        int error = errno;
+
+        VLOG_WARN_RL(&error_rl, "dp%u: ioctl(%s) failed (%s)",
+                     dpif->minor, cmd_name, strerror(error));
+        return error;
+    } else {
+        VLOG_DBG_RL(&dpmsg_rl, "dp%u: ioctl(%s): success",
+                    dpif->minor, cmd_name);
+        return 0;
+    }
+}
-/* Tries to receive an openflow message from datapath 'dp_idx' on 'sock'. If
- * successful, stores the received message into '*msgp' and returns 0. The
- * caller is responsible for destroying the message with ofpbuf_delete(). On
- * failure, returns a positive errno value and stores a null pointer into
- * '*msgp'.
- *
- * Only Netlink messages with embedded OpenFlow messages are accepted. Other
- * Netlink messages provoke errors.
- *
- * If 'wait' is true, dpif_recv_openflow waits for a message to be ready;
- * otherwise, returns EAGAIN if the 'sock' receive buffer is empty. */
+/* Creates a new datapath and opens it as 'dpif'.
+ *
+ * If get_minor_from_name() extracts a minor number from 'name' (it returns
+ * 0), exactly that minor is opened and ODP_DP_CREATE is issued on it; when
+ * 'name' begins with "nl:" the device name passed to the kernel is
+ * "of<minor>", otherwise 'name' itself is passed.
+ *
+ * Otherwise, minors 0 through ODP_MAX - 1 are tried in order until
+ * ODP_DP_CREATE succeeds; a minor that reports EEXIST is skipped.
+ *
+ * Returns 0 if successful, leaving 'dpif' open.  Otherwise returns a
+ * positive errno value (ENOBUFS if every minor is already in use) and leaves
+ * no file descriptor open in 'dpif'. */
int
-dpif_recv_openflow(struct dpif *dp, int dp_idx, struct ofpbuf **bufferp,
- bool wait)
+dpif_create(const char *name, struct dpif *dpif)
{
- struct nlattr *attrs[ARRAY_SIZE(openflow_policy)];
- struct ofpbuf *buffer;
- struct ofp_header *oh;
- uint16_t ofp_len;
+ unsigned int minor;
+ int error;
+
+ if (!get_minor_from_name(name, &minor)) {
+ /* Minor was specified in 'name', go ahead and create it. */
+ dpif->fd = -1;
+ error = open_by_minor(minor, dpif);
+ if (error) {
+ return error;
+ }
- buffer = *bufferp = NULL;
- do {
- int retval;
-
- do {
- ofpbuf_delete(buffer);
- retval = nl_sock_recv(dp->sock, &buffer, wait);
- } while (retval == ENOBUFS
- || (!retval
- && (nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE
- || nl_msg_nlmsgerr(buffer, NULL))));
- if (retval) {
- if (retval != EAGAIN) {
- VLOG_WARN_RL(&rl, "dpif_recv_openflow: %s", strerror(retval));
- }
- return retval;
+ if (!strncmp(name, "nl:", 3)) {
+ char devname[128];
+ sprintf(devname, "of%u", minor);
+ error = ioctl(dpif->fd, ODP_DP_CREATE, devname) < 0 ? errno : 0;
+ } else {
+ error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
+ }
+ if (error) {
+ dpif_close(dpif);
}
+ return error;
+ } else {
+ for (minor = 0; minor < ODP_MAX; minor++) {
+ error = open_by_minor(minor, dpif);
+ if (error) {
+ return error;
+ }
- if (nl_msg_genlmsghdr(buffer) == NULL) {
- VLOG_DBG_RL(&rl, "received packet too short for Generic Netlink");
- goto error;
+ error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0;
+ if (!error) {
+ return 0;
+ }
+ /* This minor is unusable.  Close its fd before either giving up or
+ * moving on to the next minor; otherwise every occupied minor
+ * (EEXIST) would leak one file descriptor. */
+ dpif_close(dpif);
+ if (error != EEXIST) {
+ return error;
+ }
}
- if (nl_msg_nlmsghdr(buffer)->nlmsg_type != openflow_family) {
- VLOG_DBG_RL(&rl,
- "received type (%"PRIu16") != openflow family (%d)",
- nl_msg_nlmsghdr(buffer)->nlmsg_type, openflow_family);
+ return ENOBUFS;
+ }
+}
+
+/* Issues ODP_DP_DESTROY on 'dpif'.  Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+dpif_delete(struct dpif *dpif)
+{
+    int error = IOCTL(dpif, ODP_DP_DESTROY, NULL);
+
+    return error;
+}
+
+/* Zeroes '*stats' and then asks the datapath to fill it in via ODP_DP_STATS.
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
+{
+    int error;
+
+    memset(stats, 0, sizeof *stats);
+    error = IOCTL(dpif, ODP_DP_STATS, stats);
+    return error;
+}
+
+/* Queries the datapath's ODP_GET_DROP_FRAGS setting.
+ *
+ * On success, stores the low bit of the value reported by the datapath in
+ * '*drop_frags' and returns 0.  On failure, stores false in '*drop_frags' and
+ * returns a positive errno value. */
+int
+dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
+{
+    int tmp = 0;
+    int error = IOCTL(dpif, ODP_GET_DROP_FRAGS, &tmp);
+
+    /* Only trust 'tmp' when the ioctl succeeded; on error report false. */
+    *drop_frags = error ? false : tmp & 1;
+    return error;
+}
+
+/* Passes 'drop_frags' (as an int) to the datapath via ODP_SET_DROP_FRAGS.
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
+{
+    int value = drop_frags;     /* The ioctl argument is a pointer to int. */
+    int error = IOCTL(dpif, ODP_SET_DROP_FRAGS, &value);
+
+    return error;
+}
+
+/* Retrieves the datapath's listen mask into '*listen_mask' via
+ * ODP_GET_LISTEN_MASK.  Returns 0 if successful; otherwise stores 0 in
+ * '*listen_mask' and returns a positive errno value. */
+int
+dpif_get_listen_mask(const struct dpif *dpif, int *listen_mask)
+{
+    int retval = IOCTL(dpif, ODP_GET_LISTEN_MASK, listen_mask);
+
+    if (!retval) {
+        return 0;
+    }
+    *listen_mask = 0;
+    return retval;
+}
+
+/* Passes 'listen_mask' to the datapath via ODP_SET_LISTEN_MASK.  Returns 0 if
+ * successful, otherwise a positive errno value. */
+int
+dpif_set_listen_mask(struct dpif *dpif, int listen_mask)
+{
+    int error = IOCTL(dpif, ODP_SET_LISTEN_MASK, &listen_mask);
+
+    return error;
+}
+
+/* Issues ODP_PORT_ADD for the device named 'devname' with port number
+ * 'port_no'.  Returns 0 if successful, otherwise a positive errno value.
+ *
+ * NOTE(review): strncpy() leaves 'devname' in the request unterminated if the
+ * name is sizeof port.devname bytes or longer -- confirm that the kernel side
+ * copes with that. */
+int
+dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no)
+{
+    struct odp_port request;
+
+    memset(&request, 0, sizeof request);
+    request.port = port_no;
+    strncpy(request.devname, devname, sizeof request.devname);
+
+    return IOCTL(dpif, ODP_PORT_ADD, &request);
+}
+
+/* Issues ODP_PORT_DEL for port number 'port_no'.  Returns 0 if successful,
+ * otherwise a positive errno value. */
+int
+dpif_port_del(struct dpif *dpif, uint16_t port_no)
+{
+    int port = port_no;         /* The ioctl argument is a pointer to int. */
+    int error = IOCTL(dpif, ODP_PORT_DEL, &port);
+
+    return error;
+}
+
+/* Issues ODP_PORT_QUERY with '*port' zeroed except for its 'port' member,
+ * which is set to 'port_no'.  Returns 0 if successful, otherwise a positive
+ * errno value. */
+int
+dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
+                          struct odp_port *port)
+{
+    int error;
+
+    memset(port, 0, sizeof *port);
+    port->port = port_no;
+    error = IOCTL(dpif, ODP_PORT_QUERY, port);
+    return error;
+}
+
+/* Issues ODP_PORT_QUERY with '*port' zeroed except for its 'devname' member,
+ * which receives a (possibly truncated) copy of 'devname'.  Returns 0 if
+ * successful, otherwise a positive errno value. */
+int
+dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
+                        struct odp_port *port)
+{
+    int error;
+
+    memset(port, 0, sizeof *port);
+    strncpy(port->devname, devname, sizeof port->devname);
+    error = IOCTL(dpif, ODP_PORT_QUERY, port);
+    return error;
+}
+
+/* Retrieves the datapath's port list.
+ *
+ * The number of ports is first read with dpif_get_dp_stats(), a buffer of
+ * that many entries is allocated, and ODP_PORT_LIST is issued on it.  If the
+ * count reported back by ODP_PORT_LIST differs from the one used for the
+ * allocation, the buffer is discarded and the whole sequence is retried.
+ *
+ * On success, returns 0 and stores the allocated array in '*ports' and its
+ * length in '*n_ports'; nothing here frees the array, so the caller
+ * presumably owns it and must free() it.  On failure, returns a positive
+ * errno value and stores NULL in '*ports' and 0 in '*n_ports'. */
+int
+dpif_port_list(const struct dpif *dpif,
+ struct odp_port **ports, size_t *n_ports)
+{
+ struct odp_portvec pv;
+ struct odp_stats stats;
+ int error;
+
+ do {
+ error = dpif_get_dp_stats(dpif, &stats);
+ if (error) {
goto error;
}
- if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN,
- openflow_policy, attrs,
- ARRAY_SIZE(openflow_policy))) {
+ *ports = xcalloc(1, stats.n_ports * sizeof **ports);
+ pv.ports = *ports;
+ pv.n_ports = stats.n_ports;
+ error = IOCTL(dpif, ODP_PORT_LIST, &pv);
+ if (error) {
+ free(*ports);
goto error;
}
+ if (pv.n_ports != stats.n_ports) {
+ /* The port count changed behind our back and we are about to retry
+ * with a fresh allocation: release this buffer so it is not leaked. */
+ free(*ports);
+ }
- } while (nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]) != dp_idx);
-
- oh = buffer->data = (void *) nl_attr_get(attrs[DP_GENL_A_OPENFLOW]);
- buffer->size = nl_attr_get_size(attrs[DP_GENL_A_OPENFLOW]);
- ofp_len = ntohs(oh->length);
- if (ofp_len != buffer->size) {
- VLOG_WARN_RL(&rl,
- "ofp_header.length %"PRIu16" != attribute length %zu\n",
- ofp_len, buffer->size);
- buffer->size = MIN(ofp_len, buffer->size);
- }
- *bufferp = buffer;
+ } while (pv.n_ports != stats.n_ports);
+ *n_ports = pv.n_ports;
return 0;
error:
- ofpbuf_delete(buffer);
- return EPROTO;
+ *ports = NULL;
+ *n_ports = 0;
+ return error;
}
-/* Encapsulates 'msg', which must contain an OpenFlow message, in a Netlink
- * message, and sends it to the OpenFlow local datapath numbered 'dp_idx' via
- * 'sock'.
- *
- * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
- * if the 'sock' send buffer is full.
- *
- * If the send is successful, then the kernel module will receive it, but there
- * is no guarantee that any reply will not be dropped (see nl_sock_transact()
- * for details).
- */
+/* Issues ODP_PORT_GROUP_SET for port group 'group' with the 'n_ports' port
+ * numbers in 'ports'.  'n_ports' must fit in 16 bits because struct
+ * odp_port_group stores the count in a __u16.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
int
-dpif_send_openflow(struct dpif *dp, int dp_idx, struct ofpbuf *buffer)
-{
- struct ofp_header *oh;
- unsigned int dump_flag;
- struct ofpbuf hdr;
- struct nlattr *nla;
- uint32_t fixed_buffer[64 / 4];
- struct iovec iov[3];
- int pad_bytes;
- int n_iov;
- int retval;
+dpif_port_group_set(struct dpif *dpif, uint16_t group,
+ const uint16_t ports[], size_t n_ports)
+{
+ struct odp_port_group pg;
+
+ assert(n_ports <= UINT16_MAX);
+ pg.group = group;
+ /* The struct member is non-const, hence the cast; presumably the kernel
+ * only reads the array for a "set" -- TODO confirm. */
+ pg.ports = (uint16_t *) ports;
+ pg.n_ports = n_ports;
+ return IOCTL(dpif, ODP_PORT_GROUP_SET, &pg);
+}
+
+/* Retrieves the members of port group 'group' via ODP_PORT_GROUP_GET into
+ * 'ports', which has room for 'n_ports' entries ('n_ports' must fit in 16
+ * bits).
+ *
+ * On success, returns 0 and stores the port count reported by the datapath in
+ * '*n_out'.  On failure, returns a positive errno value and stores 0 in
+ * '*n_out'.
+ *
+ * Careful: '*n_out' can be greater than 'n_ports' on return, if 'n_ports' is
+ * less than the number of ports in 'group'. */
+int
+dpif_port_group_get(const struct dpif *dpif, uint16_t group,
+                    uint16_t ports[], size_t n_ports, size_t *n_out)
+{
+    struct odp_port_group pg;
+    int error;
+
+    assert(n_ports <= UINT16_MAX);
+    pg.group = group;
+    pg.ports = ports;
+    pg.n_ports = n_ports;
+    /* Must be the GET command: issuing ODP_PORT_GROUP_SET here would
+     * overwrite the group with the caller's output buffer contents. */
+    error = IOCTL(dpif, ODP_PORT_GROUP_GET, &pg);
+    *n_out = error ? 0 : pg.n_ports;
+    return error;
+}
+
+/* Issues ODP_FLOW_FLUSH on 'dpif'.  Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+dpif_flow_flush(struct dpif *dpif)
+{
+    int error = IOCTL(dpif, ODP_FLOW_FLUSH, NULL);
+
+    return error;
+}
+
+/* Passes 'flow' to the datapath via ODP_FLOW_ADD.  When debug logging is
+ * enabled, first logs the flow's key and actions in string form.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_flow_add(struct dpif *dpif, struct odp_flow *flow)
+{
+    int error;
+
+    if (VLOG_IS_DBG_ENABLED()) {
+        /* Both strings are heap-allocated and must be freed here. */
+        char *acts = odp_actions_to_string(flow->actions, flow->n_actions);
+        char *key = flow_to_string(&flow->key);
+
+        VLOG_DBG("adding flow %s with actions %s", key, acts);
+        free(key);
+        free(acts);
+    }
+
+    error = IOCTL(dpif, ODP_FLOW_ADD, flow);
+    return error;
+}
+
+/* Issues ODP_FLOW_SET_ACTS for the flow identified by 'key', supplying the
+ * 'n_actions' actions in 'actions'.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+dpif_flow_set_actions(struct dpif *dpif, const struct odp_flow_key *key,
+                      const union odp_action *actions, size_t n_actions)
+{
+    struct odp_flow flow;
+
+    /* Zero the whole request so that the 'stats' member and any padding are
+     * not handed to the kernel as uninitialized stack contents. */
+    memset(&flow, 0, sizeof flow);
+    flow.key = *key;
+    /* The struct member is non-const, hence the cast. */
+    flow.actions = (union odp_action *) actions;
+    flow.n_actions = n_actions;
+    return IOCTL(dpif, ODP_FLOW_SET_ACTS, &flow);
+}
+
+/* Passes 'flow' to the datapath via ODP_FLOW_DEL.  Returns 0 if successful,
+ * otherwise a positive errno value. */
+int
+dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
+{
+    int error = IOCTL(dpif, ODP_FLOW_DEL, flow);
+
+    return error;
+}
- /* The reply to OFPT_STATS_REQUEST may be multiple segments long, so we
- * need to specify NLM_F_DUMP in the request. */
- oh = ofpbuf_at_assert(buffer, 0, sizeof *oh);
- dump_flag = oh->type == OFPT_STATS_REQUEST ? NLM_F_DUMP : 0;
-
- ofpbuf_use(&hdr, fixed_buffer, sizeof fixed_buffer);
- nl_msg_put_genlmsghdr(&hdr, dp->sock, 32, openflow_family,
- NLM_F_REQUEST | dump_flag, DP_GENL_C_OPENFLOW, 1);
- nl_msg_put_u32(&hdr, DP_GENL_A_DP_IDX, dp_idx);
- nla = ofpbuf_put_uninit(&hdr, sizeof *nla);
- nla->nla_len = sizeof *nla + buffer->size;
- nla->nla_type = DP_GENL_A_OPENFLOW;
- pad_bytes = NLA_ALIGN(nla->nla_len) - nla->nla_len;
- nl_msg_nlmsghdr(&hdr)->nlmsg_len = hdr.size + buffer->size + pad_bytes;
- n_iov = 2;
- iov[0].iov_base = hdr.data;
- iov[0].iov_len = hdr.size;
- iov[1].iov_base = buffer->data;
- iov[1].iov_len = buffer->size;
- if (pad_bytes) {
- static char zeros[NLA_ALIGNTO];
- n_iov++;
- iov[2].iov_base = zeros;
- iov[2].iov_len = pad_bytes;
- }
- retval = nl_sock_sendv(dp->sock, iov, n_iov, false);
- if (retval && retval != EAGAIN) {
- VLOG_WARN_RL(&rl, "dpif_send_openflow: %s", strerror(retval));
- }
- return retval;
-}
-
-/* Creates local datapath numbered 'dp_idx' with the name 'dp_name'. A
- * 'dp_idx' of -1 or null 'dp_name' will have the kernel module choose values.
- * (At least one or the other must be provided, however, so that the caller can
- * identify the datapath that was created.) Returns 0 if successful, otherwise
- * a positive errno value. */
+/* Issues the ODP_FLOW_QUERY request on 'dpif' with 'flow' as its argument and
+ * returns the IOCTL wrapper's result unchanged. */
int
-dpif_add_dp(struct dpif *dp, int dp_idx, const char *dp_name)
+dpif_flow_query(const struct dpif *dpif, struct odp_flow *flow)
{
- return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_DP, dp_name);
+ return IOCTL(dpif, ODP_FLOW_QUERY, flow);
}
-/* Destroys a local datapath. If 'dp_idx' is not -1, destroys the datapath
- * with that number; if 'dp_name' is not NULL, destroys the datapath with that
- * name. Exactly one of 'dp_idx' and 'dp_name' should be used. Returns 0 if
- * successful, otherwise a positive errno value. */
+/* Issues ODP_FLOW_QUERY_MULTIPLE on 'dpif' for the 'n' entries of 'flows',
+ * wrapped in a 'struct odp_flowvec'.  Returns the IOCTL wrapper's result. */
int
-dpif_del_dp(struct dpif *dp, int dp_idx, const char *dp_name)
+dpif_flow_query_multiple(const struct dpif *dpif,
+ struct odp_flow flows[], size_t n)
{
- return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_DP, dp_name);
+ struct odp_flowvec fv;
+ fv.flows = flows;
+ fv.n_flows = n;
+ return IOCTL(dpif, ODP_FLOW_QUERY_MULTIPLE, &fv);
}
-/* Adds the Ethernet device named 'netdev' to the local datapath numbered
- * 'dp_idx'. Returns 0 if successful, otherwise a positive errno value. */
+/* Issues ODP_FLOW_LIST on 'dpif' with room for 'n' entries in 'flows'.  On
+ * success stores the flow count left in the request structure into '*n_out';
+ * on failure stores 0 there.  Returns the IOCTL wrapper's result. */
int
-dpif_add_port(struct dpif *dp, int dp_idx, const char *netdev)
+dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
+ size_t *n_out)
{
- return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_PORT, netdev);
+ struct odp_flowvec fv;
+ uint32_t i;
+ int error;
+
+ fv.flows = flows;
+ fv.n_flows = n;
+ /* Every entry is submitted with no action buffer attached. */
+ for (i = 0; i < n; i++) {
+ flows[i].actions = NULL;
+ flows[i].n_actions = 0;
+ }
+ error = IOCTL(dpif, ODP_FLOW_LIST, &fv);
+ *n_out = error ? 0 : fv.n_flows;
+ return error;
}
-/* Removes the Ethernet device named 'netdev' from the local datapath numbered
- * 'dp_idx'. Returns 0 if successful, otherwise a positive errno value. */
+/* Lists the flows in 'dpif' into a newly allocated array.  The array is sized
+ * from the flow count in the datapath statistics and then filled in by
+ * dpif_flow_list().
+ *
+ * On success returns 0, stores the array in '*flowsp' and the number of flows
+ * listed in '*np'; the array comes from xmalloc(), so the caller is presumably
+ * expected to free() it.  On failure returns the error from the failing call
+ * and leaves '*flowsp' null and '*np' zero. */
int
-dpif_del_port(struct dpif *dp, int dp_idx, const char *netdev)
+dpif_flow_list_all(const struct dpif *dpif,
+ struct odp_flow **flowsp, size_t *np)
{
- return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_PORT, netdev);
+ struct odp_stats stats;
+ struct odp_flow *flows;
+ size_t n_flows;
+ int error;
+
+ *flowsp = NULL;
+ *np = 0;
+
+ error = dpif_get_dp_stats(dpif, &stats);
+ if (error) {
+ return error;
+ }
+
+ flows = xmalloc(sizeof *flows * stats.n_flows);
+ error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
+ if (error) {
+ free(flows);
+ return error;
+ }
+
+ /* The two counts come from separate requests, so a mismatch is only
+ * logged; the listing's count is the one reported to the caller. */
+ if (stats.n_flows != n_flows) {
+ VLOG_WARN_RL(&error_rl, "dp%u: datapath stats reported %"PRIu32" "
+ "flows but flow listing reported %zu",
+ dpif->minor, stats.n_flows, n_flows);
+ }
+ *flowsp = flows;
+ *np = n_flows;
+ return 0;
+}
+
+/* Issues ODP_EXECUTE on 'dpif', asking it to apply the 'n_actions' entries of
+ * 'actions' to the packet in 'buf' as if received on 'in_port'.  When debug
+ * logging is enabled the actions and packet are logged first.  Returns the
+ * IOCTL wrapper's result. */
+int
+dpif_execute(struct dpif *dpif, uint16_t in_port,
+ const union odp_action actions[], size_t n_actions,
+ const struct ofpbuf *buf)
+{
+ struct odp_execute execute;
+ /* Zero the whole request so members not assigned below are 0. */
+ memset(&execute, 0, sizeof execute);
+ execute.in_port = in_port;
+ /* The request structure holds a non-const pointer, hence the cast. */
+ execute.actions = (union odp_action *) actions;
+ execute.n_actions = n_actions;
+ execute.data = buf->data;
+ execute.length = buf->size;
+ if (VLOG_IS_DBG_ENABLED()) {
+ char *actions_string = odp_actions_to_string(actions, n_actions);
+ char *packet_string = ofp_packet_to_string(buf->data, buf->size,
+ buf->size);
+ VLOG_DBG("executing %s with in_port=%"PRIu16" on packet %s",
+ actions_string, in_port, packet_string);
+ free(actions_string);
+ free(packet_string);
+ }
+ return IOCTL(dpif, ODP_EXECUTE, &execute);
+}
+
+/* Issues the ODP_SNAT_ADD_PORT request on 'dpif' with 'osc' as its argument
+ * and returns the IOCTL wrapper's result unchanged. */
+int
+dpif_snat_add_port(struct dpif *dpif, const struct odp_snat_config *osc)
+{
+ return IOCTL(dpif, ODP_SNAT_ADD_PORT, osc);
+}
+
+/* Issues the ODP_SNAT_DEL_PORT request on 'dpif' for 'port' and returns the
+ * IOCTL wrapper's result unchanged. */
+int
+dpif_snat_del_port(struct dpif *dpif, uint16_t port)
+{
+ /* The request is handed a pointer to an int rather than to the 16-bit
+ * port number (presumably what the ioctl expects -- TODO confirm). */
+ int tmp = port;
+ return IOCTL(dpif, ODP_SNAT_DEL_PORT, &tmp);
+}
+
+/* Reads one message from the file descriptor of 'dpif' into a newly
+ * allocated ofpbuf.
+ *
+ * On success returns 0 and stores the buffer, which begins with a
+ * 'struct odp_msg', in '*bufp'; the caller then owns it.  On failure stores
+ * NULL in '*bufp', deletes the buffer, and returns:
+ *
+ * - the errno value from read() (EAGAIN is not logged),
+ * - ERANGE if fewer bytes were read than a message header, or fewer than
+ * the header's 'length' member claims,
+ * - EPROTO if read() returned 0 (end of file). */
+int
+dpif_recv(struct dpif *dpif, struct ofpbuf **bufp)
+{
+ struct ofpbuf *buf;
+ int retval;
+ int error;
+
+ buf = ofpbuf_new(2048); /* XXX scale based on netdev MTUs */
+ retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
+ if (retval < 0) {
+ error = errno;
+ if (error != EAGAIN) {
+ VLOG_WARN_RL(&error_rl, "dp%u: read failed: %s",
+ dpif->minor, strerror(error));
+ }
+ } else if (retval >= sizeof(struct odp_msg)) {
+ struct odp_msg *msg = buf->data;
+ /* Accept the message only if all of the bytes it claims were read. */
+ if (msg->length <= retval) {
+ buf->size += retval;
+ if (VLOG_IS_DBG_ENABLED()) {
+ void *payload = msg + 1;
+ size_t length = buf->size - sizeof *msg;
+ char *s = ofp_packet_to_string(payload, length, length);
+ VLOG_DBG_RL(&dpmsg_rl, "dp%u: received %s message of length "
+ "%zu on port %"PRIu16": %s", dpif->minor,
+ (msg->type == _ODPL_MISS_NR ? "miss"
+ : msg->type == _ODPL_ACTION_NR ? "action"
+ : "<unknown>"),
+ msg->length - sizeof(struct odp_msg),
+ msg->port, s);
+ free(s);
+ }
+ *bufp = buf;
+ return 0;
+ } else {
+ VLOG_WARN_RL(&error_rl, "dp%u: discarding message truncated "
+ "from %zu bytes to %d",
+ dpif->minor, msg->length, retval);
+ error = ERANGE;
+ }
+ } else if (!retval) {
+ VLOG_WARN_RL(&error_rl, "dp%u: unexpected end of file", dpif->minor);
+ error = EPROTO;
+ } else {
+ VLOG_WARN_RL(&error_rl,
+ "dp%u: discarding too-short message (%d bytes)",
+ dpif->minor, retval);
+ error = ERANGE;
+ }
+
+ /* Common failure exit: nothing is handed back to the caller. */
+ *bufp = NULL;
+ ofpbuf_delete(buf);
+ return error;
+}
+
+/* Registers the file descriptor of 'dpif' with the poll loop for POLLIN, i.e.
+ * for the fd that dpif_recv() reads from becoming readable. */
+void
+dpif_recv_wait(struct dpif *dpif)
+{
+ poll_fd_wait(dpif->fd, POLLIN);
}
\f
-static const struct nl_policy openflow_multicast_policy[] = {
- [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 },
- [DP_GENL_A_DP_NAME] = { .type = NL_A_STRING },
- [DP_GENL_A_MC_GROUP] = { .type = NL_A_U32 },
+/* State for watching link changes that concern one datapath; filled in by
+ * dpifmon_create(). */
+struct dpifmon {
+ const struct dpif *dpif; /* Datapath passed to dpifmon_create(). */
+ struct nl_sock *sock; /* NETLINK_ROUTE socket on RTNLGRP_LINK. */
+ int local_ifindex; /* ifindex of the ODPP_LOCAL port's device. */
};
-/* Looks up the Netlink multicast group and datapath index of a datapath
- * by either the datapath index or name. If 'dp_idx' points to a value
- * of '-1', then 'dp_name' is used to lookup the datapath. If successful,
- * stores the multicast group in '*multicast_group' and the index in
- * '*dp_idx' and returns 0. Otherwise, returns a positive errno value. */
-static int
-query_datapath(int *dp_idx, int *multicast_group, const char *dp_name)
+/* Creates a monitor for link changes that concern datapath 'dpif'.
+ *
+ * Looks up the datapath's ODPP_LOCAL port, resolves that device's ifindex,
+ * and opens a NETLINK_ROUTE socket subscribed to RTNLGRP_LINK.  On success
+ * returns 0 and stores the new monitor in '*monp'.  On failure returns a
+ * nonzero error value and stores NULL in '*monp'. */
+int
+dpifmon_create(const struct dpif *dpif, struct dpifmon **monp)
{
struct nl_sock *sock;
- struct ofpbuf request, *reply;
- struct nlattr *attrs[ARRAY_SIZE(openflow_multicast_policy)];
- int retval;
+    struct dpifmon *mon;
+    struct odp_port local;
+    unsigned int local_ifindex;
+    int error;
+
+    *monp = NULL;
+
+    error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &local);
+    if (error) {
+        return error;
+    }
- retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock);
- if (retval) {
- return retval;
- }
- ofpbuf_init(&request, 0);
- nl_msg_put_genlmsghdr(&request, sock, 0, openflow_family, NLM_F_REQUEST,
- DP_GENL_C_QUERY_DP, 1);
- if (*dp_idx != -1) {
- nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, *dp_idx);
- }
- if (dp_name) {
- nl_msg_put_string(&request, DP_GENL_A_DP_NAME, dp_name);
- }
- retval = nl_sock_transact(sock, &request, &reply);
- ofpbuf_uninit(&request);
- if (retval) {
- nl_sock_destroy(sock);
- return retval;
- }
- if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN,
- openflow_multicast_policy, attrs,
- ARRAY_SIZE(openflow_multicast_policy))) {
- nl_sock_destroy(sock);
- ofpbuf_delete(reply);
- return EPROTO;
- }
- *dp_idx = nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]);
- *multicast_group = nl_attr_get_u32(attrs[DP_GENL_A_MC_GROUP]);
- nl_sock_destroy(sock);
- ofpbuf_delete(reply);
+    local_ifindex = if_nametoindex(local.devname);
+    if (!local_ifindex) {
+        /* Capture errno before logging: the logging call may change it, and
+         * returning a clobbered (possibly zero) value would report success
+         * while '*monp' is still NULL. */
+        error = errno;
+        VLOG_WARN("could not get ifindex of %s device: %s",
+                  local.devname, strerror(error));
+        return error;
+    }
+
+    error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &sock);
+    if (error) {
+        VLOG_WARN("could not create rtnetlink socket: %s", strerror(error));
+        return error;
+    }
+
+    mon = *monp = xmalloc(sizeof *mon);
+    mon->dpif = dpif;
+    mon->sock = sock;
+    mon->local_ifindex = local_ifindex;
return 0;
}
-/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If
- * successful, stores the multicast group in '*multicast_group' and returns 0.
- * Otherwise, returns a positve errno value. */
-static int
-lookup_openflow_multicast_group(int dp_idx, int *multicast_group)
+/* Destroys 'mon': closes its netlink socket and releases the monitor
+ * structure itself.  Does nothing if 'mon' is null.  The datapath that 'mon'
+ * refers to is not touched. */
+void
+dpifmon_destroy(struct dpifmon *mon)
{
- return query_datapath(&dp_idx, multicast_group, NULL);
+    if (mon) {
+        nl_sock_destroy(mon->sock);
+        /* The structure was allocated with xmalloc() in dpifmon_create();
+         * without this it would leak on every create/destroy cycle. */
+        free(mon);
+    }
}
-/* Looks up the datatpath index based on the name. Returns the index, or
- * -1 on error. */
+/* Receives link notifications from the socket of 'mon' until one concerns the
+ * monitored datapath or receiving fails.
+ *
+ * A notification counts as "for us" if its IFLA_MASTER attribute equals the
+ * ifindex recorded at creation, or, when it has no IFLA_MASTER, if the device
+ * name can be looked up as a port of the datapath.
+ *
+ * Returns 0 and stores a copy of the device name (made with xstrdup(), so
+ * presumably to be freed by the caller) in '*devnamep' when a relevant
+ * notification was received.  Otherwise stores NULL in '*devnamep' and
+ * returns the error from nl_sock_recv(), or ENOBUFS if a message failed to
+ * parse. */
int
-dpif_get_idx(const char *name)
+dpifmon_poll(struct dpifmon *mon, char **devnamep)
{
- int dp_idx = -1;
- int mc_group = 0;
+ static struct vlog_rate_limit slow_rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ static const struct nl_policy rtnlgrp_link_policy[] = {
+ [IFLA_IFNAME] = { .type = NL_A_STRING },
+ [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
+ };
+ struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
+ struct ofpbuf *buf;
+ int error;
+
+ *devnamep = NULL;
+again:
+ error = nl_sock_recv(mon->sock, &buf, false);
+ switch (error) {
+ case 0:
+ if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
+ rtnlgrp_link_policy,
+ attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
+ VLOG_WARN_RL(&slow_rl, "received bad rtnl message");
+ /* An unparseable message is reported like a socket overflow. */
+ error = ENOBUFS;
+ } else {
+ const char *devname = nl_attr_get_string(attrs[IFLA_IFNAME]);
+ bool for_us;
+
+ if (attrs[IFLA_MASTER]) {
+ uint32_t master_ifindex = nl_attr_get_u32(attrs[IFLA_MASTER]);
+ for_us = master_ifindex == mon->local_ifindex;
+ } else {
+ struct odp_port odp_port;
+ for_us = (dpif_port_query_by_name(mon->dpif, devname,
+ &odp_port) == 0);
+ }
- if (query_datapath(&dp_idx, &mc_group, name)) {
- return -1;
+ if (!for_us) {
+ /* Not for us, try again. */
+ ofpbuf_delete(buf);
+ goto again;
+ }
+ /* 'devname' points into 'buf', so copy it before 'buf' is deleted. */
+ *devnamep = xstrdup(devname);
+ }
+ ofpbuf_delete(buf);
+ break;
+
+ case EAGAIN:
+ /* Nothing to do. */
+ break;
+
+ case ENOBUFS:
+ VLOG_WARN_RL(&slow_rl, "dpifmon socket overflowed");
+ break;
+
+ default:
+ VLOG_WARN_RL(&slow_rl, "error on dpifmon socket: %s", strerror(error));
+ break;
}
+ return error;
+}
+
+/* Periodic-work hook for 'mon'.  It is a no-op here and 'mon' is unused. */
+void
+dpifmon_run(struct dpifmon *mon UNUSED)
+{
+ /* Nothing to do in this implementation. */
+}
- return dp_idx;
+/* Registers the netlink socket of 'mon' for POLLIN via nl_sock_wait(), i.e.
+ * for the socket that dpifmon_poll() receives from. */
+void
+dpifmon_wait(struct dpifmon *mon)
+{
+ nl_sock_wait(mon->sock, POLLIN);
}
+\f
+static int get_openflow_major(void);
+static int get_major(const char *target, int default_major);
-/* Sends the given 'command' to datapath 'dp', related to the local datapath
- * numbered 'dp_idx'. If 'arg' is nonnull, adds it to the command as the
- * datapath or port name attribute depending on the requested operation.
- * Returns 0 if successful, otherwise a positive errno value. */
+/* Determines the datapath minor number behind network device 'name' by
+ * asking the device's driver for its ethtool driver information.
+ *
+ * The device must report "openflow" as its driver and a 'bus_info' string
+ * that starts with a digit; the minor is parsed from that string.  On
+ * success returns 0 and stores the minor in '*minor'.  On failure returns a
+ * positive errno value and leaves '*minor' set to -1 (as unsigned). */
static int
-send_mgmt_command(struct dpif *dp, int dp_idx, int command, const char *arg)
+lookup_minor(const char *name, unsigned int *minor)
{
- struct ofpbuf request, *reply;
- int retval;
+    struct ethtool_drvinfo drvinfo;
+    struct ifreq ifr;
+    int error;
+    int sock;
+
+    *minor = -1;
+    sock = socket(AF_INET, SOCK_DGRAM, 0);
+    if (sock < 0) {
+        /* Save errno first: logging may overwrite it. */
+        error = errno;
+        VLOG_WARN("socket(AF_INET) failed: %s", strerror(error));
+        goto error;
+    }
- ofpbuf_init(&request, 0);
- nl_msg_put_genlmsghdr(&request, dp->sock, 32, openflow_family,
- NLM_F_REQUEST | NLM_F_ACK, command, 1);
- if (dp_idx != -1) {
- nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx);
+    /* 'ifr' is zeroed and at most sizeof - 1 bytes are copied, so the
+     * interface name is always null-terminated even if 'name' is too long. */
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name - 1);
+    ifr.ifr_data = (caddr_t) &drvinfo;
+
+    memset(&drvinfo, 0, sizeof drvinfo);
+    drvinfo.cmd = ETHTOOL_GDRVINFO;
+    if (ioctl(sock, SIOCETHTOOL, &ifr)) {
+        error = errno;
+        VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(error));
+        goto error_close_sock;
+    }
+
+    if (strcmp(drvinfo.driver, "openflow")) {
+        VLOG_WARN("%s is not an openflow device", name);
+        error = EOPNOTSUPP;
+        goto error_close_sock;
}
- if (arg) {
- if ((command == DP_GENL_C_ADD_DP) || (command == DP_GENL_C_DEL_DP)) {
- nl_msg_put_string(&request, DP_GENL_A_DP_NAME, arg);
+
+    /* <ctype.h> functions require a value representable as unsigned char. */
+    if (!isdigit((unsigned char) drvinfo.bus_info[0])) {
+        VLOG_WARN("%s ethtool info does not contain an openflow minor", name);
+        error = EPROTOTYPE;
+        goto error_close_sock;
+    }
+
+    *minor = atoi(drvinfo.bus_info);
+    close(sock);
+    return 0;
+
+error_close_sock:
+    close(sock);
+error:
+    return error;
+}
+
+/* Makes sure that the device node "/dev/net/dp<minor>" exists and is a
+ * character device whose device number is the openflow major combined with
+ * 'minor'.
+ *
+ * An existing node of the wrong type or device number is unlinked and
+ * recreated; a missing "/dev/net" directory is created.  On success returns 0
+ * and stores a copy of the node's file name (made with xstrdup(), so
+ * presumably to be freed by the caller) in '*fnp'.  On failure returns a
+ * positive errno value and leaves '*fnp' NULL. */
+static int
+make_openflow_device(unsigned int minor, char **fnp)
+{
+    dev_t dev = makedev(get_openflow_major(), minor);
+    const char dirname[] = "/dev/net";
+    struct stat s;
+    char fn[128];
+    int error;
+
+    *fnp = NULL;
+    snprintf(fn, sizeof fn, "%s/dp%u", dirname, minor);
+    if (!stat(fn, &s)) {
+        if (!S_ISCHR(s.st_mode)) {
+            VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
+                         fn);
+        } else if (s.st_rdev != dev) {
+            VLOG_WARN_RL(&error_rl,
+                         "%s is device %u:%u instead of %u:%u, fixing",
+                         fn, major(s.st_rdev), minor(s.st_rdev),
+                         major(dev), minor(dev));
+        } else {
+            goto success;
+        }
+        if (unlink(fn)) {
+            /* In every failure path below errno is saved before logging,
+             * because the logging call may overwrite it. */
+            error = errno;
+            VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
+                         fn, strerror(error));
+            return error;
+        }
+    } else if (errno == ENOENT) {
+        if (stat(dirname, &s)) {
+            if (errno == ENOENT) {
+                if (mkdir(dirname, 0755)) {
+                    error = errno;
+                    VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
+                                 dirname, strerror(error));
+                    return error;
+                }
+            } else {
+                error = errno;
+                VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
+                             dirname, strerror(error));
+                return error;
+            }
+        }
+    } else {
+        error = errno;
+        VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(error));
+        return error;
+    }
+
+    /* The device needs to be created. */
+    if (mknod(fn, S_IFCHR | 0700, dev)) {
+        error = errno;
+        VLOG_WARN_RL(&error_rl,
+                     "%s: creating character device %u:%u failed (%s)",
+                     fn, major(dev), minor(dev), strerror(error));
+        return error;
+    }
+
+success:
+    *fnp = xstrdup(fn);
+    return 0;
+}
+
+
+/* Returns the major device number to use for openflow device nodes.  The
+ * value is looked up with get_major() (falling back to DEFAULT_MAJOR) and
+ * cached in a static variable, so the lookup is only repeated while the
+ * cached value is still 0. */
+static int
+get_openflow_major(void)
+{
+ static unsigned int openflow_major;
+ if (!openflow_major) {
+ enum { DEFAULT_MAJOR = 248 };
+ openflow_major = get_major("openflow", DEFAULT_MAJOR);
+ }
+ return openflow_major;
+}
+
+/* Searches the character-device section of /proc/devices for an entry named
+ * 'target' and returns its major number.  If the file cannot be opened or
+ * contains no such entry, logs the problem and returns 'default_major'.
+ *
+ * Lines that are neither a section heading, blank, nor of the form
+ * "<number> <name>" trigger a single "syntax error" warning. */
+static int
+get_major(const char *target, int default_major)
+{
+    const char fn[] = "/proc/devices";
+    char line[128];
+    FILE *file;
+    int ln;
+
+    file = fopen(fn, "r");
+    if (!file) {
+        VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
+        goto error;
+    }
+
+    for (ln = 1; fgets(line, sizeof line, file); ln++) {
+        char name[64];
+        int major;
+
+        /* fgets() keeps the newline, so a blank line is "\n", not "". */
+        if (!strncmp(line, "Character", 9)
+            || line[strspn(line, " \t\r\n")] == '\0') {
+            /* Nothing to do. */
+        } else if (!strncmp(line, "Block", 5)) {
+            /* We only want character devices, so skip the rest of the file. */
+            break;
+        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
+            /* Both fields must convert: sscanf() returns EOF (nonzero) or 1
+             * for partial input, and 'name' would then be uninitialized. */
+            if (!strcmp(name, target)) {
+                fclose(file);
+                return major;
+            }
} else {
- nl_msg_put_string(&request, DP_GENL_A_PORTNAME, arg);
+            static bool warned;
+            if (!warned) {
+                VLOG_WARN("%s:%d: syntax error", fn, ln);
+            }
+            warned = true;
}
}
- retval = nl_sock_transact(dp->sock, &request, &reply);
- ofpbuf_uninit(&request);
- ofpbuf_delete(reply);
- return retval;
+    /* Not found: release the stream before falling back to the default. */
+    fclose(file);
+    VLOG_ERR("%s: %s major not found (is the module loaded?), using "
+             "default major %d", fn, target, default_major);
+error:
+    VLOG_INFO("using default major %d for %s", default_major, target);
+    return default_major;
+}
+
+/* Converts datapath name 'name' into a minor number stored in '*minor'.
+ * First tries to parse the number out of the name itself with
+ * get_minor_from_name(); if that fails, falls back to lookup_minor(), which
+ * treats 'name' as a network device.  Returns 0 on success, otherwise the
+ * error from lookup_minor(). */
+static int
+name_to_minor(const char *name, unsigned int *minor)
+{
+ if (!get_minor_from_name(name, minor)) {
+ return 0;
+ }
+ return lookup_minor(name, minor);
+}
+
+/* Parses a minor number directly out of 'name'.  Two spellings are accepted:
+ * "dp" followed by a digit, and the legacy "nl:" followed by a digit.  On a
+ * match stores the number that follows the prefix in '*minor' and returns 0;
+ * otherwise returns EINVAL and leaves '*minor' untouched. */
+static int
+get_minor_from_name(const char *name, unsigned int *minor)
+{
+    const char *digits;
+
+    if (!strncmp(name, "dp", 2)) {
+        digits = name + 2;
+    } else if (!strncmp(name, "nl:", 3)) {
+        /* This is for compatibility only and will be dropped. */
+        digits = name + 3;
+    } else {
+        return EINVAL;
+    }
+
+    /* The prefix must be followed immediately by a digit. */
+    if (!isdigit(digits[0])) {
+        return EINVAL;
+    }
+
+    *minor = atoi(digits);
+    return 0;
+}
+
+/* Opens the datapath device with minor number 'minor', creating or repairing
+ * its device node first via make_openflow_device().  On success returns 0 and
+ * records 'minor' and the new file descriptor in 'dpif'.  On failure returns
+ * a positive errno value and leaves 'dpif' unmodified. */
+static int
+open_by_minor(unsigned int minor, struct dpif *dpif)
+{
+    char *path;
+    int status;
+    int fd;
+
+    status = make_openflow_device(minor, &path);
+    if (status) {
+        return status;
+    }
+
+    fd = open(path, O_RDONLY | O_NONBLOCK);
+    if (fd >= 0) {
+        dpif->minor = minor;
+        dpif->fd = fd;
+        status = 0;
+    } else {
+        /* Save errno before logging so the reported value is open()'s. */
+        status = errno;
+        VLOG_WARN("%s: open failed (%s)", path, strerror(status));
+    }
+
+    /* The path is only needed for open() and the log message. */
+    free(path);
+    return status;
+}
+\f
+/* Formats the 'n_actions' entries of 'actions' as a comma-separated list in
+ * a dynamic string and returns the resulting C string, or "<no actions>" if
+ * 'n_actions' is 0.  The callers in this file pass the result to free(), so
+ * ownership of the string transfers to the caller.
+ *
+ * Fields converted with ntohs() below (VLAN ID and transport ports) are
+ * treated as network byte order; the other numeric fields are printed as
+ * stored.  Action types not listed are printed as "unknown(<type>)". */
+static char *
+odp_actions_to_string(const union odp_action actions[], size_t n_actions)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ if (!n_actions) {
+ ds_put_cstr(&ds, "<no actions>");
+ } else {
+ const union odp_action *a;
+ for (a = actions; a < &actions[n_actions]; a++) {
+ /* Separate every action after the first with a comma. */
+ if (a != actions) {
+ ds_put_char(&ds, ',');
+ }
+ switch (a->type) {
+ case ODPAT_OUTPUT:
+ ds_put_format(&ds, "out:%"PRIu16, a->output.port);
+ break;
+ case ODPAT_OUTPUT_GROUP:
+ ds_put_format(&ds, "group:%"PRIu16, a->output_group.group);
+ break;
+ case ODPAT_CONTROLLER:
+ ds_put_format(&ds, "controller(arg:%"PRIu32")",
+ a->controller.arg);
+ break;
+ case ODPAT_SET_VLAN_VID:
+ ds_put_format(&ds, "vid:%"PRIu16, ntohs(a->vlan_vid.vlan_vid));
+ break;
+ case ODPAT_SET_VLAN_PCP:
+ ds_put_format(&ds, "pri:%"PRIu8, a->vlan_pcp.vlan_pcp);
+ break;
+ case ODPAT_STRIP_VLAN:
+ ds_put_cstr(&ds, "strip-vlan");
+ break;
+ case ODPAT_SET_DL_SRC:
+ ds_put_format(&ds, "dl-src:"ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(a->dl_addr.dl_addr));
+ break;
+ case ODPAT_SET_DL_DST:
+ ds_put_format(&ds, "dl-dst:"ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(a->dl_addr.dl_addr));
+ break;
+ case ODPAT_SET_NW_SRC:
+ ds_put_format(&ds, "nw-src:"IP_FMT,
+ IP_ARGS(&a->nw_addr.nw_addr));
+ break;
+ case ODPAT_SET_NW_DST:
+ ds_put_format(&ds, "nw-dst:"IP_FMT,
+ IP_ARGS(&a->nw_addr.nw_addr));
+ break;
+ case ODPAT_SET_TP_SRC:
+ ds_put_format(&ds, "tp-src:%"PRIu16,
+ ntohs(a->tp_port.tp_port));
+ break;
+ case ODPAT_SET_TP_DST:
+ ds_put_format(&ds, "tp-dst:%"PRIu16,
+ ntohs(a->tp_port.tp_port));
+ break;
+ case ODPAT_SNAT:
+ ds_put_format(&ds, "snat:%"PRIu16, a->snat.port);
+ break;
+ default:
+ ds_put_format(&ds, "unknown(%"PRIu16")", a->type);
+ break;
+ }
+ }
+
+ }
+ return ds_cstr(&ds);
}
/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
- *
+ *
* We are making the OpenFlow specification and associated documentation
* (Software) available for public use and benefit with the expectation
* that others will use, modify and enhance the Software and contribute
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
+ *
* The name and trademarks of copyright holder(s) may NOT be used in
* advertising or publicity pertaining to the Software or any
* derivatives without specific, written prior permission.
/* Operations for the datapath running in the local kernel. The interface can
* generalize to multiple types of local datapaths, but the implementation only
- * supports the openflow kernel module via netlink. */
+ * supports the openflow kernel module. */
+#include "openflow/datapath-protocol.h"
#include <stdbool.h>
+#include <stddef.h>
#include <stdint.h>
struct ofpbuf;
-struct ofp_match;
/* A datapath interface. Opaque. */
-struct dpif
-{
- struct nl_sock *sock;
+struct dpif {
+ unsigned int minor; /* For use in error messages. */
+ int fd;
};
-int dpif_open(int subscribe_dp_idx, struct dpif *);
+int dpif_open(const char *name, struct dpif *);
+int dpif_create(const char *name, struct dpif *);
void dpif_close(struct dpif *);
-/* OpenFlow. */
-int dpif_recv_openflow(struct dpif *, int dp_idx, struct ofpbuf **, bool wait);
-int dpif_send_openflow(struct dpif *, int dp_idx, struct ofpbuf *);
+int dpif_delete(struct dpif *);
-/* Management functions. */
-int dpif_add_dp(struct dpif *, int dp_idx, const char *dp_name);
-int dpif_del_dp(struct dpif *, int dp_idx, const char *dp_name);
-int dpif_add_port(struct dpif *, int dp_idx, const char *netdev);
-int dpif_del_port(struct dpif *, int dp_idx, const char *netdev);
-int dpif_get_idx(const char *dp_name);
+int dpif_get_dp_stats(const struct dpif *, struct odp_stats *);
+int dpif_get_drop_frags(const struct dpif *, bool *drop_frags);
+int dpif_set_drop_frags(struct dpif *, bool drop_frags);
+
+int dpif_get_listen_mask(const struct dpif *, int *listen_mask);
+int dpif_set_listen_mask(struct dpif *, int listen_mask);
+
+int dpif_port_add(struct dpif *, const char *devname, uint16_t port_no);
+int dpif_port_del(struct dpif *, uint16_t port_no);
+int dpif_port_query_by_number(const struct dpif *, uint16_t port_no,
+ struct odp_port *);
+int dpif_port_query_by_name(const struct dpif *, const char *devname,
+ struct odp_port *);
+int dpif_port_list(const struct dpif *, struct odp_port **, size_t *n_ports);
+
+int dpif_port_group_set(struct dpif *, uint16_t group,
+ const uint16_t ports[], size_t n_ports);
+int dpif_port_group_get(const struct dpif *, uint16_t group,
+ uint16_t ports[], size_t n_ports, size_t *n_out);
+
+int dpif_flow_flush(struct dpif *);
+int dpif_flow_add(struct dpif *, struct odp_flow *);
+int dpif_flow_set_actions(struct dpif *, const struct odp_flow_key *,
+ const union odp_action *actions, size_t n_actions);
+int dpif_flow_del(struct dpif *, struct odp_flow *);
+int dpif_flow_query(const struct dpif *, struct odp_flow *);
+int dpif_flow_query_multiple(const struct dpif *, struct odp_flow[], size_t n);
+int dpif_flow_list(const struct dpif *, struct odp_flow[], size_t n,
+ size_t *n_out);
+int dpif_flow_list_all(const struct dpif *,
+ struct odp_flow **flowsp, size_t *np);
+
+int dpif_execute(struct dpif *, uint16_t in_port,
+ const union odp_action[], size_t n_actions,
+ const struct ofpbuf *);
+
+int dpif_snat_add_port(struct dpif *, const struct odp_snat_config *);
+int dpif_snat_del_port(struct dpif *, uint16_t port);
+
+int dpif_recv(struct dpif *, struct ofpbuf **);
+void dpif_recv_wait(struct dpif *);
+\f
+struct dpifmon;
+
+int dpifmon_create(const struct dpif *, struct dpifmon **);
+void dpifmon_destroy(struct dpifmon *);
+
+int dpifmon_poll(struct dpifmon *, char **devnamep);
+
+void dpifmon_run(struct dpifmon *);
+void dpifmon_wait(struct dpifmon *);
#endif /* dpif.h */
#include <string.h>
#include "hash.h"
#include "ofpbuf.h"
+#include "openflow/datapath-protocol.h"
#include "openflow/openflow.h"
#include "packets.h"
/* Returns 1 if 'packet' is an IP fragment, 0 otherwise. */
int
-flow_extract(struct ofpbuf *packet, uint16_t in_port, struct flow *flow)
+flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow)
{
struct ofpbuf b = *packet;
struct eth_header *eth;
}
void
-flow_to_match(const struct flow *flow, uint32_t wildcards, struct ofp_match *match)
+flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match)
{
match->wildcards = htonl(wildcards);
- match->in_port = htons(flow->in_port);
+ match->in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL
+ : flow->in_port);
match->dl_vlan = flow->dl_vlan;
memcpy(match->dl_src, flow->dl_src, ETH_ADDR_LEN);
memcpy(match->dl_dst, flow->dl_dst, ETH_ADDR_LEN);
}
void
-flow_from_match(struct flow *flow, uint32_t *wildcards,
+flow_from_match(flow_t *flow, uint32_t *wildcards,
const struct ofp_match *match)
{
if (wildcards) {
}
char *
-flow_to_string(const struct flow *flow)
+flow_to_string(const flow_t *flow)
{
return xasprintf("port%04x:vlan%d mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" "
"type%04x proto%"PRId8" ip"IP_FMT"->"IP_FMT" port%d->%d",
}
void
-flow_print(FILE *stream, const struct flow *flow)
+flow_print(FILE *stream, const flow_t *flow)
{
char *s = flow_to_string(flow);
fputs(s, stream);
#include <string.h>
#include "openflow/openflow.h"
#include "hash.h"
+#include "openflow/datapath-protocol.h"
+#include "openflow/openflow.h"
#include "util.h"
struct ofp_match;
struct ofpbuf;
-/* Identification data for a flow.
- All fields are in network byte order.
- In decreasing order by size, so that flow structures can be hashed or
- compared bytewise. */
-struct flow {
- uint32_t nw_src; /* IP source address. */
- uint32_t nw_dst; /* IP destination address. */
- uint16_t in_port; /* Input switch port. */
- uint16_t dl_vlan; /* Input VLAN. */
- uint16_t dl_type; /* Ethernet frame type. */
- uint16_t tp_src; /* TCP/UDP source port. */
- uint16_t tp_dst; /* TCP/UDP destination port. */
- uint8_t dl_src[6]; /* Ethernet source address. */
- uint8_t dl_dst[6]; /* Ethernet destination address. */
- uint8_t nw_proto; /* IP protocol. */
- uint8_t reserved; /* Pad to 32-bit alignment. */
-};
-BUILD_ASSERT_DECL(sizeof(struct flow) == 32);
+typedef struct odp_flow_key flow_t;
-int flow_extract(struct ofpbuf *, uint16_t in_port, struct flow *);
-void flow_to_match(const struct flow *, uint32_t wildcards,
- struct ofp_match *);
-void flow_from_match(struct flow *, uint32_t *wildcards,
- const struct ofp_match *);
-char *flow_to_string(const struct flow *);
-void flow_print(FILE *, const struct flow *);
-static inline int flow_compare(const struct flow *, const struct flow *);
-static inline bool flow_equal(const struct flow *, const struct flow *);
-static inline size_t flow_hash(const struct flow *, uint32_t basis);
+int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *);
+void flow_to_match(const flow_t *, uint32_t wildcards, struct ofp_match *);
+void flow_from_match(flow_t *, uint32_t *wildcards, const struct ofp_match *);
+char *flow_to_string(const flow_t *);
+void flow_print(FILE *, const flow_t *);
+static inline int flow_compare(const flow_t *, const flow_t *);
+static inline bool flow_equal(const flow_t *, const flow_t *);
+static inline size_t flow_hash(const flow_t *, uint32_t basis);
+/* Compares flows 'a' and 'b' bytewise over the whole structure and returns
+ * the memcmp() result: 0 if every byte matches, otherwise nonzero with the
+ * sign of the first differing byte. */
static inline int
-flow_compare(const struct flow *a, const struct flow *b)
+flow_compare(const flow_t *a, const flow_t *b)
{
return memcmp(a, b, sizeof *a);
}
+/* Returns true if flows 'a' and 'b' are bytewise identical, i.e. if
+ * flow_compare() returns 0 for them. */
static inline bool
-flow_equal(const struct flow *a, const struct flow *b)
+flow_equal(const flow_t *a, const flow_t *b)
{
return !flow_compare(a, b);
}
static inline size_t
-flow_hash(const struct flow *flow, uint32_t basis)
+flow_hash(const flow_t *flow, uint32_t basis)
{
BUILD_ASSERT_DECL(!(sizeof *flow % sizeof(uint32_t)));
return hash_words((const uint32_t *) flow,
size_t pkt_ofs, pkt_len;
struct ofpbuf pkt;
- struct flow flow;
+ flow_t flow;
/* Extract flow data from 'opi' into 'flow'. */
pkt_ofs = offsetof(struct ofp_packet_in, data);
}
if (sw->ml) {
- uint16_t learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0);
- if (may_send(sw, learned_port)) {
+ int learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0);
+ if (learned_port >= 0 && may_send(sw, learned_port)) {
out_port = learned_port;
}
}
#include "hash.h"
#include "list.h"
-#include "openflow/openflow.h"
#include "poll-loop.h"
#include "tag.h"
#include "timeval.h"
}
/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
- * frame destined for 'dst' should be sent, OFPP_FLOOD if unknown. */
-uint16_t
+ * frame destined for 'dst' should be sent, -1 if unknown. */
+int
mac_learning_lookup(const struct mac_learning *ml,
const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan)
{
}
/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
- * frame destined for 'dst' should be sent, OFPP_FLOOD if unknown.
+ * frame destined for 'dst' should be sent, -1 if unknown.
*
* Adds to '*tag' (which the caller must have initialized) the tag that should
* be attached to any flow created based on the return value, if any, to allow
* those flows to be revalidated when the MAC learning entry changes. */
-uint16_t
+int
mac_learning_lookup_tag(const struct mac_learning *ml,
const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
tag_type *tag)
{
if (eth_addr_is_multicast(dst)) {
- return OFPP_FLOOD;
+ return -1;
} else {
struct mac_entry *e = search_bucket(mac_table_bucket(ml, dst, vlan),
dst, vlan);
return e->port;
} else {
*tag |= make_unknown_mac_tag(ml, dst, vlan);
- return OFPP_FLOOD;
+ return -1;
}
}
}
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
tag_type mac_learning_learn(struct mac_learning *,
const uint8_t src[ETH_ADDR_LEN], uint16_t vlan,
uint16_t src_port);
-uint16_t mac_learning_lookup(const struct mac_learning *,
- const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan);
-uint16_t mac_learning_lookup_tag(const struct mac_learning *,
- const uint8_t dst[ETH_ADDR_LEN],
- uint16_t vlan, tag_type *tag);
+int mac_learning_lookup(const struct mac_learning *,
+ const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan);
+int mac_learning_lookup_tag(const struct mac_learning *,
+ const uint8_t dst[ETH_ADDR_LEN],
+ uint16_t vlan, tag_type *tag);
void mac_learning_flush(struct mac_learning *);
void mac_learning_run(struct mac_learning *, struct tag_set *);
void mac_learning_wait(struct mac_learning *);
ds_put_char(string, '\n');
if (verbosity > 0) {
- struct flow flow;
+ flow_t flow;
struct ofpbuf packet;
struct ofp_match match;
packet.data = (void *) op->data;
packet.size = data_len;
flow_extract(&packet, ntohs(op->in_port), &flow);
- match.wildcards = 0;
- match.in_port = flow.in_port;
- memcpy(match.dl_src, flow.dl_src, ETH_ADDR_LEN);
- memcpy(match.dl_dst, flow.dl_dst, ETH_ADDR_LEN);
- match.dl_vlan = flow.dl_vlan;
- match.dl_type = flow.dl_type;
- match.nw_proto = flow.nw_proto;
- match.pad = 0;
- match.nw_src = flow.nw_src;
- match.nw_dst = flow.nw_dst;
- match.tp_src = flow.tp_src;
- match.tp_dst = flow.tp_dst;
+ flow_to_match(&flow, 0, &match);
ofp_print_match(string, &match, verbosity);
ds_put_char(string, '\n');
}
ntohl(ofe->duration), ntohll(ofe->packet_count),
ntohll(ofe->byte_count));
}
-/* Pretty-print the NXT_FLOW_EXPIRED packet of 'len' bytes at 'oh' to 'string'
- * at the given 'verbosity' level. */
-static void
-nx_print_flow_end(struct ds *string, const void *oh, size_t len,
- int verbosity)
-{
- const struct nx_flow_end *nfe = oh;
-
- ds_put_cstr(string, "nx_flow_end: ");
-
- if (len < sizeof(*nfe)) {
- ds_put_format(string, " (***length=%zu < min_size=%zu***)\n",
- len, sizeof(*nfe));
- return;
- }
-
- ofp_print_match(string, &nfe->match, verbosity);
- ds_put_cstr(string, " reason=");
- switch (nfe->reason) {
- case NXFER_IDLE_TIMEOUT:
- ds_put_cstr(string, "idle");
- break;
- case NXFER_HARD_TIMEOUT:
- ds_put_cstr(string, "hard");
- break;
- case NXFER_DELETE:
- ds_put_cstr(string, "delete");
- break;
- case NXFER_EJECT:
- ds_put_cstr(string, "eject");
- break;
- default:
- ds_put_format(string, "**%"PRIu8"**", nfe->reason);
- break;
- }
- ds_put_format(string,
- " pri=%"PRIu16" init=%"PRIu64" used=%"PRIu64" end=%"PRIu64,
- nfe->match.wildcards ? ntohs(nfe->priority) : (uint16_t)-1,
- ntohll(nfe->init_time), ntohll(nfe->used_time),
- ntohll(nfe->end_time));
- ds_put_format(string,
- " tflags=0x%x tos=0x%x pkts=%"PRIu64" bytes=%"PRIu64"\n",
- nfe->tcp_flags, nfe->ip_tos, ntohll(nfe->packet_count),
- ntohll(nfe->byte_count));
-}
-
-static void
-nx_print_msg(struct ds *string, const void *oh, size_t len, int verbosity)
-{
- const struct nicira_header *nh = oh;
-
- switch(ntohl(nh->subtype))
- {
- case NXT_FLOW_END:
- nx_print_flow_end(string, oh, len, verbosity);
- return;
- }
-}
-
static void
ofp_print_port_mod(struct ds *string, const void *oh, size_t len UNUSED,
}
}
-static void
-ofp_vendor(struct ds *string, const void *oh, size_t len, int verbosity)
-{
- const struct ofp_vendor_header *vh = oh;
-
- switch(ntohl(vh->vendor))
- {
- case NX_VENDOR_ID:
- return nx_print_msg(string, oh, len, verbosity);
- break;
- }
-}
-
struct openflow_packet {
uint8_t type;
const char *name;
OFPT_VENDOR,
"vendor",
sizeof (struct ofp_vendor_header),
- ofp_vendor,
+ NULL,
},
};
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "vconn.h"
-#include <arpa/inet.h>
-#include <assert.h>
-#include <errno.h>
-#include <netdb.h>
-#include <poll.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include "dpif.h"
-#include "netlink.h"
-#include "ofpbuf.h"
-#include "openflow/openflow-netlink.h"
-#include "openflow/openflow.h"
-#include "poll-loop.h"
-#include "socket-util.h"
-#include "util.h"
-#include "vconn-provider.h"
-
-#include "vlog.h"
-#define THIS_MODULE VLM_VCONN_NETLINK
-
-struct netlink_vconn
-{
- struct vconn vconn;
- struct dpif dp;
- int dp_idx;
-};
-
-static struct netlink_vconn *
-netlink_vconn_cast(struct vconn *vconn)
-{
- vconn_assert_class(vconn, &netlink_vconn_class);
- return CONTAINER_OF(vconn, struct netlink_vconn, vconn);
-}
-
-static int
-netlink_open(const char *name, char *suffix, struct vconn **vconnp)
-{
- struct netlink_vconn *netlink;
- int subscribe;
- int dp_idx;
- int retval;
-
- subscribe = 1;
- if (sscanf(suffix, "%d:%d", &dp_idx, &subscribe) < 1) {
- ofp_error(0, "%s: syntax error", name);
- return EAFNOSUPPORT;
- }
-
- netlink = xmalloc(sizeof *netlink);
- vconn_init(&netlink->vconn, &netlink_vconn_class, 0, 0, name, true);
- retval = dpif_open(subscribe ? dp_idx : -1, &netlink->dp);
- netlink->dp_idx = dp_idx;
- if (retval) {
- free(netlink);
- *vconnp = NULL;
- return retval;
- }
- *vconnp = &netlink->vconn;
- return 0;
-}
-
-static void
-netlink_close(struct vconn *vconn)
-{
- struct netlink_vconn *netlink = netlink_vconn_cast(vconn);
- dpif_close(&netlink->dp);
- free(netlink);
-}
-
-static int
-netlink_recv(struct vconn *vconn, struct ofpbuf **bufferp)
-{
- struct netlink_vconn *netlink = netlink_vconn_cast(vconn);
- return dpif_recv_openflow(&netlink->dp, netlink->dp_idx, bufferp, false);
-}
-
-static int
-netlink_send(struct vconn *vconn, struct ofpbuf *buffer)
-{
- struct netlink_vconn *netlink = netlink_vconn_cast(vconn);
- int retval = dpif_send_openflow(&netlink->dp, netlink->dp_idx, buffer);
- if (!retval) {
- ofpbuf_delete(buffer);
- }
- return retval;
-}
-
-static void
-netlink_wait(struct vconn *vconn, enum vconn_wait_type wait)
-{
- struct netlink_vconn *netlink = netlink_vconn_cast(vconn);
- short int events = 0;
- switch (wait) {
- case WAIT_CONNECT:
- NOT_REACHED();
-
- case WAIT_RECV:
- events = POLLIN;
- break;
-
- case WAIT_SEND:
- events = 0;
- break;
-
- default:
- NOT_REACHED();
- }
- nl_sock_wait(netlink->dp.sock, events);
-}
-
-struct vconn_class netlink_vconn_class = {
- "nl", /* name */
- netlink_open, /* open */
- netlink_close, /* close */
- NULL, /* connect */
- netlink_recv, /* recv */
- netlink_send, /* send */
- netlink_wait, /* wait */
-};
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
extern struct vconn_class ssl_vconn_class;
extern struct pvconn_class pssl_pvconn_class;
#endif
-#ifdef HAVE_NETLINK
-extern struct vconn_class netlink_vconn_class;
-#endif
#endif /* vconn-provider.h */
static struct vconn_class *vconn_classes[] = {
&tcp_vconn_class,
&unix_vconn_class,
-#ifdef HAVE_NETLINK
- &netlink_vconn_class,
-#endif
#ifdef HAVE_OPENSSL
&ssl_vconn_class,
#endif
}
struct ofpbuf *
-make_flow_mod(uint16_t command, const struct flow *flow, size_t actions_len)
+make_flow_mod(uint16_t command, const flow_t *flow, size_t actions_len)
{
struct ofp_flow_mod *ofm;
size_t size = sizeof *ofm + actions_len;
ofm->header.type = OFPT_FLOW_MOD;
ofm->header.length = htons(size);
ofm->match.wildcards = htonl(0);
- ofm->match.in_port = flow->in_port;
+ ofm->match.in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL
+ : flow->in_port);
memcpy(ofm->match.dl_src, flow->dl_src, sizeof ofm->match.dl_src);
memcpy(ofm->match.dl_dst, flow->dl_dst, sizeof ofm->match.dl_dst);
ofm->match.dl_vlan = flow->dl_vlan;
}
struct ofpbuf *
-make_add_flow(const struct flow *flow, uint32_t buffer_id,
+make_add_flow(const flow_t *flow, uint32_t buffer_id,
uint16_t idle_timeout, size_t actions_len)
{
struct ofpbuf *out = make_flow_mod(OFPFC_ADD, flow, actions_len);
}
struct ofpbuf *
-make_del_flow(const struct flow *flow)
+make_del_flow(const flow_t *flow)
{
struct ofpbuf *out = make_flow_mod(OFPFC_DELETE_STRICT, flow, 0);
struct ofp_flow_mod *ofm = out->data;
}
struct ofpbuf *
-make_add_simple_flow(const struct flow *flow,
+make_add_simple_flow(const flow_t *flow,
uint32_t buffer_id, uint16_t out_port,
uint16_t idle_timeout)
{
opo->header.length = htons(size);
opo->header.xid = htonl(0);
opo->buffer_id = htonl(buffer_id);
- opo->in_port = htons(in_port);
+ opo->in_port = htons(in_port == ODPP_LOCAL ? OFPP_LOCAL : in_port);
opo->actions_len = htons(actions_len);
ofpbuf_put(out, actions, actions_len);
if (packet) {
#include <stddef.h>
#include <stdint.h>
+#include "flow.h"
+
struct ofpbuf;
-struct flow;
struct ofp_action_header;
struct ofp_header;
struct ofp_match;
void *put_openflow_xid(size_t openflow_len, uint8_t type, uint32_t xid,
struct ofpbuf *);
void update_openflow_length(struct ofpbuf *);
-struct ofpbuf *make_flow_mod(uint16_t command, const struct flow *,
+struct ofpbuf *make_flow_mod(uint16_t command, const flow_t *,
size_t actions_len);
-struct ofpbuf *make_add_flow(const struct flow *, uint32_t buffer_id,
+struct ofpbuf *make_add_flow(const flow_t *, uint32_t buffer_id,
uint16_t max_idle, size_t actions_len);
-struct ofpbuf *make_del_flow(const struct flow *);
-struct ofpbuf *make_add_simple_flow(const struct flow *,
+struct ofpbuf *make_del_flow(const flow_t *);
+struct ofpbuf *make_add_simple_flow(const flow_t *,
uint32_t buffer_id, uint16_t out_port,
uint16_t max_idle);
struct ofpbuf *make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id,
VLOG_MODULE(fail_open)
VLOG_MODULE(fault)
VLOG_MODULE(flow)
-VLOG_MODULE(flow_end)
VLOG_MODULE(in_band)
VLOG_MODULE(leak_checker)
VLOG_MODULE(learning_switch)
VLOG_MODULE(mac_learning)
VLOG_MODULE(netdev)
+VLOG_MODULE(netflow)
VLOG_MODULE(netlink)
VLOG_MODULE(ofp_discover)
+VLOG_MODULE(ofproto)
+VLOG_MODULE(pktbuf)
VLOG_MODULE(pcap)
VLOG_MODULE(poll_loop)
VLOG_MODULE(port_watcher)
VLOG_MODULE(terminal)
VLOG_MODULE(socket_util)
VLOG_MODULE(vconn_fd)
-VLOG_MODULE(vconn_netlink)
VLOG_MODULE(vconn_tcp)
VLOG_MODULE(vconn_ssl)
VLOG_MODULE(vconn_stream)
VLOG_MODULE(vconn)
VLOG_MODULE(vlog)
VLOG_MODULE(vlog_socket)
+VLOG_MODULE(wcelim)
VLOG_MODULE(vswitchd)
#ifdef HAVE_EXT
# -*- autoconf -*-
-# Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+# Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
# Junior University
#
# We are making the OpenFlow specification and associated documentation
dnl link against lib/libopenflow.a.
AC_DEFUN([OFP_CHECK_LIBOPENFLOW],
[AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+ AC_REQUIRE([AC_C_BIGENDIAN])
AC_REQUIRE([OFP_CHECK_NDEBUG])
AC_REQUIRE([OFP_CHECK_NETLINK])
AC_REQUIRE([OFP_CHECK_OPENSSL])
bin_PROGRAMS += secchan/secchan
man_MANS += secchan/secchan.8
+# secchan/stp-secchan.c \
+# secchan/stp-secchan.h
+#
+
secchan_secchan_SOURCES = \
secchan/discovery.c \
secchan/discovery.h \
secchan/executer.h \
secchan/fail-open.c \
secchan/fail-open.h \
- secchan/flow-end.c \
- secchan/flow-end.h \
secchan/in-band.c \
secchan/in-band.h \
+ secchan/netflow.c \
secchan/netflow.h \
- secchan/port-watcher.c \
- secchan/port-watcher.h \
- secchan/ratelimit.c \
- secchan/ratelimit.h \
+ secchan/ofproto.c \
+ secchan/ofproto.h \
+ secchan/pktbuf.c \
+ secchan/pktbuf.h \
+ secchan/pinsched.c \
+ secchan/pinsched.h \
secchan/secchan.c \
secchan/secchan.h \
secchan/status.c \
- secchan/status.h \
- secchan/stp-secchan.c \
- secchan/stp-secchan.h
-if SUPPORT_SNAT
-secchan_secchan_SOURCES += \
- secchan/snat.c \
- secchan/snat.h
-endif
+ secchan/status.h
+#if SUPPORT_SNAT
+#secchan_secchan_SOURCES += \
+# secchan/snat.c \
+# secchan/snat.h
+#endif
secchan_secchan_LDADD = lib/libopenflow.a $(FAULT_LIBS) $(SSL_LIBS)
EXTRA_DIST += secchan/secchan.8.in
#include <config.h>
#include "discovery.h"
#include <inttypes.h>
+#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include "dhcp-client.h"
#include "dhcp.h"
+#include "dpif.h"
#include "netdev.h"
#include "openflow/openflow.h"
#include "packets.h"
-#include "port-watcher.h"
#include "secchan.h"
#include "status.h"
#define THIS_MODULE VLM_discovery
#include "vlog.h"
-struct discovery
-{
- const struct settings *s;
+struct discovery {
+ const char *accept_controller_re;
+ bool update_resolv_conf;
+ regex_t accept_controller_regex;
struct dhclient *dhcp;
int n_changes;
};
{
struct discovery *d = d_;
- status_reply_put(sr, "accept-remote=%s", d->s->accept_controller_re);
+ status_reply_put(sr, "accept-remote=%s", d->accept_controller_re);
status_reply_put(sr, "n-changes=%d", d->n_changes);
if (d->dhcp) {
status_reply_put(sr, "state=%s", dhclient_get_state(d->dhcp));
}
}
-static void
-discovery_local_port_cb(const struct ofp_phy_port *port, void *d_)
-{
- struct discovery *d = d_;
- if (port) {
- char name[OFP_MAX_PORT_NAME_LEN + 1];
- struct netdev *netdev;
- int retval;
-
- /* Check that this was really a change. */
- get_port_name(port, name, sizeof name);
- if (d->dhcp && !strcmp(netdev_get_name(dhclient_get_netdev(d->dhcp)),
- name)) {
- return;
- }
-
- /* Destroy current DHCP client. */
- dhclient_destroy(d->dhcp);
- d->dhcp = NULL;
-
- /* Bring local network device up. */
- retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev);
- if (retval) {
- VLOG_ERR("Could not open %s device, discovery disabled: %s",
- name, strerror(retval));
- return;
- }
- retval = netdev_turn_flags_on(netdev, NETDEV_UP, true);
- if (retval) {
- VLOG_ERR("Could not bring %s device up, discovery disabled: %s",
- name, strerror(retval));
- return;
- }
- netdev_close(netdev);
-
- /* Initialize DHCP client. */
- retval = dhclient_create(name, modify_dhcp_request,
- validate_dhcp_offer, (void *) d->s, &d->dhcp);
- if (retval) {
- VLOG_ERR("Failed to initialize DHCP client, "
- "discovery disabled: %s", strerror(retval));
- return;
- }
- dhclient_set_max_timeout(d->dhcp, 3);
- dhclient_init(d->dhcp, 0);
- } else {
- dhclient_destroy(d->dhcp);
- d->dhcp = NULL;
- }
-}
-
-
struct discovery *
-discovery_init(const struct settings *s, struct port_watcher *pw,
- struct switch_status *ss)
+discovery_create(const char *accept_controller_re, bool update_resolv_conf,
+ struct dpif *dpif, struct switch_status *ss)
{
struct discovery *d;
+ struct odp_port port;
+ int error;
+
+ d = xcalloc(1, sizeof *d);
+
+ /* Controller regular expression. */
+ d->accept_controller_re = accept_controller_re;
+ error = regcomp(&d->accept_controller_regex, accept_controller_re,
+ REG_NOSUB | REG_EXTENDED);
+ if (error) {
+ size_t length = regerror(error, &d->accept_controller_regex, NULL, 0);
+ char *buffer = xmalloc(length);
+ regerror(error, &d->accept_controller_regex, buffer, length);
+ ofp_fatal(0, "%s: %s", accept_controller_re, buffer);
+ }
+ d->update_resolv_conf = update_resolv_conf;
- d = xmalloc(sizeof *d);
- d->s = s;
- d->dhcp = NULL;
- d->n_changes = 0;
+ /* Initialize DHCP client. */
+ error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &port);
+ if (error) {
+ ofp_fatal(error, "failed to query datapath local port");
+ }
+ error = dhclient_create(port.devname, modify_dhcp_request,
+ validate_dhcp_offer, d, &d->dhcp);
+ if (error) {
+ ofp_fatal(error, "failed to initialize DHCP client");
+ }
+ dhclient_set_max_timeout(d->dhcp, 3);
+ dhclient_init(d->dhcp, 0);
switch_status_register_category(ss, "discovery", discovery_status_cb, d);
- port_watcher_register_local_port_callback(pw, discovery_local_port_cb, d);
return d;
}
}
dhclient_configure_netdev(d->dhcp);
- if (d->s->update_resolv_conf) {
+ if (d->update_resolv_conf) {
dhclient_update_resolv_conf(d->dhcp);
}
}
static bool
-validate_dhcp_offer(const struct dhcp_msg *msg, void *s_)
+validate_dhcp_offer(const struct dhcp_msg *msg, void *d_)
{
- const struct settings *s = s_;
+ const struct discovery *d = d_;
char *vconn_name;
bool accept;
VLOG_WARN_RL(&rl, "rejecting DHCP offer missing controller vconn");
return false;
}
- accept = !regexec(&s->accept_controller_regex, vconn_name, 0, NULL, 0);
+ accept = !regexec(&d->accept_controller_regex, vconn_name, 0, NULL,
+ 0);
if (!accept) {
VLOG_WARN_RL(&rl, "rejecting controller vconn that fails to match %s",
- s->accept_controller_re);
+ d->accept_controller_re);
}
free(vconn_name);
return accept;
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#include <stdbool.h>
+struct dpif;
struct settings;
-struct port_watcher;
struct switch_status;
-struct discovery *discovery_init(const struct settings *,
- struct port_watcher *,
- struct switch_status *);
+struct discovery *discovery_create(const char *accept_controller_re,
+ bool update_resolv_conf,
+ struct dpif *, struct switch_status *);
void discovery_question_connectivity(struct discovery *);
bool discovery_run(struct discovery *, char **controller_name);
void discovery_wait(struct discovery *);
pid_t pid; /* Child's process ID. */
/* For sending a reply to the controller when the child dies. */
- struct relay *relay;
+ struct rconn *rconn;
uint32_t xid; /* Transaction ID used by controller. */
/* We read up to MAX_OUTPUT bytes of output and send them back to the
};
struct executer {
- const struct settings *s;
+ /* Settings. */
+ const char *command_acl; /* Command white/blacklist, as shell globs. */
+ const char *command_dir; /* Directory that contains commands. */
/* Children. */
struct child children[MAX_CHILDREN];
int null_fd; /* FD for /dev/null. */
};
-static void send_child_status(struct relay *, uint32_t xid, uint32_t status,
+static void send_child_status(struct rconn *, uint32_t xid, uint32_t status,
const void *data, size_t size);
-static void send_child_message(struct relay *, uint32_t xid, uint32_t status,
+static void send_child_message(struct rconn *, uint32_t xid, uint32_t status,
const char *message);
/* Returns true if 'cmd' is allowed by 'acl', which is a command-separated
return allowed && !denied;
}
-static bool
-executer_remote_packet_cb(struct relay *r, void *e_)
+int
+executer_handle_request(struct executer *e, struct rconn *rconn,
+ struct nicira_header *request)
{
- struct executer *e = e_;
- struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf;
- struct nicira_header *request;
char **argv;
char *args;
char *exec_file = NULL;
pid_t pid;
int output_fds[2];
- /* Check for NXT_COMMAND_REQUEST vendor extension. */
- if (msg->size < sizeof(struct nicira_header)) {
- return false;
- }
- request = msg->data;
- if (request->header.type != OFPT_VENDOR
- || request->vendor != htonl(NX_VENDOR_ID)
- || request->subtype != htonl(NXT_COMMAND_REQUEST)) {
- return false;
- }
-
/* Verify limit on children not exceeded.
* XXX should probably kill children when the connection drops? */
if (e->n_children >= MAX_CHILDREN) {
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"too many child processes");
- VLOG_WARN("limit of %d child processes reached, dropping request",
- MAX_CHILDREN);
- return false;
+ return 0;
}
/* Copy argument buffer, adding a null terminator at the end. Now every
* argument is null-terminated, instead of being merely null-delimited. */
- args_size = msg->size - sizeof *request;
+ args_size = ntohs(request->header.length) - sizeof *request;
args = xmemdup0((const void *) (request + 1), args_size);
/* Count arguments. */
argv[argc] = NULL;
/* Check permissions. */
- if (!executer_is_permitted(e->s->command_acl, argv[0])) {
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ if (!executer_is_permitted(e->command_acl, argv[0])) {
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"command not allowed");
goto done;
}
/* Find the executable. */
- exec_file = xasprintf("%s/%s", e->s->command_dir, argv[0]);
+ exec_file = xasprintf("%s/%s", e->command_dir, argv[0]);
if (stat(exec_file, &s)) {
VLOG_WARN("failed to stat \"%s\": %s", exec_file, strerror(errno));
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"command not allowed");
goto done;
}
if (!S_ISREG(s.st_mode)) {
VLOG_WARN("\"%s\" is not a regular file", exec_file);
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"command not allowed");
goto done;
}
/* Arrange to capture output. */
if (pipe(output_fds)) {
VLOG_WARN("pipe failed: %s", strerror(errno));
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"internal error (pipe)");
goto done;
}
for (i = 3; i < max_fds; i++) {
close(i);
}
- if (chdir(e->s->command_dir)) {
+ if (chdir(e->command_dir)) {
printf("could not change directory to \"%s\": %s",
- e->s->command_dir, strerror(errno));
+ e->command_dir, strerror(errno));
exit(EXIT_FAILURE);
}
execv(argv[0], argv);
struct child *child;
VLOG_INFO("started \"%s\" subprocess", argv[0]);
- send_child_status(r, request->header.xid, NXT_STATUS_STARTED, NULL, 0);
+ send_child_status(rconn, request->header.xid, NXT_STATUS_STARTED,
+ NULL, 0);
child = &e->children[e->n_children++];
child->name = xstrdup(argv[0]);
child->pid = pid;
- child->relay = r;
+ child->rconn = rconn;
child->xid = request->header.xid;
child->output_fd = output_fds[0];
child->output = xmalloc(MAX_OUTPUT);
close(output_fds[1]);
} else {
VLOG_WARN("fork failed: %s", strerror(errno));
- send_child_message(r, request->header.xid, NXT_STATUS_ERROR,
+ send_child_message(rconn, request->header.xid, NXT_STATUS_ERROR,
"internal error (fork)");
close(output_fds[0]);
close(output_fds[1]);
free(exec_file);
free(args);
free(argv);
- return true;
+ return 0;
}
static void
-send_child_status(struct relay *relay, uint32_t xid, uint32_t status,
+send_child_status(struct rconn *rconn, uint32_t xid, uint32_t status,
const void *data, size_t size)
{
- if (relay) {
+ if (rconn) {
struct nx_command_reply *r;
struct ofpbuf *buffer;
r->status = htonl(status);
ofpbuf_put(buffer, data, size);
update_openflow_length(buffer);
- if (rconn_send(relay->halves[HALF_REMOTE].rconn, buffer, NULL)) {
+ if (rconn_send(rconn, buffer, NULL)) {
ofpbuf_delete(buffer);
}
}
}
static void
-send_child_message(struct relay *relay, uint32_t xid, uint32_t status,
+send_child_message(struct rconn *rconn, uint32_t xid, uint32_t status,
const char *message)
{
- send_child_status(relay, xid, status, message, strlen(message));
+ send_child_status(rconn, xid, status, message, strlen(message));
}
/* 'child' died with 'status' as its return code. Deal with it. */
if (WCOREDUMP(status)) {
ofp_status |= NXT_STATUS_COREDUMP;
}
- send_child_status(child->relay, child->xid, ofp_status,
+ send_child_status(child->rconn, child->xid, ofp_status,
child->output, child->output_size);
}
child->output_fd = -1;
}
-static void
-executer_periodic_cb(void *e_)
+void
+executer_run(struct executer *e)
{
- struct executer *e = e_;
char buffer[MAX_CHILDREN];
size_t i;
}
-static void
-executer_wait_cb(void *e_)
+void
+executer_wait(struct executer *e)
{
- struct executer *e = e_;
if (e->n_children) {
size_t i;
}
}
-static void
-executer_closing_cb(struct relay *r, void *e_)
+void
+executer_rconn_closing(struct executer *e, struct rconn *rconn)
{
- struct executer *e = e_;
size_t i;
/* If any of our children was connected to 'r', then disconnect it so we
* later.
* XXX kill the children started by 'r'? */
for (i = 0; i < e->n_children; i++) {
- if (e->children[i].relay == r) {
- e->children[i].relay = NULL;
+ if (e->children[i].rconn == rconn) {
+ e->children[i].rconn = NULL;
}
}
}
write(child_fd, "", 1);
}
-static const struct hook_class executer_hook_class = {
- NULL, /* local_packet_cb */
- executer_remote_packet_cb, /* remote_packet_cb */
- executer_periodic_cb, /* periodic_cb */
- executer_wait_cb, /* wait_cb */
- executer_closing_cb, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-executer_start(struct secchan *secchan, const struct settings *settings)
+struct executer *
+executer_create(const char *command_acl, const char *command_dir)
{
struct executer *e;
struct sigaction sa;
ofp_fatal(errno, "sigaction(SIGCHLD) failed");
}
- /* Add hook. */
e = xcalloc(1, sizeof *e);
- e->s = settings;
+ e->command_acl = command_acl;
+ e->command_dir = command_dir;
e->n_children = 0;
e->wait_fd = fds[0];
e->null_fd = null_fd;
- add_hook(secchan, &executer_hook_class, e);
+ return e;
}
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#ifndef EXECUTER_H
#define EXECUTER_H 1
-struct secchan;
-struct settings;
+struct nicira_header;
+struct rconn;
-void executer_start(struct secchan *, const struct settings *);
+struct executer *executer_create(const char *command_acl,
+ const char *command_dir);
+void executer_run(struct executer *);
+void executer_wait(struct executer *);
+void executer_rconn_closing(struct executer *, struct rconn *);
+int executer_handle_request(struct executer *, struct rconn *,
+ struct nicira_header *);
#endif /* executer.h */
#include <config.h>
#include "fail-open.h"
-#include <arpa/inet.h>
-#include <stddef.h>
-#include <string.h>
-#include "learning-switch.h"
-#include "netdev.h"
-#include "packets.h"
-#include "port-watcher.h"
+#include <inttypes.h>
+#include "flow.h"
+#include "mac-learning.h"
+#include "ofproto.h"
#include "rconn.h"
-#include "secchan.h"
#include "status.h"
-#include "stp-secchan.h"
#include "timeval.h"
#define THIS_MODULE VLM_fail_open
#include "vlog.h"
-struct fail_open_data {
- const struct settings *s;
- struct rconn *local_rconn;
- struct rconn *remote_rconn;
- struct lswitch *lswitch;
+struct fail_open {
+ struct rconn *controller;
+ int trigger_duration;
int last_disconn_secs;
- time_t boot_deadline;
+ struct mac_learning *mac_learning;
};
/* Causes 'r' to enter or leave fail-open mode, if appropriate. */
-static void
-fail_open_periodic_cb(void *fail_open_)
-{
- struct fail_open_data *fail_open = fail_open_;
- int disconn_secs;
- bool open;
- if (time_now() < fail_open->boot_deadline) {
- return;
- }
- disconn_secs = rconn_failure_duration(fail_open->remote_rconn);
- open = disconn_secs >= fail_open->s->probe_interval * 3;
- if (open != (fail_open->lswitch != NULL)) {
+void
+fail_open_run(struct fail_open *fo)
+{
+ int disconn_secs = rconn_failure_duration(fo->controller);
+ bool open = disconn_secs >= fo->trigger_duration;
+ if (open != (fo->mac_learning != NULL)) {
if (!open) {
VLOG_WARN("No longer in fail-open mode");
- lswitch_destroy(fail_open->lswitch);
- fail_open->lswitch = NULL;
+ mac_learning_destroy(fo->mac_learning);
+ fo->mac_learning = NULL;
} else {
VLOG_WARN("Could not connect to controller for %d seconds, "
"failing open", disconn_secs);
- fail_open->lswitch = lswitch_create(fail_open->local_rconn, true,
- fail_open->s->max_idle);
- fail_open->last_disconn_secs = disconn_secs;
+ fo->mac_learning = mac_learning_create();
+ fo->last_disconn_secs = disconn_secs;
}
- } else if (open && disconn_secs > fail_open->last_disconn_secs + 60) {
+ } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
"from controller", disconn_secs);
- fail_open->last_disconn_secs = disconn_secs;
+ fo->last_disconn_secs = disconn_secs;
}
- if (fail_open->lswitch) {
- lswitch_run(fail_open->lswitch, fail_open->local_rconn);
+ if (fo->mac_learning) {
+ mac_learning_run(fo->mac_learning, NULL);
}
}
-static void
-fail_open_wait_cb(void *fail_open_)
+void
+fail_open_wait(struct fail_open *fo)
{
- struct fail_open_data *fail_open = fail_open_;
- if (fail_open->lswitch) {
- lswitch_wait(fail_open->lswitch);
+ if (fo->mac_learning) {
+ mac_learning_wait(fo->mac_learning);
}
}
-static bool
-fail_open_local_packet_cb(struct relay *r, void *fail_open_)
+bool
+fail_open_handle_flow_miss(struct fail_open *fo, struct ofproto *ofproto,
+ uint16_t in_port, const flow_t *flow,
+ const struct ofpbuf *payload)
{
- struct fail_open_data *fail_open = fail_open_;
- if (rconn_is_connected(fail_open->remote_rconn) || !fail_open->lswitch) {
+ /* -1 (FLOOD) is coincidentally the value returned by mac_learning_lookup()
+ * when it doesn't have an entry for that address. */
+ enum { FLOOD = -1, DROP = -2 };
+ union ofp_action action;
+ int out_port;
+
+ if (rconn_is_connected(fo->controller) || !fo->mac_learning) {
return false;
+ }
+
+ if (mac_learning_learn(fo->mac_learning, flow->dl_src, 0, in_port)) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
+ VLOG_DBG_RL(&rl, "learned that "ETH_ADDR_FMT" is on port %"PRIu16,
+ ETH_ADDR_ARGS(flow->dl_src), in_port);
+ }
+
+ out_port = (eth_addr_is_reserved(flow->dl_src) ? DROP
+ : mac_learning_lookup(fo->mac_learning, flow->dl_dst, 0));
+ memset(&action, 0, sizeof action);
+ action.output.type = htons(OFPAT_OUTPUT);
+ action.output.len = htons(sizeof action);
+ if (in_port == out_port || out_port == DROP) {
+ /* Set up a flow to drop packets. */
+ ofproto_setup_exact_flow(ofproto, flow, NULL, 0, NULL);
+ } else if (out_port != FLOOD) {
+ /* The output port is known, so add a new flow. */
+ action.output.port = htons(out_port);
+ ofproto_setup_exact_flow(ofproto, flow, &action, 1, payload);
} else {
- lswitch_process_packet(fail_open->lswitch, fail_open->local_rconn,
- r->halves[HALF_LOCAL].rxbuf);
- rconn_run(fail_open->local_rconn);
- return true;
+ /* We don't know that MAC. Send along the packet without setting up a
+ * flow. */
+ action.output.port = htons(OFPP_FLOOD);
+ ofproto_send_packet(ofproto, flow, &action, 1, payload);
}
+ return true;
}
static void
-fail_open_status_cb(struct status_reply *sr, void *fail_open_)
+fail_open_status_cb(struct status_reply *sr, void *fo_)
{
- struct fail_open_data *fail_open = fail_open_;
- const struct settings *s = fail_open->s;
- int trigger_duration = s->probe_interval * 3;
- int cur_duration = rconn_failure_duration(fail_open->remote_rconn);
+ struct fail_open *fo = fo_;
+ int cur_duration = rconn_failure_duration(fo->controller);
- status_reply_put(sr, "trigger-duration=%d", trigger_duration);
+ status_reply_put(sr, "trigger-duration=%d", fo->trigger_duration);
status_reply_put(sr, "current-duration=%d", cur_duration);
status_reply_put(sr, "triggered=%s",
- cur_duration >= trigger_duration ? "true" : "false");
- status_reply_put(sr, "max-idle=%d", s->max_idle);
+ cur_duration >= fo->trigger_duration ? "true" : "false");
}
-static const struct hook_class fail_open_hook_class = {
- fail_open_local_packet_cb, /* local_packet_cb */
- NULL, /* remote_packet_cb */
- fail_open_periodic_cb, /* periodic_cb */
- fail_open_wait_cb, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-fail_open_start(struct secchan *secchan, const struct settings *s,
- struct switch_status *ss,
- struct rconn *local_rconn, struct rconn *remote_rconn)
+struct fail_open *
+fail_open_create(int trigger_duration, struct switch_status *switch_status,
+ struct rconn *controller)
{
- struct fail_open_data *fail_open = xmalloc(sizeof *fail_open);
- fail_open->s = s;
- fail_open->local_rconn = local_rconn;
- fail_open->remote_rconn = remote_rconn;
- fail_open->lswitch = NULL;
- fail_open->boot_deadline = time_now() + s->probe_interval * 3;
- if (s->enable_stp) {
- fail_open->boot_deadline += STP_EXTRA_BOOT_TIME;
- }
- switch_status_register_category(ss, "fail-open",
- fail_open_status_cb, fail_open);
- add_hook(secchan, &fail_open_hook_class, fail_open);
+ struct fail_open *fo = xmalloc(sizeof *fo);
+ fo->controller = controller;
+ fo->trigger_duration = trigger_duration;
+ fo->last_disconn_secs = 0;
+ fo->mac_learning = NULL;
+ switch_status_register_category(switch_status, "fail-open",
+ fail_open_status_cb, fo);
+ return fo;
}
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#ifndef FAIL_OPEN_H
#define FAIL_OPEN_H 1
+#include <stdbool.h>
+#include <stdint.h>
+#include "flow.h"
+
+struct fail_open;
+struct ofproto;
struct rconn;
-struct secchan;
-struct settings;
struct switch_status;
-void fail_open_start(struct secchan *, const struct settings *,
- struct switch_status *,
- struct rconn *local, struct rconn *remote);
+struct fail_open *fail_open_create(int trigger_duration,
+ struct switch_status *,
+ struct rconn *controller);
+void fail_open_wait(struct fail_open *);
+void fail_open_run(struct fail_open *);
+bool fail_open_handle_flow_miss(struct fail_open *, struct ofproto *,
+ uint16_t in_port, const flow_t *,
+ const struct ofpbuf *payload);
#endif /* fail-open.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include <errno.h>
-#include <arpa/inet.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <time.h>
-
-#include "openflow/nicira-ext.h"
-#include "openflow/openflow.h"
-#include "cfg.h"
-#include "flow-end.h"
-#include "netflow.h"
-#include "ofpbuf.h"
-#include "rconn.h"
-#include "secchan.h"
-#include "socket-util.h"
-#include "svec.h"
-#include "vconn.h"
-#include "xtoxll.h"
-
-#define THIS_MODULE VLM_flow_end
-#include "vlog.h"
-
-
-#define MAX_COLLECTORS 8
-
-struct flow_end_data {
- const struct settings *s;
-
- struct rconn *remote_rconn;
- struct rconn *local_rconn;
-
- bool send_ofp_exp; /* Send OpenFlow 'flow expired' messages? */
-
- int netflow_fds[MAX_COLLECTORS]; /* Sockets for NetFlow collectors. */
- uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */
-};
-
-static int
-udp_open(char *dst)
-{
- char *save_ptr;
- const char *host_name;
- const char *port_string;
- struct sockaddr_in sin;
- int retval;
- int fd;
-
- /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that
- * can cause segfaults here:
- * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
- * Using "::" instead of the obvious ":" works around it. */
- host_name = strtok_r(dst, "::", &save_ptr);
- port_string = strtok_r(NULL, "::", &save_ptr);
- if (!host_name) {
- ofp_error(0, "%s: bad peer name format", dst);
- return -EAFNOSUPPORT;
- }
- if (!port_string) {
- ofp_error(0, "%s: bad port format", dst);
- return -EAFNOSUPPORT;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- if (lookup_ip(host_name, &sin.sin_addr)) {
- return -ENOENT;
- }
- sin.sin_port = htons(atoi(port_string));
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
- VLOG_ERR("%s: socket: %s", dst, strerror(errno));
- return -errno;
- }
-
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return -retval;
- }
-
- retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- int error = errno;
- VLOG_ERR("%s: connect: %s", dst, strerror(error));
- close(fd);
- return -error;
- }
-
- return fd;
-}
-
-static void
-send_netflow_msg(const struct nx_flow_end *nfe, struct flow_end_data *fe)
-{
- struct netflow_v5_header *nf_hdr;
- struct netflow_v5_record *nf_rec;
- uint8_t buf[sizeof(*nf_hdr) + sizeof(*nf_rec)];
- uint8_t *p = buf;
- struct timeval now;
- int i;
-
- /* We only send NetFlow messages for fully specified IP flows; any
- * entry with a wildcard is ignored. */
- if ((nfe->match.wildcards != 0)
- || (nfe->match.dl_type != htons(ETH_TYPE_IP))) {
- return;
- }
-
- memset(&buf, 0, sizeof(buf));
- gettimeofday(&now, NULL);
-
- nf_hdr = (struct netflow_v5_header *)p;
- p += sizeof(*nf_hdr);
- nf_rec = (struct netflow_v5_record *)p;
-
- nf_hdr->version = htons(NETFLOW_V5_VERSION);
- nf_hdr->count = htons(1);
- nf_hdr->sysuptime = htonl((uint32_t)ntohll(nfe->end_time));
- nf_hdr->unix_secs = htonl(now.tv_sec);
- nf_hdr->unix_nsecs = htonl(now.tv_usec * 1000);
- nf_hdr->flow_seq = htonl(fe->netflow_cnt);
- nf_hdr->engine_type = 0;
- nf_hdr->engine_id = 0;
- nf_hdr->sampling_interval = htons(0);
-
- nf_rec->src_addr = nfe->match.nw_src;
- nf_rec->dst_addr = nfe->match.nw_dst;
- nf_rec->nexthop = htons(0);
- nf_rec->input = nfe->match.in_port;
- nf_rec->output = htons(0);
- nf_rec->packet_count = htonl((uint32_t)ntohll(nfe->packet_count));
- nf_rec->byte_count = htonl((uint32_t)ntohll(nfe->byte_count));
- nf_rec->init_time = htonl((uint32_t)ntohll(nfe->init_time));
- nf_rec->used_time = htonl((uint32_t)ntohll(nfe->used_time));
-
- if (nfe->match.nw_proto == IP_TYPE_ICMP) {
- /* In NetFlow, the ICMP type and code are concatenated and
- * placed in the 'dst_port' field. */
- uint8_t type = (uint8_t)ntohs(nfe->match.tp_src);
- uint8_t code = (uint8_t)ntohs(nfe->match.tp_dst);
- nf_rec->src_port = htons(0);
- nf_rec->dst_port = htons((type << 8) | code);
- } else {
- nf_rec->src_port = nfe->match.tp_src;
- nf_rec->dst_port = nfe->match.tp_dst;
- }
-
- nf_rec->tcp_flags = nfe->tcp_flags;
- nf_rec->ip_proto = nfe->match.nw_proto;
- nf_rec->ip_tos = nfe->ip_tos;
-
- nf_rec->src_as = htons(0);
- nf_rec->dst_as = htons(0);
- nf_rec->src_mask = 0;
- nf_rec->dst_mask = 0;
-
- for (i=0; i<MAX_COLLECTORS; i++) {
- if (fe->netflow_fds[i] == -1) {
- break;
- }
- send(fe->netflow_fds[i], buf, sizeof(buf), 0);
- }
- fe->netflow_cnt++;
-}
-
-static void
-send_ofp_expired(const struct nx_flow_end *nfe, const struct flow_end_data *fe)
-{
- struct ofp_flow_expired *ofe;
- struct ofpbuf *b;
-
- if ((nfe->reason != NXFER_IDLE_TIMEOUT)
- && (nfe->reason != NXFER_HARD_TIMEOUT)) {
- return;
- }
-
- ofe = make_openflow(sizeof(*ofe), OFPT_FLOW_EXPIRED, &b);
- ofe->match = nfe->match;
- ofe->priority = nfe->priority;
- if (nfe->reason == NXFER_IDLE_TIMEOUT) {
- ofe->reason = OFPER_IDLE_TIMEOUT;
- } else {
- ofe->reason = OFPER_HARD_TIMEOUT;
- }
- /* 'duration' is in seconds, but we keeping track of milliseconds. */
- ofe->duration = htonl((ntohll(nfe->end_time)-ntohll(nfe->init_time))/1000);
- ofe->packet_count = nfe->packet_count;
- ofe->byte_count = nfe->byte_count;
-
- rconn_send(fe->remote_rconn, b, NULL);
-}
-
-static void
-send_nx_flow_end_config(const struct flow_end_data *fe)
-{
- struct nx_flow_end_config *nfec;
- struct ofpbuf *b;
-
- nfec = make_openflow(sizeof(*nfec), OFPT_VENDOR, &b);
- nfec->header.vendor = htonl(NX_VENDOR_ID);
- nfec->header.subtype = htonl(NXT_FLOW_END_CONFIG);
- if ((fe->send_ofp_exp == false) && (fe->netflow_fds[0] < 0)) {
- nfec->enable = 0;
- } else {
- nfec->enable = 1;
- }
-
- rconn_send(fe->local_rconn, b, NULL);
-}
-
-static bool
-flow_end_local_packet_cb(struct relay *r, void *flow_end_)
-{
- struct flow_end_data *fe = flow_end_;
- struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf;
- struct nicira_header *request = msg->data;
- struct nx_flow_end *nfe = msg->data;
-
-
- if (msg->size < sizeof(*nfe)) {
- return false;
- }
- request = msg->data;
- if (request->header.type != OFPT_VENDOR
- || request->vendor != htonl(NX_VENDOR_ID)
- || request->subtype != htonl(NXT_FLOW_END)) {
- return false;
- }
-
- if (fe->netflow_fds[0] >= 0) {
- send_netflow_msg(nfe, fe);
- }
-
- if (fe->send_ofp_exp) {
- send_ofp_expired(nfe, fe);
- }
-
- /* We always consume these Flow End messages. */
- return true;
-}
-
-static bool
-flow_end_remote_packet_cb(struct relay *r, void *flow_end_)
-{
- struct flow_end_data *fe = flow_end_;
- struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf;
- struct ofp_switch_config *osc = msg->data;
-
- /* Check for OFPT_SET_CONFIG messages to see if the controller wants
- * to receive 'flow expired' messages. If so, we need to intercept
- * the datapath's 'flow end' meta-messages and convert. */
-
- if ((msg->size < sizeof(*osc))
- || (osc->header.type != OFPT_SET_CONFIG)) {
- return false;
- }
-
- if (osc->flags & htons(OFPC_SEND_FLOW_EXP)) {
- fe->send_ofp_exp = true;
- } else {
- fe->send_ofp_exp = false;
- }
-
- send_nx_flow_end_config(fe);
-
- return false;
-}
-
-static void
-flow_end_reconfigure_cb(void *flow_end_)
-{
- int i, nf_idx=0;
- struct flow_end_data *fe = flow_end_;
- struct svec collectors;
-
- /* Configure NetFlow collectors. */
- for (i=0; i<MAX_COLLECTORS; i++) {
- if (fe->netflow_fds[i] >= 0) {
- close(fe->netflow_fds[i]);
- fe->netflow_fds[i] = -1;
- }
- }
-
- svec_init(&collectors);
- cfg_get_all_keys(&collectors, "netflow.%s.host", fe->s->br_name);
- svec_sort(&collectors);
- if (!svec_is_unique(&collectors)) {
- VLOG_WARN("%s specified twice as netflow collector",
- svec_get_duplicate(&collectors));
- svec_unique(&collectors);
- }
-
- for (i=0; i<collectors.n; i++) {
- if (nf_idx >= MAX_COLLECTORS) {
- VLOG_WARN("too many netflow collectors specified, ignoring %s\n",
- collectors.names[i]);
- continue;
- }
-
- fe->netflow_fds[nf_idx] = udp_open(collectors.names[i]);
- if (fe->netflow_fds[nf_idx] < 0) {
- VLOG_WARN("couldn't open connection to collector, ignoring %s\n",
- collectors.names[i]);
- } else {
- nf_idx++;
- }
- }
-
- if (nf_idx > 0) {
- send_nx_flow_end_config(fe);
- }
-}
-
-static const struct hook_class flow_end_hook_class = {
- flow_end_local_packet_cb, /* local_packet_cb */
- flow_end_remote_packet_cb, /* remote_packet_cb */
- NULL, /* periodic_cb */
- NULL, /* wait_cb */
- NULL, /* closing_cb */
- flow_end_reconfigure_cb, /* reconfigure_cb */
-};
-
-void
-flow_end_start(struct secchan *secchan, const struct settings *settings,
- struct rconn *local, struct rconn *remote)
-{
- int i;
- struct flow_end_data *fe;
-
- fe = xcalloc(1, sizeof *fe);
-
- fe->s = settings;
- fe->remote_rconn = remote;
- fe->local_rconn = local;
-
- for (i=0; i<MAX_COLLECTORS; i++) {
- fe->netflow_fds[i] = -1;
- }
- fe->send_ofp_exp = false;
-
- add_hook(secchan, &flow_end_hook_class, fe);
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef FLOW_END_H
-#define FLOW_END_H 1
-
-
-struct secchan;
-struct settings;
-struct rconn;
-
-void flow_end_start(struct secchan *, const struct settings *,
- struct rconn *, struct rconn *);
-
-#endif /* flow-end.h */
#include <errno.h>
#include <inttypes.h>
#include <string.h>
+#include "dpif.h"
#include "flow.h"
#include "mac-learning.h"
#include "netdev.h"
+#include "ofp-print.h"
+#include "ofproto.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "packets.h"
-#include "port-watcher.h"
#include "rconn.h"
-#include "secchan.h"
#include "status.h"
#include "timeval.h"
#include "vconn.h"
#define THIS_MODULE VLM_in_band
#include "vlog.h"
-struct in_band_data {
- const struct settings *s;
- struct mac_learning *ml;
- struct netdev *of_device;
+/* In-band control state, allocated and filled in by in_band_create(). */
+struct in_band {
+ struct mac_learning *mac_learning; /* Learned MAC-to-port table. */
+ struct netdev *netdev; /* Device of the datapath local port. */
struct rconn *controller;
- int n_queued;
};
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
-static void
-queue_tx(struct rconn *rc, struct in_band_data *in_band, struct ofpbuf *b)
-{
- rconn_send_with_limit(rc, b, &in_band->n_queued, 10);
-}
-
static const uint8_t *
-get_controller_mac(struct in_band_data *in_band)
+get_controller_mac(struct in_band *in_band)
{
static uint32_t ip, last_nonzero_ip;
static uint8_t mac[ETH_ADDR_LEN], last_nonzero_mac[ETH_ADDR_LEN];
/* Look up MAC address. */
memset(mac, 0, sizeof mac);
- if (ip && in_band->of_device) {
- int retval = netdev_arp_lookup(in_band->of_device, ip, mac);
+ if (ip) {
+ int retval = netdev_arp_lookup(in_band->netdev, ip, mac);
if (retval) {
VLOG_DBG_RL(&rl, "cannot look up controller hw address "
"("IP_FMT"): %s", IP_ARGS(&ip), strerror(retval));
+/* Returns true if get_controller_mac() yields a non-null address for
+ * 'in_band' and that address equals 'dl_addr', otherwise false. */
static bool
is_controller_mac(const uint8_t dl_addr[ETH_ADDR_LEN],
- struct in_band_data *in_band)
+ struct in_band *in_band)
{
const uint8_t *mac = get_controller_mac(in_band);
return mac && eth_addr_equals(mac, dl_addr);
}
+/* Passes 'src_mac' and 'in_port' to mac_learning_learn() on the in-band
+ * learning table (with 0 as its third argument) and, when that call returns
+ * nonzero, emits a rate-limited debug message naming the address and port. */
static void
-in_band_learn_mac(struct in_band_data *in_band,
+in_band_learn_mac(struct in_band *in_band,
uint16_t in_port, const uint8_t src_mac[ETH_ADDR_LEN])
{
- if (mac_learning_learn(in_band->ml, src_mac, 0, in_port)) {
+ if (mac_learning_learn(in_band->mac_learning, src_mac, 0, in_port)) {
VLOG_DBG_RL(&rl, "learned that "ETH_ADDR_FMT" is on port %"PRIu16,
ETH_ADDR_ARGS(src_mac), in_port);
}
}
-static bool
-in_band_local_packet_cb(struct relay *r, void *in_band_)
+/* Applies the in-band control rules to a packet that arrived on 'in_port'
+ * with flow 'flow' and contents 'payload'.
+ *
+ * When a rule matches, this either sets up an exact-match flow through
+ * 'ofproto' (with no actions, to drop, or with a single output action) or,
+ * if the output port is not known, sends 'payload' with a single OFPP_FLOOD
+ * output action without setting up a flow; it then returns true. Returns
+ * false, without setting up a flow or sending anything, when no rule
+ * matches. */
+bool
+in_band_handle_flow_miss(struct in_band *in_band, struct ofproto *ofproto,
+ uint16_t in_port, const flow_t *flow,
+ const struct ofpbuf *payload)
{
- struct in_band_data *in_band = in_band_;
- struct rconn *rc = r->halves[HALF_LOCAL].rconn;
- struct ofp_packet_in *opi;
- struct eth_header *eth;
- struct ofpbuf payload;
- struct flow flow;
- uint16_t in_port;
+ /* -1 (FLOOD) is coincidentally the value returned by mac_learning_lookup()
+ * when it doesn't have an entry for that address. */
+ enum { FLOOD = -1, DROP = -2 };
+ union ofp_action action;
int out_port;
- if (!get_ofp_packet_eth_header(r, &opi, ð) || !in_band->of_device) {
- return false;
- }
- in_port = ntohs(opi->in_port);
- get_ofp_packet_payload(opi, &payload);
- flow_extract(&payload, in_port, &flow);
-
/* Deal with local stuff. */
- if (in_port == OFPP_LOCAL) {
+ if (in_port == ODPP_LOCAL) {
/* Sent by secure channel. */
- out_port = mac_learning_lookup(in_band->ml, eth->eth_dst, 0);
- } else if (eth_addr_equals(eth->eth_dst,
- netdev_get_etheraddr(in_band->of_device))) {
+ out_port = mac_learning_lookup(in_band->mac_learning, flow->dl_dst, 0);
+ } else if (eth_addr_equals(flow->dl_dst,
+ netdev_get_etheraddr(in_band->netdev))) {
/* Sent to secure channel. */
- out_port = OFPP_LOCAL;
- in_band_learn_mac(in_band, in_port, eth->eth_src);
- } else if (eth->eth_type == htons(ETH_TYPE_ARP)
- && eth_addr_is_broadcast(eth->eth_dst)
- && is_controller_mac(eth->eth_src, in_band)) {
+ out_port = ODPP_LOCAL;
+ in_band_learn_mac(in_band, in_port, flow->dl_src);
+ } else if (flow->dl_type == htons(ETH_TYPE_ARP)
+ && eth_addr_is_broadcast(flow->dl_dst)
+ && is_controller_mac(flow->dl_src, in_band)) {
/* ARP sent by controller. */
- out_port = OFPP_FLOOD;
- } else if ((is_controller_mac(eth->eth_dst, in_band)
- || is_controller_mac(eth->eth_src, in_band))
- && flow.dl_type == htons(ETH_TYPE_IP)
- && flow.nw_proto == IP_TYPE_TCP
- && (flow.tp_src == htons(OFP_TCP_PORT)
- || flow.tp_src == htons(OFP_SSL_PORT)
- || flow.tp_dst == htons(OFP_TCP_PORT)
- || flow.tp_dst == htons(OFP_SSL_PORT))) {
+ out_port = FLOOD;
+ } else if ((is_controller_mac(flow->dl_dst, in_band) ||
+ is_controller_mac(flow->dl_src, in_band))
+ && flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IP_TYPE_TCP
+ && (flow->tp_src == htons(OFP_TCP_PORT) ||
+ flow->tp_src == htons(OFP_SSL_PORT) ||
+ flow->tp_dst == htons(OFP_TCP_PORT) ||
+ flow->tp_dst == htons(OFP_SSL_PORT))) {
/* Traffic to or from controller. Switch it by hand. */
- in_band_learn_mac(in_band, in_port, eth->eth_src);
- out_port = mac_learning_lookup(in_band->ml, eth->eth_dst, 0);
+ in_band_learn_mac(in_band, in_port, flow->dl_src);
+ out_port = mac_learning_lookup(in_band->mac_learning, flow->dl_dst, 0);
} else {
- const uint8_t *controller_mac;
- controller_mac = get_controller_mac(in_band);
- if (eth->eth_type == htons(ETH_TYPE_ARP)
- && eth_addr_is_broadcast(eth->eth_dst)
- && is_controller_mac(eth->eth_src, in_band)) {
+ const uint8_t *controller_mac = get_controller_mac(in_band);
+ if (flow->dl_type == htons(ETH_TYPE_ARP)
+ && eth_addr_is_broadcast(flow->dl_dst)
+ && is_controller_mac(flow->dl_src, in_band)) {
/* ARP sent by controller. */
- out_port = OFPP_FLOOD;
- } else if (is_controller_mac(eth->eth_dst, in_band)
- && in_port == mac_learning_lookup(in_band->ml,
+ out_port = FLOOD;
+ } else if (is_controller_mac(flow->dl_dst, in_band)
+ && in_port == mac_learning_lookup(in_band->mac_learning,
controller_mac, 0)) {
/* Drop controller traffic that arrives on the controller port. */
- out_port = -1;
+ out_port = DROP;
} else {
return false;
}
}
- if (in_port == out_port) {
- /* The input and output port match. Set up a flow to drop packets. */
- queue_tx(rc, in_band, make_add_flow(&flow, ntohl(opi->buffer_id),
- in_band->s->max_idle, 0));
- } else if (out_port != OFPP_FLOOD) {
+ memset(&action, 0, sizeof action);
+ action.output.type = htons(OFPAT_OUTPUT);
+ action.output.len = htons(sizeof action);
+ if (in_port == out_port || out_port == DROP) {
+ /* Set up a flow to drop packets. */
+ ofproto_setup_exact_flow(ofproto, flow, NULL, 0, NULL);
+ } else if (out_port != FLOOD) {
/* The output port is known, so add a new flow. */
- queue_tx(rc, in_band,
- make_add_simple_flow(&flow, ntohl(opi->buffer_id),
- out_port, in_band->s->max_idle));
-
- /* If the switch didn't buffer the packet, we need to send a copy. */
- if (ntohl(opi->buffer_id) == UINT32_MAX) {
- queue_tx(rc, in_band,
- make_unbuffered_packet_out(&payload, in_port, out_port));
- }
+ action.output.port = htons(out_port);
+ ofproto_setup_exact_flow(ofproto, flow, &action, 1, payload);
} else {
/* We don't know that MAC. Send along the packet without setting up a
* flow. */
- struct ofpbuf *b;
- if (ntohl(opi->buffer_id) == UINT32_MAX) {
- b = make_unbuffered_packet_out(&payload, in_port, out_port);
- } else {
- b = make_buffered_packet_out(ntohl(opi->buffer_id),
- in_port, out_port);
- }
- queue_tx(rc, in_band, b);
+ /* NOTE(review): this store looks redundant with the assignment to
+ * action.output.type above if the union members overlap -- confirm. */
+ action.type = htons(OFPAT_OUTPUT);
+ action.output.port = htons(OFPP_FLOOD);
+ ofproto_send_packet(ofproto, flow, &action, 1, payload);
}
return true;
}
+/* Status callback registered by in_band_create() under the "in-band"
+ * category; 'in_band_' is the struct in_band given at registration.
+ *
+ * Appends to 'sr' the local IP (only when netdev_get_in4() returns nonzero),
+ * the local MAC, the controller IP (only when rconn_get_ip() returns
+ * nonzero), and the controller MAC (only when get_controller_mac() returns
+ * non-null). */
static void
in_band_status_cb(struct status_reply *sr, void *in_band_)
{
- struct in_band_data *in_band = in_band_;
+ struct in_band *in_band = in_band_;
struct in_addr local_ip;
uint32_t controller_ip;
const uint8_t *controller_mac;
+ const uint8_t *mac;
- if (in_band->of_device) {
- const uint8_t *mac = netdev_get_etheraddr(in_band->of_device);
- if (netdev_get_in4(in_band->of_device, &local_ip)) {
- status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr));
- }
- status_reply_put(sr, "local-mac="ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
+ mac = netdev_get_etheraddr(in_band->netdev);
+ if (netdev_get_in4(in_band->netdev, &local_ip)) {
+ status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr));
+ }
+ status_reply_put(sr, "local-mac="ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
- controller_ip = rconn_get_ip(in_band->controller);
- if (controller_ip) {
- status_reply_put(sr, "controller-ip="IP_FMT,
- IP_ARGS(&controller_ip));
- }
- controller_mac = get_controller_mac(in_band);
- if (controller_mac) {
- status_reply_put(sr, "controller-mac="ETH_ADDR_FMT,
- ETH_ADDR_ARGS(controller_mac));
- }
+ controller_ip = rconn_get_ip(in_band->controller);
+ if (controller_ip) {
+ status_reply_put(sr, "controller-ip="IP_FMT,
+ IP_ARGS(&controller_ip));
+ }
+ controller_mac = get_controller_mac(in_band);
+ if (controller_mac) {
+ status_reply_put(sr, "controller-mac="ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(controller_mac));
}
}
+/* Periodic work for in-band control: calls mac_learning_run() on the
+ * learning table of 'in_band', passing NULL as its second argument. */
void
-get_ofp_packet_payload(struct ofp_packet_in *opi, struct ofpbuf *payload)
+in_band_run(struct in_band *in_band)
{
- payload->data = opi->data;
- payload->size = ntohs(opi->header.length) - offsetof(struct ofp_packet_in,
- data);
+ mac_learning_run(in_band->mac_learning, NULL);
}
-static void
-in_band_local_port_cb(const struct ofp_phy_port *port, void *in_band_)
-{
- struct in_band_data *in_band = in_band_;
- if (port) {
- char name[sizeof port->name + 1];
- get_port_name(port, name, sizeof name);
-
- if (!in_band->of_device
- || strcmp(netdev_get_name(in_band->of_device), name))
- {
- int error;
- netdev_close(in_band->of_device);
- error = netdev_open(name, NETDEV_ETH_TYPE_NONE,
- &in_band->of_device);
- if (error) {
- VLOG_ERR("failed to open in-band control network device "
- "\"%s\": %s", name, strerror(errno));
- }
- }
- } else {
- netdev_close(in_band->of_device);
- in_band->of_device = NULL;
- }
-}
-
-static void
-in_band_periodic_cb(void *in_band_)
+/* Companion to in_band_run(): calls mac_learning_wait() on the learning
+ * table of 'in_band'. */
+void
+in_band_wait(struct in_band *in_band)
{
- struct in_band_data *in_band = in_band_;
- mac_learning_run(in_band->ml, NULL);
+ mac_learning_wait(in_band->mac_learning);
}
-static void
-in_band_wait_cb(void *in_band_)
+/* Creates in-band control state for the datapath 'dpif'.
+ *
+ * Queries the datapath for its ODPP_LOCAL port and opens that port's network
+ * device; a failure of either step is reported through ofp_fatal(). Then
+ * allocates a zeroed struct in_band, gives it a fresh MAC learning table,
+ * the opened device and 'controller', and registers in_band_status_cb() with
+ * 'ss' under the "in-band" category.
+ *
+ * Returns the new object. */
+struct in_band *
+in_band_create(struct dpif *dpif, struct switch_status *ss,
+ struct rconn *controller)
{
- struct in_band_data *in_band = in_band_;
- mac_learning_wait(in_band->ml);
-}
+ struct in_band *in_band;
+ struct netdev *netdev;
+ struct odp_port port;
+ int error;
-static const struct hook_class in_band_hook_class = {
- in_band_local_packet_cb, /* local_packet_cb */
- NULL, /* remote_packet_cb */
- in_band_periodic_cb, /* periodic_cb */
- in_band_wait_cb, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
+ error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &port);
+ if (error) {
+ ofp_fatal(error, "failed to query datapath local port");
+ }
-void
-in_band_start(struct secchan *secchan,
- const struct settings *s, struct switch_status *ss,
- struct port_watcher *pw, struct rconn *remote)
-{
- struct in_band_data *in_band;
+ error = netdev_open(port.devname, NETDEV_ETH_TYPE_NONE, &netdev);
+ if (error) {
+ ofp_fatal(error, "failed to open %s network device", port.devname);
+ }
in_band = xcalloc(1, sizeof *in_band);
- in_band->s = s;
- in_band->ml = mac_learning_create();
- in_band->of_device = NULL;
- in_band->controller = remote;
+ in_band->mac_learning = mac_learning_create();
+ in_band->netdev = netdev;
+ in_band->controller = controller;
switch_status_register_category(ss, "in-band", in_band_status_cb, in_band);
- port_watcher_register_local_port_callback(pw, in_band_local_port_cb,
- in_band);
- add_hook(secchan, &in_band_hook_class, in_band);
+
+ return in_band;
}
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#ifndef IN_BAND_H
#define IN_BAND_H 1
-struct port_watcher;
+#include "flow.h"
+
+struct dpif;
+struct ofproto;
struct rconn;
struct secchan;
struct settings;
struct switch_status;
-void in_band_start(struct secchan *, const struct settings *,
- struct switch_status *, struct port_watcher *,
- struct rconn *remote);
+struct in_band *in_band_create(struct dpif *, struct switch_status *,
+ struct rconn *controller);
+void in_band_run(struct in_band *);
+void in_band_wait(struct in_band *);
+bool in_band_handle_flow_miss(struct in_band *, struct ofproto *,
+ uint16_t in_port, const flow_t *,
+ const struct ofpbuf *payload);
#endif /* in-band.h */
--- /dev/null
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#include <config.h>
+#include "netflow.h"
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "cfg.h"
+#include "flow.h"
+#include "netflow.h"
+#include "packets.h"
+#include "secchan.h"
+#include "socket-util.h"
+#include "svec.h"
+#include "timeval.h"
+#include "util.h"
+#include "xtoxll.h"
+
+#define THIS_MODULE VLM_netflow
+#include "vlog.h"
+
+#define NETFLOW_V5_VERSION 5
+
+/* Every NetFlow v5 message contains the header that follows. This is
+ * followed by up to thirty records that describe a terminating flow.
+ * We only send a single record per NetFlow message.
+ *
+ * netflow_expire() fills in every multi-byte field with htons()/htonl(),
+ * i.e. in network byte order. The size is pinned at 24 bytes by the
+ * build-time assertion below.
+ */
+struct netflow_v5_header {
+ uint16_t version; /* NetFlow version is 5. */
+ uint16_t count; /* Number of records in this message. */
+ uint32_t sysuptime; /* "System uptime" in milliseconds;
+ netflow_expire() sends the time
+ elapsed since netflow_create(). */
+ uint32_t unix_secs; /* Number of seconds since Unix epoch. */
+ uint32_t unix_nsecs; /* Number of residual nanoseconds
+ after epoch seconds. */
+ uint32_t flow_seq; /* Number of flows since sending
+ messages began. */
+ uint8_t engine_type; /* Set to zero. */
+ uint8_t engine_id; /* Set to zero. */
+ uint16_t sampling_interval; /* Set to zero. */
+};
+BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24);
+
+/* A NetFlow v5 description of a terminating flow. It is preceded by a
+ * NetFlow v5 header.
+ *
+ * netflow_expire() zeroes the whole record first (so the pad fields are
+ * sent as zero) and converts the counters, timestamps and input port with
+ * htonl()/htons(). The size is pinned at 48 bytes by the build-time
+ * assertion below.
+ */
+struct netflow_v5_record {
+ uint32_t src_addr; /* Source IP address. */
+ uint32_t dst_addr; /* Destination IP address. */
+ uint32_t nexthop; /* IP address of next hop. Set to 0. */
+ uint16_t input; /* Input interface index. */
+ uint16_t output; /* Output interface index. */
+ uint32_t packet_count; /* Number of packets. */
+ uint32_t byte_count; /* Number of bytes. */
+ uint32_t init_time; /* Value of sysuptime on first packet. */
+ uint32_t used_time; /* Value of sysuptime on last packet. */
+
+ /* The 'src_port' and 'dst_port' identify the source and destination
+ * port, respectively, for TCP and UDP. For ICMP, the high-order
+ * byte identifies the type and low-order byte identifies the code
+ * in the 'dst_port' field. */
+ uint16_t src_port;
+ uint16_t dst_port;
+
+ uint8_t pad1;
+ uint8_t tcp_flags; /* Union of seen TCP flags. */
+ uint8_t ip_proto; /* IP protocol. */
+ uint8_t ip_tos; /* IP TOS value. */
+ uint16_t src_as; /* Source AS ID. Set to 0. */
+ uint16_t dst_as; /* Destination AS ID. Set to 0. */
+ uint8_t src_mask; /* Source mask bits. Set to 0. */
+ uint8_t dst_mask; /* Destination mask bits. Set to 0. */
+ uint8_t pad[2];
+};
+BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48);
+
+#define MAX_COLLECTORS 8
+
+struct netflow {
+ const char *br_name; /* Bridge name, for reading config file.
+ netflow_create() stores the caller's
+ pointer; the string is not copied. */
+ long long int boot_time; /* Time when netflow_create() was called,
+ as returned by time_msec(). */
+ int netflow_fds[MAX_COLLECTORS]; /* Sockets for NetFlow collectors.
+ Open sockets are packed at the front;
+ netflow_create() marks unused slots
+ with -1. */
+ uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */
+};
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+/* Opens a non-blocking UDP socket connected to the collector named by 'dst',
+ * which must have the form "host:port" with a decimal port in the range
+ * 1...65535.
+ *
+ * 'dst' is tokenized in place by strtok_r(), so on return the string has been
+ * cut at the first ':'.
+ *
+ * Returns the new (nonnegative) file descriptor on success, otherwise a
+ * negative errno value. */
+static int
+udp_open(char *dst)
+{
+    char *save_ptr;
+    const char *host_name;
+    const char *port_string;
+    struct sockaddr_in sin;
+    char *tail;
+    long int port;
+    int retval;
+    int error;
+    int fd;
+
+    /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that
+     * can cause segfaults here:
+     * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
+     * Using "::" instead of the obvious ":" works around it. */
+    host_name = strtok_r(dst, "::", &save_ptr);
+    port_string = strtok_r(NULL, "::", &save_ptr);
+    if (!host_name) {
+        ofp_error(0, "%s: bad peer name format", dst);
+        return -EAFNOSUPPORT;
+    }
+    if (!port_string) {
+        ofp_error(0, "%s: bad port format", dst);
+        return -EAFNOSUPPORT;
+    }
+
+    /* Parse the port with strtol() rather than atoi() so that empty strings,
+     * trailing junk, and values that do not fit in 16 bits are rejected
+     * instead of being silently turned into some other port. */
+    errno = 0;
+    port = strtol(port_string, &tail, 10);
+    if (errno || tail == port_string || *tail != '\0'
+        || port <= 0 || port > 65535) {
+        ofp_error(0, "%s: bad port format", dst);
+        return -EAFNOSUPPORT;
+    }
+
+    memset(&sin, 0, sizeof sin);
+    sin.sin_family = AF_INET;
+    if (lookup_ip(host_name, &sin.sin_addr)) {
+        return -ENOENT;
+    }
+    sin.sin_port = htons(port);
+
+    fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (fd < 0) {
+        /* Save errno before logging: the logging call may overwrite it. */
+        error = errno;
+        VLOG_ERR("%s: socket: %s", dst, strerror(error));
+        return -error;
+    }
+
+    retval = set_nonblocking(fd);
+    if (retval) {
+        close(fd);
+        return -retval;
+    }
+
+    retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
+    if (retval < 0) {
+        error = errno;
+        VLOG_ERR("%s: connect: %s", dst, strerror(error));
+        close(fd);
+        return -error;
+    }
+
+    return fd;
+}
+
+/* Sends one NetFlow v5 message, consisting of a header and a single record
+ * describing 'flow', to every collector socket open in 'nf'.
+ *
+ * 'stats' supplies the packet and byte counts, the time the flow was last
+ * used, and its TCP flags and IP TOS. 'created' is the flow's creation time;
+ * it is compared against and subtracted from values derived from
+ * time_msec(), so it must be in milliseconds on that same clock.
+ *
+ * Send failures are logged (rate-limited) and otherwise ignored. The flow
+ * sequence number in 'nf' advances by one on every call. */
+void
+netflow_expire(struct netflow *nf, const flow_t *flow,
+               const struct odp_flow_stats *stats,
+               long long int created)
+{
+    struct netflow_v5_header nf_hdr;
+    struct netflow_v5_record nf_rec;
+    struct msghdr msghdr;
+    struct iovec iov[2];
+    struct timeval now;
+    long long int used;
+    int i;
+
+    time_timeval(&now);
+
+    memset(&nf_hdr, 0, sizeof nf_hdr);
+    nf_hdr.version = htons(NETFLOW_V5_VERSION);
+    nf_hdr.count = htons(1);
+    nf_hdr.sysuptime = htonl(time_msec() - nf->boot_time);
+    nf_hdr.unix_secs = htonl(now.tv_sec);
+    nf_hdr.unix_nsecs = htonl(now.tv_usec * 1000);
+    nf_hdr.flow_seq = htonl(nf->netflow_cnt);
+    nf_hdr.engine_type = 0;
+    nf_hdr.engine_id = 0;
+    nf_hdr.sampling_interval = htons(0);
+
+    memset(&nf_rec, 0, sizeof nf_rec);
+    nf_rec.src_addr = flow->nw_src;
+    nf_rec.dst_addr = flow->nw_dst;
+    nf_rec.nexthop = htonl(0);          /* 32-bit field, hence htonl(). */
+    nf_rec.input = htons(flow->in_port);
+    nf_rec.output = htons(0);
+    nf_rec.packet_count = htonl(stats->n_packets);
+    nf_rec.byte_count = htonl(stats->n_bytes);
+    nf_rec.init_time = htonl(created - nf->boot_time);
+
+    /* Widen before multiplying so the product is computed as a long long
+     * whatever the declared width of 'used_sec'. */
+    used = (long long int) stats->used_sec * 1000
+           + stats->used_nsec / 1000000;
+    /* Never report a last-used time that precedes the creation time. */
+    nf_rec.used_time = htonl((used > created ? used : created)
+                             - nf->boot_time);
+
+    if (flow->nw_proto == IP_TYPE_ICMP) {
+        /* In NetFlow, the ICMP type and code are concatenated and
+         * placed in the 'dst_port' field. */
+        uint8_t type = ntohs(flow->tp_src);
+        uint8_t code = ntohs(flow->tp_dst);
+        nf_rec.src_port = htons(0);
+        nf_rec.dst_port = htons((type << 8) | code);
+    } else {
+        nf_rec.src_port = flow->tp_src;
+        nf_rec.dst_port = flow->tp_dst;
+    }
+
+    nf_rec.tcp_flags = stats->tcp_flags;
+    nf_rec.ip_proto = flow->nw_proto;
+    nf_rec.ip_tos = stats->ip_tos;
+
+    nf_rec.src_as = htons(0);
+    nf_rec.dst_as = htons(0);
+    nf_rec.src_mask = 0;
+    nf_rec.dst_mask = 0;
+
+    /* The message is identical for every collector, so describe it once.
+     * Zero the whole msghdr instead of assigning members one at a time:
+     * implementations may define members beyond the ones POSIX lists. */
+    iov[0].iov_base = &nf_hdr;
+    iov[0].iov_len = sizeof nf_hdr;
+    iov[1].iov_base = &nf_rec;
+    iov[1].iov_len = sizeof nf_rec;
+    memset(&msghdr, 0, sizeof msghdr);
+    msghdr.msg_iov = iov;
+    msghdr.msg_iovlen = 2;
+
+    for (i = 0; i < MAX_COLLECTORS; i++) {
+        /* Open sockets are packed at the front of the array, so the first
+         * slot without a valid descriptor ends the list. Testing "< 0"
+         * rather than "== -1" also stops on any other negative value left
+         * in a slot. */
+        if (nf->netflow_fds[i] < 0) {
+            break;
+        }
+
+        if (sendmsg(nf->netflow_fds[i], &msghdr, 0) < 0) {
+            VLOG_WARN_RL(&rl, "netflow message send failed: %s",
+                         strerror(errno));
+        }
+    }
+    nf->netflow_cnt++;
+}
+
+/* Rebuilds the set of NetFlow collector sockets in 'nf' from the
+ * configuration keys "netflow.<br_name>.host".
+ *
+ * Every previously open socket is closed first. Duplicate collectors are
+ * used once, collectors beyond MAX_COLLECTORS are ignored, and collectors
+ * that cannot be opened are skipped; each of these cases is logged. On
+ * return the open sockets occupy the leading slots of nf->netflow_fds[] and
+ * every other slot holds -1. */
+void
+netflow_reconfigure(struct netflow *nf)
+{
+    struct svec collectors;
+    int n_open = 0;
+    int i;
+
+    /* Close the old sockets and reset every slot, so that no stale value of
+     * any kind survives into the new configuration. */
+    for (i = 0; i < MAX_COLLECTORS; i++) {
+        if (nf->netflow_fds[i] >= 0) {
+            close(nf->netflow_fds[i]);
+        }
+        nf->netflow_fds[i] = -1;
+    }
+
+    svec_init(&collectors);
+    cfg_get_all_keys(&collectors, "netflow.%s.host", nf->br_name);
+    svec_sort(&collectors);
+    if (!svec_is_unique(&collectors)) {
+        VLOG_WARN("%s specified twice as netflow collector",
+                  svec_get_duplicate(&collectors));
+        svec_unique(&collectors);
+    }
+
+    for (i = 0; i < collectors.n; i++) {
+        int fd;
+
+        if (n_open >= MAX_COLLECTORS) {
+            VLOG_WARN("too many netflow collectors specified, ignoring %s",
+                      collectors.names[i]);
+            continue;
+        }
+
+        /* Open into a local first: udp_open() returns a negative errno value
+         * on failure, and storing that in the array would leave a slot that
+         * is neither a valid descriptor nor the -1 "unused" marker. */
+        fd = udp_open(collectors.names[i]);
+        if (fd < 0) {
+            VLOG_WARN("couldn't open connection to collector, ignoring %s",
+                      collectors.names[i]);
+            continue;
+        }
+        nf->netflow_fds[n_open++] = fd;
+    }
+
+    /* Release the key list; otherwise it leaks on every reconfiguration. */
+    svec_destroy(&collectors);
+}
+
+/* Allocates and returns a NetFlow exporter for the bridge named 'br_name'.
+ *
+ * The caller's 'br_name' pointer is stored as-is (the string is not copied).
+ * The new exporter records the current time_msec() as its boot time, has no
+ * collector sockets open (every slot is -1), and starts its flow sequence
+ * number at zero. */
+struct netflow *
+netflow_create(const char *br_name)
+{
+    struct netflow *exporter = xmalloc(sizeof *exporter);
+    int *slot;
+
+    exporter->br_name = br_name;
+    exporter->boot_time = time_msec();
+
+    /* No collectors until netflow_reconfigure() opens some. */
+    for (slot = exporter->netflow_fds;
+         slot < &exporter->netflow_fds[MAX_COLLECTORS]; slot++) {
+        *slot = -1;
+    }
+
+    exporter->netflow_cnt = 0;
+    return exporter;
+}
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#ifndef NETFLOW_H
#define NETFLOW_H 1
-#include <util.h>
+#include "flow.h"
+struct odp_flow_stats;
-#define NETFLOW_V5_VERSION 5
-
-/* Every NetFlow v5 message contains the header that follows. This is
- * followed by up to thirty records that describe a terminating flow.
- * We only send a single record per NetFlow message.
- */
-struct netflow_v5_header {
- uint16_t version; /* NetFlow version is 5. */
- uint16_t count; /* Number of records in this message. */
- uint32_t sysuptime; /* System uptime in milliseconds. */
- uint32_t unix_secs; /* Number of seconds since Unix epoch. */
- uint32_t unix_nsecs; /* Number of residual nanoseconds
- after epoch seconds. */
- uint32_t flow_seq; /* Number of flows since sending
- messages began. */
- uint8_t engine_type; /* Set to zero. */
- uint8_t engine_id; /* Set to zero. */
- uint16_t sampling_interval; /* Set to zero. */
-};
-BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24);
-
-/* A NetFlow v5 description of a terminating flow. It is preceded by a
- * NetFlow v5 header.
- */
-struct netflow_v5_record {
- uint32_t src_addr; /* Source IP address. */
- uint32_t dst_addr; /* Destination IP address. */
- uint32_t nexthop; /* IP address of next hop. Set to 0. */
- uint16_t input; /* Input interface index. */
- uint16_t output; /* Output interface index. */
- uint32_t packet_count; /* Number of packets. */
- uint32_t byte_count; /* Number of bytes. */
- uint32_t init_time; /* Value of sysuptime on first packet. */
- uint32_t used_time; /* Value of sysuptime on last packet. */
-
- /* The 'src_port' and 'dst_port' identify the source and destination
- * port, respectively, for TCP and UDP. For ICMP, the high-order
- * byte identifies the type and low-order byte identifies the code
- * in the 'dst_port' field. */
- uint16_t src_port;
- uint16_t dst_port;
-
- uint8_t pad1;
- uint8_t tcp_flags; /* Union of seen TCP flags. */
- uint8_t ip_proto; /* IP protocol. */
- uint8_t ip_tos; /* IP TOS value. */
- uint16_t src_as; /* Source AS ID. Set to 0. */
- uint16_t dst_as; /* Destination AS ID. Set to 0. */
- uint8_t src_mask; /* Source mask bits. Set to 0. */
- uint8_t dst_mask; /* Destination mask bits. Set to 0. */
- uint8_t pad[2];
-};
-BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48);
+struct netflow *netflow_create(const char *br_name);
+void netflow_reconfigure(struct netflow *);
+void netflow_expire(struct netflow *, const flow_t *,
+ const struct odp_flow_stats *,
+ long long int created);
#endif /* netflow.h */
--- /dev/null
+/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#include <config.h>
+#include "ofproto.h"
+#include <errno.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include "classifier.h"
+#include "discovery.h"
+#include "dpif.h"
+#include "executer.h"
+#include "fail-open.h"
+#include "in-band.h"
+#include "netdev.h"
+#include "netflow.h"
+#include "ofp-print.h"
+#include "ofpbuf.h"
+#include "openflow/datapath-protocol.h"
+#include "openflow/nicira-ext.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "pinsched.h"
+#include "pktbuf.h"
+#include "poll-loop.h"
+#include "port-array.h"
+#include "rconn.h"
+#include "secchan.h"
+#include "shash.h"
+#include "status.h"
+#include "svec.h"
+#include "timeval.h"
+#include "vconn.h"
+#include "xtoxll.h"
+
+#define THIS_MODULE VLM_ofproto
+#include "vlog.h"
+
+/* Datapath port group numbers.  refresh_port_group() installs the member
+ * ports of each group with dpif_port_group_set(), and the translation of
+ * OFPP_FLOOD/OFPP_NORMAL and OFPP_ALL output actions refers to the groups
+ * by these numbers. */
+enum {
+ DP_GROUP_FLOOD = 0,
+ DP_GROUP_ALL = 1
+};
+
+/* Table identifiers.
+ * NOTE(review): not referenced in the visible part of this file; presumably
+ * these are the table IDs reported to controllers for the two tables that
+ * handle_features_request() advertises -- confirm against the users. */
+enum {
+ TABLEID_HASH = 0,
+ TABLEID_CLASSIFIER = 1
+};
+
+/* One datapath port as tracked by an ofproto: the network device opened for
+ * it by make_ofport() plus the OpenFlow description of the port.  The
+ * description is converted with hton_ofp_phy_port() on a copy before it is
+ * sent to a controller. */
+struct ofport {
+ struct netdev *netdev;
+ struct ofp_phy_port opp; /* In host byte order. */
+};
+
+static void hton_ofp_phy_port(struct ofp_phy_port *opp);
+
+/* A growable array of datapath actions.  init_actions() empties it,
+ * add_action() appends (reallocating 'actions' when 'n_actions' reaches
+ * 'allocated_actions'), and free_actions() releases 'actions'. */
+struct odp_actions {
+ union odp_action *actions;
+ size_t n_actions, allocated_actions;
+};
+
+static void init_actions(struct odp_actions *);
+static void free_actions(struct odp_actions *);
+static void ofp_actions_to_odp_actions(uint16_t ofp_in_port,
+ const struct ofp_action_header *in_,
+ size_t n_in, struct odp_actions *out);
+
+/* Sentinel stored in a subrule's 'super' pointer by rule_destroy() when the
+ * super-rule it belonged to is destroyed. */
+#define UNKNOWN_SUPER ((struct rule *)-1)
+
+/* A flow table entry.  'cr' embeds the classifier rule, so a struct rule can
+ * be recovered from a classifier result with rule_from_cls_rule(). */
+struct rule {
+ struct cls_rule cr;
+
+ uint16_t idle_timeout;
+ uint16_t hard_timeout;
+ long long int used; /* A time_msec() timestamp. */
+ long long int created; /* A time_msec() timestamp. */
+ uint64_t packet_count; /* Packets from *expired* subrules. */
+ uint64_t byte_count; /* Bytes from *expired* subrules. */
+ uint8_t tcp_flags;
+ uint8_t ip_tos;
+
+ /* 'super' is NULL for a rule that is not a subrule.  For a subrule it is
+ * the owning super-rule, or UNKNOWN_SUPER once that rule has been
+ * destroyed.  rule_destroy() walks 'list' of a super-rule to reach its
+ * subrules and unlinks 'list' of a subrule from its super-rule. */
+ struct rule *super;
+ struct list list;
+
+ /* A subrule has no actions (it uses the super-rule's actions). */
+ int n_actions;
+ union ofp_action actions[];
+};
+
+static void rule_destroy(struct rule *);
+static inline size_t rule_size(int n_actions);
+static struct rule *rule_from_cls_rule(const struct cls_rule *);
+static void rule_make_actions(const struct rule *, struct odp_actions *);
+
+/* One OpenFlow connection: either the primary controller connection or a
+ * connection accepted from one of the ofproto's listeners. */
+struct ofconn {
+ struct list node; /* Element of the owning ofproto's 'all_conns'. */
+ struct rconn *rconn; /* The connection itself. */
+ struct pktbuf *pktbuf; /* Packet buffers, or NULL if none. */
+ bool send_flow_exp; /* Set from OFPC_SEND_FLOW_EXP in set-config. */
+ int miss_send_len; /* Reported in get-config replies; 0 initially. */
+};
+
+static struct ofconn *ofconn_create(struct ofproto *, struct rconn *);
+static void ofconn_destroy(struct ofconn *, struct ofproto *);
+static void ofconn_run(struct ofconn *, struct ofproto *);
+static void ofconn_wait(struct ofconn *);
+static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn);
+
+/* State for one OpenFlow switch instance, built by ofproto_create(). */
+struct ofproto {
+ /* Settings. */
+ uint64_t datapath_id; /* Datapath ID. */
+ const char *mfr_desc; /* Manufacturer. */
+ const char *hw_desc; /* Hardware. */
+ const char *sw_desc; /* Software version. */
+ const char *serial_desc; /* Serial number. */
+
+ /* Datapath. */
+ struct dpif dpif;
+ struct dpifmon *dpifmon;
+ struct port_array ports; /* "struct ofport"s indexed by datapath port. */
+ struct shash port_by_name; /* The same "struct ofport"s, by device name. */
+
+ /* Configuration.  Each optional module pointer is NULL when the
+ * corresponding setting was not enabled in ofproto_create(). */
+ struct switch_status *switch_status;
+ struct in_band *in_band;
+ struct discovery *discovery;
+ struct fail_open *fail_open;
+ struct pinsched *miss_sched, *action_sched;
+ struct executer *executer;
+ struct netflow *netflow;
+
+ /* Flow table. */
+ struct classifier cls;
+ bool need_revalidate;
+ long long int next_expiration; /* time_msec() value of next expiry pass. */
+
+ /* OpenFlow connections. */
+ struct list all_conns; /* All "struct ofconn"s, including 'controller'. */
+ struct ofconn *controller;
+ struct pvconn *listeners[MAX_MGMT];
+ size_t n_listeners;
+};
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+static uint64_t pick_datapath_id(struct dpif *);
+static void send_packet_in_miss(struct ofpbuf *, void *ofproto);
+static void send_packet_in_action(struct ofpbuf *, void *ofproto);
+static void update_used(struct ofproto *);
+static void expire_rule(struct cls_rule *, void *ofproto);
+static void revalidate_subrule(struct cls_rule *, void *ofproto);
+
+static void handle_odp_msg(struct ofproto *, struct ofpbuf *);
+
+static void handle_openflow(struct ofconn *, struct ofproto *,
+ struct ofpbuf *);
+
+static void refresh_port_group(struct ofproto *, unsigned int group);
+static void update_port(struct ofproto *, const char *devname);
+static void init_ports(struct ofproto *);
+static void reinit_ports(struct ofproto *);
+
+static uint16_t odp_port_to_ofp_port(uint16_t odp_port);
+static uint16_t ofp_port_to_odp_port(uint16_t ofp_port);
+
+/* Creates and returns a new OpenFlow switch instance configured from
+ * 'settings'.
+ *
+ * Opens the datapath named by 'settings->dp_name', subscribes to its miss
+ * and action messages, starts monitoring its ports, creates the controller
+ * connection (connecting it if a controller name was given), creates each
+ * optional submodule that 'settings' enables, opens the management
+ * listeners, and records the datapath's current ports.
+ *
+ * Failure to open the datapath, set its listen mask, start port monitoring,
+ * or open a listener (other than with EAGAIN) is fatal via ofp_fatal(), as
+ * is a controller name whose vconn type is unsupported. */
+struct ofproto *
+ofproto_create(const struct settings *settings)
+{
+    struct dpifmon *dpifmon;
+    struct rconn *controller;
+    struct ofproto *p;
+    struct dpif dpif;
+    int error;
+    size_t i;
+
+    /* Connect to datapath and start listening for messages. */
+    error = dpif_open(settings->dp_name, &dpif);
+    if (error) {
+        ofp_fatal(error, "Failed to open datapath %s", settings->dp_name);
+    }
+    error = dpif_set_listen_mask(&dpif, ODPL_MISS | ODPL_ACTION);
+    if (error) {
+        ofp_fatal(error, "failed to listen on dpif %d", dpif.minor);
+    }
+
+    /* Start monitoring datapath ports for status changes. */
+    error = dpifmon_create(&dpif, &dpifmon);
+    if (error) {
+        ofp_fatal(error, "failed to starting monitoring dpif %d", dpif.minor);
+    }
+
+    /* Create controller connection. */
+    controller = rconn_create(settings->probe_interval, settings->max_backoff);
+    if (settings->controller_name) {
+        error = rconn_connect(controller, settings->controller_name);
+        if (error == EAFNOSUPPORT) {
+            ofp_fatal(0, "No support for %s vconn", settings->controller_name);
+        }
+    }
+
+    /* Initialize settings. */
+    p = xcalloc(1, sizeof *p);
+    p->datapath_id = settings->datapath_id;
+    if (!p->datapath_id) {
+        p->datapath_id = pick_datapath_id(&dpif);
+    }
+    /* Log the ID actually in use, which differs from the configured one
+     * whenever it had to be picked automatically. */
+    VLOG_INFO("using datapath ID %012"PRIx64, p->datapath_id);
+    p->mfr_desc = settings->mfr_desc;
+    p->hw_desc = settings->hw_desc;
+    p->sw_desc = settings->sw_desc;
+    p->serial_desc = settings->serial_desc;
+
+    /* Initialize datapath information. */
+    p->dpif = dpif;
+    p->dpifmon = dpifmon;
+    port_array_init(&p->ports);
+    shash_init(&p->port_by_name);
+
+    /* Initialize submodules.  Submodules are handed '&p->dpif', never the
+     * address of the local 'dpif', which stops being valid when this
+     * function returns. */
+    p->switch_status = switch_status_create(settings);
+    switch_status_register_category(p->switch_status, "remote",
+                                    rconn_status_cb, controller);
+    if (settings->in_band) {
+        p->in_band = in_band_create(&p->dpif, p->switch_status, controller);
+    }
+    if (settings->discovery) {
+        p->discovery = discovery_create(settings->accept_controller_re,
+                                        settings->update_resolv_conf, &p->dpif,
+                                        p->switch_status);
+    }
+    if (settings->fail_mode == FAIL_OPEN) {
+        p->fail_open = fail_open_create(settings->probe_interval * 3,
+                                        p->switch_status, controller);
+    }
+    if (settings->rate_limit) {
+        p->miss_sched = pinsched_create(settings->rate_limit,
+                                        settings->burst_limit,
+                                        p->switch_status);
+        p->action_sched = pinsched_create(settings->rate_limit,
+                                          settings->burst_limit, NULL);
+    }
+    if (settings->command_acl[0]) {
+        p->executer = executer_create(settings->command_acl,
+                                      settings->command_dir);
+    }
+    if (settings->br_name) {
+        p->netflow = netflow_create(settings->br_name);
+    }
+
+    /* Initialize flow table.  The first expiration pass is due in 1 s. */
+    classifier_init(&p->cls);
+    p->need_revalidate = false;
+    p->next_expiration = time_msec() + 1000;
+
+    /* Initialize OpenFlow connections.  Only the controller connection gets
+     * packet buffers and a nonzero miss_send_len up front. */
+    list_init(&p->all_conns);
+    p->controller = ofconn_create(p, controller);
+    p->controller->pktbuf = pktbuf_create();
+    p->controller->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+    p->n_listeners = 0;
+    for (i = 0; i < settings->n_listeners; i++) {
+        const char *name = settings->listener_names[i];
+        struct pvconn *pvconn;
+
+        error = pvconn_open(name, &pvconn);
+        if (error && error != EAGAIN) {
+            ofp_fatal(error, "opening %s", name);
+        }
+        p->listeners[p->n_listeners++] = pvconn;
+    }
+
+    /* Retrieve initial port status. */
+    init_ports(p);
+
+    return p;
+}
+
+/* Applies configuration changes to 'p'.  Only the NetFlow module, when one
+ * was created, is reconfigured here. */
+void
+ofproto_reconfigure(struct ofproto *p)
+{
+    struct netflow *netflow = p->netflow;
+
+    if (!netflow) {
+        return;
+    }
+    netflow_reconfigure(netflow);
+}
+
+/* Performs one round of periodic work for 'p': handles datapath messages
+ * and port change notifications, runs each enabled submodule, services every
+ * OpenFlow connection, accepts new management connections, and carries out
+ * flow expiration and revalidation when they are due. */
+void
+ofproto_run(struct ofproto *p)
+{
+ struct ofconn *ofconn, *next_ofconn;
+ char *devname;
+ int error;
+ int i;
+
+ /* Handle at most 50 datapath messages per call; stop early on the first
+ * receive error. */
+ for (i = 0; i < 50; i++) {
+ struct ofpbuf *buf;
+ int error; /* NOTE(review): shadows the outer 'error'. */
+
+ error = dpif_recv(&p->dpif, &buf);
+ if (error) {
+ break;
+ }
+
+ handle_odp_msg(p, buf);
+ }
+
+ /* Drain port notifications until the monitor reports EAGAIN.  ENOBUFS
+ * triggers a full rescan of the ports; success updates the single named
+ * device; any other error is ignored and polling continues. */
+ while ((error = dpifmon_poll(p->dpifmon, &devname)) != EAGAIN) {
+ if (error == ENOBUFS) {
+ reinit_ports(p);
+ } else if (!error) {
+ update_port(p, devname);
+ free(devname);
+ }
+ }
+
+ if (p->in_band) {
+ in_band_run(p->in_band);
+ }
+ if (p->discovery) {
+ char *controller_name;
+ if (rconn_is_connectivity_questionable(p->controller->rconn)) {
+ discovery_question_connectivity(p->discovery);
+ }
+ /* When discovery_run() returns true, connect to the controller it
+ * supplied, or disconnect if it supplied none. */
+ if (discovery_run(p->discovery, &controller_name)) {
+ if (controller_name) {
+ rconn_connect(p->controller->rconn, controller_name);
+ } else {
+ rconn_disconnect(p->controller->rconn);
+ }
+ }
+ }
+ if (p->fail_open) {
+ fail_open_run(p->fail_open);
+ }
+ pinsched_run(p->miss_sched, send_packet_in_miss, p);
+ pinsched_run(p->action_sched, send_packet_in_action, p);
+ if (p->executer) {
+ executer_run(p->executer);
+ }
+
+ /* The _SAFE iterator is needed because ofconn_run() may destroy the
+ * connection it is given. */
+ LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
+ &p->all_conns) {
+ ofconn_run(ofconn, p);
+ }
+
+ /* Accept at most one new connection per listener per call. */
+ for (i = 0; i < p->n_listeners; i++) {
+ struct vconn *vconn;
+ int retval;
+
+ retval = pvconn_accept(p->listeners[i], OFP_VERSION, &vconn);
+ if (!retval) {
+ ofconn_create(p, rconn_new_from_vconn("passive", vconn));
+ } else if (retval != EAGAIN) {
+ VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval));
+ }
+ }
+
+ /* Run the expiration pass once its deadline has passed and schedule the
+ * next one a second from now. */
+ if (time_msec() >= p->next_expiration) {
+ p->next_expiration = time_msec() + 1000;
+ update_used(p);
+ classifier_for_each(&p->cls, expire_rule, p);
+ }
+
+ if (p->need_revalidate) {
+ classifier_for_each_with_wildcards(&p->cls, 0, revalidate_subrule, p);
+ p->need_revalidate = false;
+ }
+}
+
+/* Registers with the poll loop every event that should make ofproto_run()
+ * get called again for 'p': datapath messages, port notifications, activity
+ * on any OpenFlow connection or listener, submodule events, and the next
+ * flow expiration deadline. */
+void
+ofproto_wait(struct ofproto *p)
+{
+    struct ofconn *conn;
+    size_t idx;
+
+    dpif_recv_wait(&p->dpif);
+    dpifmon_wait(p->dpifmon);
+    LIST_FOR_EACH (conn, struct ofconn, node, &p->all_conns) {
+        ofconn_wait(conn);
+    }
+
+    /* Optional submodules. */
+    if (p->in_band) {
+        in_band_wait(p->in_band);
+    }
+    if (p->discovery) {
+        discovery_wait(p->discovery);
+    }
+    if (p->fail_open) {
+        fail_open_wait(p->fail_open);
+    }
+    pinsched_wait(p->miss_sched);
+    pinsched_wait(p->action_sched);
+    if (p->executer) {
+        executer_wait(p->executer);
+    }
+
+    if (!p->need_revalidate) {
+        /* Sleep until the next expiration pass, if one is scheduled. */
+        if (p->next_expiration != LLONG_MAX) {
+            poll_timer_wait(p->next_expiration - time_msec());
+        }
+    } else {
+        /* Shouldn't happen, but if it does just go around again. */
+        VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
+        poll_immediate_wake();
+    }
+
+    for (idx = 0; idx < p->n_listeners; idx++) {
+        pvconn_wait(p->listeners[idx]);
+    }
+}
+
+/* Returns true if 'p' has controller discovery enabled or if its controller
+ * connection is alive, false otherwise. */
+bool
+ofproto_is_alive(const struct ofproto *p)
+{
+    if (p->discovery) {
+        return true;
+    }
+    return rconn_is_alive(p->controller->rconn);
+}
+
+/* Translates the 'n_actions' OpenFlow 'actions' into datapath actions and
+ * asks the datapath to execute them on 'packet', using 'flow->in_port' as
+ * the input port.  Returns the value returned by dpif_execute(). */
+int
+ofproto_send_packet(struct ofproto *p, const flow_t *flow,
+                    const union ofp_action *actions, size_t n_actions,
+                    const struct ofpbuf *packet)
+{
+    const struct ofp_action_header *ofp_actions
+        = (const struct ofp_action_header *) actions;
+    struct odp_actions translated;
+    int retval;
+
+    ofp_actions_to_odp_actions(odp_port_to_ofp_port(flow->in_port),
+                               ofp_actions, n_actions, &translated);
+    retval = dpif_execute(&p->dpif, flow->in_port, translated.actions,
+                          translated.n_actions, packet);
+    free_actions(&translated);
+
+    return retval;
+}
+
+/* Adds a rule for 'flow' with the 'n_actions' OpenFlow 'actions' to the
+ * classifier of 'p' and installs the corresponding flow in the datapath.
+ * Any rule that the new one displaces in the classifier is destroyed.  If
+ * 'packet' is nonnull, the actions are also executed on it and, when that
+ * succeeds, the packet is counted in the new rule's statistics. */
+void
+ofproto_setup_exact_flow(struct ofproto *p, const flow_t *flow,
+ const union ofp_action *actions, size_t n_actions,
+ const struct ofpbuf *packet)
+{
+ struct rule *rule, *displaced_rule;
+ struct odp_actions odp_actions;
+ struct odp_flow odp_flow;
+
+ /* Build the rule.  NOTE(review): the 0 and UINT16_MAX arguments look like
+ * "no wildcards" and "highest priority" -- confirm against
+ * cls_rule_from_flow(). */
+ rule = xmalloc(rule_size(n_actions));
+ cls_rule_from_flow(&rule->cr, flow, 0, UINT16_MAX);
+ rule->idle_timeout = 5; /* XXX */
+ rule->hard_timeout = 0; /* XXX */
+ rule->used = rule->created = time_msec();
+ rule->packet_count = 0;
+ rule->byte_count = 0;
+ rule->tcp_flags = 0;
+ rule->ip_tos = 0;
+ rule->super = NULL; /* XXX */
+ list_init(&rule->list);
+ rule->n_actions = n_actions;
+ memcpy(rule->actions, actions, n_actions * sizeof *rule->actions);
+
+ displaced_rule = rule_from_cls_rule(classifier_insert(&p->cls, &rule->cr));
+ if (displaced_rule) {
+ /* XXX */
+ rule_destroy(displaced_rule);
+ }
+
+ /* Translate the actions once for the datapath flow installed below. */
+ rule_make_actions(rule, &odp_actions);
+ if (packet) {
+ /* ofproto_send_packet() returns 0 on success. */
+ if (!ofproto_send_packet(p, flow, actions, n_actions, packet)) {
+ rule->byte_count = packet->size;
+ rule->packet_count++;
+ }
+ }
+
+ /* Install the flow in the datapath with zeroed statistics.  The return
+ * value of dpif_flow_add() is not checked. */
+ memset(&odp_flow.stats, 0, sizeof odp_flow.stats);
+ odp_flow.key = *flow;
+ odp_flow.actions = odp_actions.actions;
+ odp_flow.n_actions = odp_actions.n_actions;
+ dpif_flow_add(&p->dpif, &odp_flow);
+ free_actions(&odp_actions);
+}
+\f
+/* Rescans every port of 'p'.
+ *
+ * Collects the device names of all ports that 'p' already knows about plus
+ * all ports that the datapath currently reports, then calls update_port()
+ * once for each distinct name so that added, changed, and deleted ports are
+ * all noticed.
+ *
+ * If the datapath port list cannot be retrieved, a warning is logged and
+ * only the already-known ports are re-examined; the outputs of the failed
+ * call are never read or freed. */
+static void
+reinit_ports(struct ofproto *p)
+{
+    struct svec devnames;
+    struct ofport *ofport;
+    unsigned int port_no;
+    struct odp_port *odp_ports;
+    size_t n_odp_ports;
+    size_t i;
+    int error;
+
+    svec_init(&devnames);
+    PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
+        svec_add(&devnames, (char *) ofport->opp.name);
+    }
+
+    error = dpif_port_list(&p->dpif, &odp_ports, &n_odp_ports);
+    if (!error) {
+        for (i = 0; i < n_odp_ports; i++) {
+            svec_add(&devnames, odp_ports[i].devname);
+        }
+        free(odp_ports);
+    } else {
+        VLOG_WARN_RL(&rl, "failed to list datapath ports (%s)",
+                     strerror(error));
+    }
+
+    svec_sort_unique(&devnames);
+    for (i = 0; i < devnames.n; i++) {
+        update_port(p, devnames.names[i]);
+    }
+    svec_destroy(&devnames);
+}
+
+/* Recomputes the membership of datapath port group 'group', which must be
+ * DP_GROUP_ALL or DP_GROUP_FLOOD, and installs it in the datapath.
+ * DP_GROUP_ALL contains every port of 'p'; DP_GROUP_FLOOD leaves out ports
+ * configured with OFPPC_NO_FLOOD. */
+static void
+refresh_port_group(struct ofproto *p, unsigned int group)
+{
+    struct ofport *ofport;
+    unsigned int odp_port;
+    uint16_t *members;
+    size_t n_members = 0;
+
+    assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD);
+
+    /* Room for every port; the group can never be larger than that. */
+    members = xmalloc(port_array_count(&p->ports) * sizeof *members);
+    PORT_ARRAY_FOR_EACH (ofport, &p->ports, odp_port) {
+        if (group != DP_GROUP_ALL && (ofport->opp.config & OFPPC_NO_FLOOD)) {
+            continue;
+        }
+        members[n_members++] = odp_port;
+    }
+    dpif_port_group_set(&p->dpif, group, members, n_members);
+    free(members);
+}
+
+/* Reinstalls both datapath port groups of 'p', the flood group first and
+ * then the all-ports group. */
+static void
+refresh_port_groups(struct ofproto *p)
+{
+    refresh_port_group(p, DP_GROUP_FLOOD);
+    refresh_port_group(p, DP_GROUP_ALL);
+}
+
+/* Builds a new ofport describing datapath port 'odp_port'.
+ *
+ * Opens the port's network device and fills in the OpenFlow port
+ * description from it: port number, Ethernet address, name (truncated to
+ * fit and always null-terminated), up/down and carrier state, and features.
+ *
+ * Returns the new ofport, which the caller owns, or NULL (after logging a
+ * rate-limited warning) if the device cannot be opened. */
+static struct ofport *
+make_ofport(const struct odp_port *odp_port)
+{
+    struct netdev *netdev;
+    struct ofport *port;
+    struct ofp_phy_port *opp;
+    enum netdev_flags flags;
+    int retval;
+
+    retval = netdev_open(odp_port->devname, NETDEV_ETH_TYPE_NONE, &netdev);
+    if (retval) {
+        VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s "
+                     "cannot be opened (%s)",
+                     odp_port->devname, odp_port->port,
+                     odp_port->devname, strerror(retval));
+        return NULL;
+    }
+
+    port = xmalloc(sizeof *port);
+    port->netdev = netdev;
+    opp = &port->opp;
+
+    /* Identity. */
+    opp->port_no = odp_port_to_ofp_port(odp_port->port);
+    memcpy(opp->hw_addr, netdev_get_etheraddr(netdev), ETH_ALEN);
+    memcpy(opp->name, odp_port->devname,
+           MIN(sizeof opp->name, sizeof odp_port->devname));
+    opp->name[sizeof opp->name - 1] = '\0';
+
+    /* Administrative and link state. */
+    netdev_get_flags(netdev, &flags);
+    if (flags & NETDEV_UP) {
+        opp->config = 0;
+    } else {
+        opp->config = OFPPC_PORT_DOWN;
+    }
+    if (flags & NETDEV_CARRIER) {
+        opp->state = 0;
+    } else {
+        opp->state = OFPPS_LINK_DOWN;
+    }
+
+    /* Features. */
+    netdev_get_features(netdev, &opp->curr, &opp->advertised,
+                        &opp->supported, &opp->peer);
+    return port;
+}
+
+/* Returns true, after logging a rate-limited warning, if 'p' already has a
+ * port with the same datapath port number or the same device name as
+ * 'odp_port'.  Returns false if 'odp_port' clashes with nothing. */
+static bool
+ofport_conflicts(const struct ofproto *p, const struct odp_port *odp_port)
+{
+    if (port_array_get(&p->ports, odp_port->port)) {
+        VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
+                     odp_port->port);
+        return true;
+    }
+
+    if (shash_find(&p->port_by_name, odp_port->devname)) {
+        VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath",
+                     odp_port->devname);
+        return true;
+    }
+
+    return false;
+}
+
+/* Compares the OpenFlow port descriptions of 'a_' and 'b_' field by field.
+ * Returns 1 if every field matches, 0 otherwise.  The network devices are
+ * not compared. */
+static int
+ofport_equal(const struct ofport *a_, const struct ofport *b_)
+{
+    const struct ofp_phy_port *a = &a_->opp;
+    const struct ofp_phy_port *b = &b_->opp;
+
+    BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. */
+
+    if (a->port_no != b->port_no) {
+        return 0;
+    }
+    if (memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr)) {
+        return 0;
+    }
+    if (strcmp((char *) a->name, (char *) b->name)) {
+        return 0;
+    }
+    if (a->state != b->state || a->config != b->config) {
+        return 0;
+    }
+    if (a->curr != b->curr || a->advertised != b->advertised) {
+        return 0;
+    }
+    if (a->supported != b->supported || a->peer != b->peer) {
+        return 0;
+    }
+    return 1;
+}
+
+/* Queues an OFPT_PORT_STATUS message describing 'ofport', with the given
+ * 'reason', on every OpenFlow connection of 'p'.  Each message carries its
+ * own network-byte-order copy of the port description. */
+static void
+send_port_status(struct ofproto *p, const struct ofport *ofport,
+                 uint8_t reason)
+{
+    struct ofconn *conn;
+
+    LIST_FOR_EACH (conn, struct ofconn, node, &p->all_conns) {
+        struct ofpbuf *msg;
+        struct ofp_port_status *status
+            = make_openflow_xid(sizeof *status, OFPT_PORT_STATUS, 0, &msg);
+
+        status->reason = reason;
+        status->desc = ofport->opp;
+        hton_ofp_phy_port(&status->desc);
+        queue_tx(msg, conn);
+    }
+}
+
+/* Registers 'ofport' in both port indexes of 'p': by datapath port number
+ * and by device name. */
+static void
+ofport_install(struct ofproto *p, struct ofport *ofport)
+{
+    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
+
+    port_array_set(&p->ports, odp_port, ofport);
+    shash_add(&p->port_by_name, (char *) ofport->opp.name, ofport);
+}
+
+/* Unregisters 'ofport' from both port indexes of 'p'.  The ofport itself is
+ * not freed. */
+static void
+ofport_remove(struct ofproto *p, struct ofport *ofport)
+{
+    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
+
+    port_array_set(&p->ports, odp_port, NULL);
+    shash_delete(&p->port_by_name,
+                 shash_find(&p->port_by_name, (char *) ofport->opp.name));
+}
+
+/* Closes the network device of 'ofport' and frees it.  Does nothing if
+ * 'ofport' is null. */
+static void
+ofport_free(struct ofport *ofport)
+{
+    if (!ofport) {
+        return;
+    }
+    netdev_close(ofport->netdev);
+    free(ofport);
+}
+
+/* Brings the view that 'p' has of the port named 'devname' in line with the
+ * datapath, sending an OFPT_PORT_STATUS message to every connection when a
+ * port is added, modified, or deleted, and then refreshes the datapath port
+ * groups. */
+static void
+update_port(struct ofproto *p, const char *devname)
+{
+ struct odp_port odp_port;
+ struct ofport *ofport;
+ int error;
+
+ /* 'ofport' is what we currently know (possibly nothing); 'odp_port' is
+ * what the datapath reports now. */
+ ofport = shash_find_data(&p->port_by_name, devname);
+ error = dpif_port_query_by_name(&p->dpif, devname, &odp_port);
+ if (!error) {
+ if (!ofport) {
+ /* New port. */
+ if (!ofport_conflicts(p, &odp_port)) {
+ ofport = make_ofport(&odp_port);
+ if (ofport) {
+ ofport_install(p, ofport);
+ send_port_status(p, ofport, OFPPR_ADD);
+ }
+ }
+ } else {
+ /* Modified port. */
+ struct ofport *new_ofport = make_ofport(&odp_port);
+ if (!new_ofport) {
+ /* Keep the old description; port groups are not refreshed. */
+ return;
+ }
+
+ /* Take OFPPC_PORT_DOWN from the device and every other config bit
+ * from the existing port. */
+ new_ofport->opp.config &= OFPPC_PORT_DOWN;
+ new_ofport->opp.config |= ofport->opp.config & ~OFPPC_PORT_DOWN;
+ if (ofport_equal(ofport, new_ofport)) {
+ /* False alarm--no change. */
+ ofport_free(new_ofport);
+ } else {
+ ofport_remove(p, ofport);
+ ofport_install(p, new_ofport);
+ ofport_free(ofport);
+ send_port_status(p, new_ofport, OFPPR_MODIFY);
+ }
+ }
+ } else if (error == ENOENT || error == ENODEV) {
+ /* Deleted port. */
+ if (ofport) {
+ /* Notify before freeing, while the description is still valid. */
+ send_port_status(p, ofport, OFPPR_DELETE);
+ ofport_remove(p, ofport);
+ ofport_free(ofport);
+ }
+ } else {
+ VLOG_WARN_RL(&rl, "dpif_port_query_by_name returned unexpected error "
+ "%s", strerror(error));
+ return;
+ }
+ refresh_port_groups(p);
+}
+
+/* Populates the port indexes of 'p' from the ports currently in the
+ * datapath, skipping conflicting ports and ports whose device cannot be
+ * opened, then installs the datapath port groups.  No port status messages
+ * are sent.  Failure to list the datapath's ports is fatal. */
+static void
+init_ports(struct ofproto *p)
+{
+    struct odp_port *odp_ports;
+    size_t n_odp_ports;
+    size_t idx;
+    int retval;
+
+    retval = dpif_port_list(&p->dpif, &odp_ports, &n_odp_ports);
+    if (retval) {
+        ofp_fatal(retval, "failed to list datapath ports");
+    }
+
+    for (idx = 0; idx < n_odp_ports; idx++) {
+        const struct odp_port *candidate = &odp_ports[idx];
+        struct ofport *ofport;
+
+        if (ofport_conflicts(p, candidate)) {
+            continue;
+        }
+        ofport = make_ofport(candidate);
+        if (ofport) {
+            ofport_install(p, ofport);
+        }
+    }
+    free(odp_ports);
+    refresh_port_groups(p);
+}
+\f
+/* Creates a new OpenFlow connection around 'rconn', appends it to the
+ * connection list of 'p', and returns it.  The new connection has no packet
+ * buffers, does not request flow expiration messages, and has a
+ * miss_send_len of 0. */
+static struct ofconn *
+ofconn_create(struct ofproto *p, struct rconn *rconn)
+{
+    struct ofconn *conn = xmalloc(sizeof *conn);
+
+    conn->rconn = rconn;
+    conn->pktbuf = NULL;
+    conn->send_flow_exp = false;
+    conn->miss_send_len = 0;
+    list_push_back(&p->all_conns, &conn->node);
+
+    return conn;
+}
+
+/* Destroys 'ofconn': tells the executer of 'p', if there is one, that the
+ * connection is closing, unlinks 'ofconn' from the connection list, and
+ * releases its rconn, its packet buffers, and the ofconn itself. */
+static void
+ofconn_destroy(struct ofconn *ofconn, struct ofproto *p)
+{
+    struct executer *executer = p->executer;
+
+    if (executer) {
+        executer_rconn_closing(executer, ofconn->rconn);
+    }
+
+    list_remove(&ofconn->node);
+    rconn_destroy(ofconn->rconn);
+    pktbuf_destroy(ofconn->pktbuf);
+    free(ofconn);
+}
+
+/* Services 'ofconn': runs its rconn, handles up to 50 received OpenFlow
+ * messages, and then destroys 'ofconn' if its connection is no longer alive.
+ * The controller connection of 'p' is never destroyed here. */
+static void
+ofconn_run(struct ofconn *ofconn, struct ofproto *p)
+{
+    /* Cap on messages per call so that other tasks are not starved. */
+    int budget = 50;
+
+    rconn_run(ofconn->rconn);
+
+    while (budget-- > 0) {
+        struct ofpbuf *msg = rconn_recv(ofconn->rconn);
+        if (!msg) {
+            break;
+        }
+        handle_openflow(ofconn, p, msg);
+        ofpbuf_delete(msg);
+    }
+
+    if (ofconn == p->controller || rconn_is_alive(ofconn->rconn)) {
+        return;
+    }
+    ofconn_destroy(ofconn, p);
+}
+
+/* Arranges for the poll loop to wake up when the rconn of 'ofconn' needs to
+ * run or has a message to receive. */
+static void
+ofconn_wait(struct ofconn *ofconn)
+{
+    struct rconn *rconn = ofconn->rconn;
+
+    rconn_run_wait(rconn);
+    rconn_recv_wait(rconn);
+}
+\f
+/* Returns the number of bytes to allocate for a struct rule whose trailing
+ * 'actions' array holds 'n_actions' elements. */
+static inline size_t
+rule_size(int n_actions)
+{
+    size_t header_len = offsetof(struct rule, actions);
+    size_t actions_len = sizeof(union ofp_action) * n_actions;
+
+    return header_len + actions_len;
+}
+
+/* Returns the struct rule that embeds 'cls_rule' as its 'cr' member, or
+ * NULL if 'cls_rule' is null. */
+static struct rule *
+rule_from_cls_rule(const struct cls_rule *cls_rule)
+{
+    if (!cls_rule) {
+        return NULL;
+    }
+    return CONTAINER_OF(cls_rule, struct rule, cr);
+}
+
+/* Frees 'rule'.
+ *
+ * If 'rule' has no super-rule, every rule on its 'list' is first marked as
+ * having an unknown super-rule.  If 'rule' is a subrule whose super-rule is
+ * still known, it is first unlinked from that super-rule's list. */
+static void
+rule_destroy(struct rule *rule)
+{
+    if (rule->super) {
+        if (rule->super != UNKNOWN_SUPER) {
+            list_remove(&rule->list);
+        }
+    } else {
+        struct rule *child;
+
+        LIST_FOR_EACH (child, struct rule, list, &rule->list) {
+            child->super = UNKNOWN_SUPER;
+        }
+    }
+    free(rule);
+}
+
+/* Returns true if 'rule' has an OFPAT_OUTPUT action whose port is
+ * 'out_port', or if 'out_port' is OFPP_NONE, which matches every rule.
+ * 'out_port' is compared in network byte order. */
+static bool
+rule_has_out_port(const struct rule *rule, uint16_t out_port)
+{
+    struct actions_iterator iter;
+    const union ofp_action *action;
+
+    if (out_port == htons(OFPP_NONE)) {
+        return true;
+    }
+
+    action = actions_first(&iter, rule->actions, rule->n_actions);
+    while (action) {
+        if (action->type == htons(OFPAT_OUTPUT)
+            && action->output.port == out_port) {
+            return true;
+        }
+        action = actions_next(&iter);
+    }
+    return false;
+}
+
+/* Translates the OpenFlow actions that apply to 'rule' into datapath
+ * actions stored in 'actions', which the caller must later release with
+ * free_actions().  A subrule uses its super-rule's actions.  'rule' must be
+ * an exact-match rule. */
+static void
+rule_make_actions(const struct rule *rule, struct odp_actions *actions)
+{
+    const struct rule *source = rule;
+    uint16_t ofp_in_port;
+
+    if (rule->super) {
+        source = rule->super;
+    }
+    assert(!rule->cr.wc.wildcards);
+
+    ofp_in_port = odp_port_to_ofp_port(rule->cr.flow.in_port);
+    ofp_actions_to_odp_actions(ofp_in_port,
+                               (const struct ofp_action_header *)
+                               source->actions,
+                               source->n_actions, actions);
+}
+\f
+/* Fixes up the OpenFlow length field of 'msg' and queues it for
+ * transmission on 'ofconn'.  If rconn_send() reports failure, 'msg' is
+ * deleted here. */
+static void
+queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn)
+{
+    int retval;
+
+    update_openflow_length(msg);
+    retval = rconn_send(ofconn->rconn, msg, NULL); /* XXX */
+    if (retval) {
+        ofpbuf_delete(msg);
+    }
+}
+
+/* Sends an OFPT_ERROR message on 'ofconn'.
+ *
+ * 'error' carries the error type in its upper 16 bits and the error code in
+ * its lower 16 bits.  A value with a zero type is not sent; a rate-limited
+ * warning is logged instead.  The reply uses the transaction ID of 'oh', or
+ * 0 if 'oh' is null, and carries the 'len' bytes at 'data' as its payload. */
+static void
+send_error(const struct ofconn *ofconn, const struct ofp_header *oh,
+           int error, const void *data, size_t len)
+{
+    struct ofp_error_msg *reply;
+    struct ofpbuf *msg;
+    unsigned int type = (unsigned int) error >> 16;
+    unsigned int code = error & 0xffff;
+
+    if (!(error >> 16)) {
+        VLOG_WARN_RL(&rl, "not sending bad error code %d to controller",
+                     error);
+        return;
+    }
+
+    reply = make_openflow_xid(len + sizeof *reply, OFPT_ERROR,
+                              oh ? oh->xid : 0, &msg);
+    reply->type = htons(type);
+    reply->code = htons(code);
+    memcpy(reply->data, data, len);
+    queue_tx(msg, ofconn);
+}
+
+/* Sends an OFPT_ERROR message for 'error' on 'ofconn' in reply to 'oh',
+ * attaching up to the first 64 bytes of the offending message, as given by
+ * its own length field. */
+static void
+send_error_oh(const struct ofconn *ofconn, const struct ofp_header *oh,
+              int error)
+{
+    size_t msg_len = ntohs(oh->length);
+    size_t quoted_len = MIN(msg_len, 64);
+
+    send_error(ofconn, oh, error, oh, quoted_len);
+}
+
+/* Converts the multi-byte integer fields of 'opp' from host to network byte
+ * order, in place.  The Ethernet address and name are left untouched. */
+static void
+hton_ofp_phy_port(struct ofp_phy_port *opp)
+{
+    /* 16-bit field. */
+    opp->port_no = htons(opp->port_no);
+
+    /* 32-bit configuration and state bitmaps. */
+    opp->config = htonl(opp->config);
+    opp->state = htonl(opp->state);
+
+    /* 32-bit feature bitmaps. */
+    opp->curr = htonl(opp->curr);
+    opp->advertised = htonl(opp->advertised);
+    opp->supported = htonl(opp->supported);
+    opp->peer = htonl(opp->peer);
+}
+
+/* Replies to the features request 'oh' on 'ofconn' with an
+ * OFPT_FEATURES_REPLY giving the datapath ID, buffer count, table count,
+ * capabilities, and supported actions of 'p', followed by a description of
+ * each of its ports in network byte order.  Always returns 0. */
+static int
+handle_features_request(struct ofproto *p, struct ofconn *ofconn,
+                        struct ofp_header *oh)
+{
+    struct ofp_switch_features *features;
+    struct ofpbuf *reply;
+    struct ofport *ofport;
+    unsigned int odp_port;
+
+    features = make_openflow_xid(sizeof *features, OFPT_FEATURES_REPLY,
+                                 oh->xid, &reply);
+    features->datapath_id = htonll(p->datapath_id);
+    features->n_buffers = htonl(pktbuf_capacity());
+    features->n_tables = 2;
+    features->capabilities = htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS |
+                                   OFPC_PORT_STATS | OFPC_MULTI_PHY_TX);
+    features->actions = htonl((1u << OFPAT_OUTPUT) |
+                              (1u << OFPAT_SET_VLAN_VID) |
+                              (1u << OFPAT_SET_VLAN_PCP) |
+                              (1u << OFPAT_STRIP_VLAN) |
+                              (1u << OFPAT_SET_DL_SRC) |
+                              (1u << OFPAT_SET_DL_DST) |
+                              (1u << OFPAT_SET_NW_SRC) |
+                              (1u << OFPAT_SET_NW_DST) |
+                              (1u << OFPAT_SET_TP_SRC) |
+                              (1u << OFPAT_SET_TP_DST));
+
+    /* Append one port description per port, converting each appended copy
+     * rather than the host-order original. */
+    PORT_ARRAY_FOR_EACH (ofport, &p->ports, odp_port) {
+        struct ofp_phy_port *copy
+            = ofpbuf_put(reply, &ofport->opp, sizeof ofport->opp);
+        hton_ofp_phy_port(copy);
+    }
+
+    queue_tx(reply, ofconn);
+    return 0;
+}
+
+/* Replies to the get-config request 'oh' on 'ofconn' with an
+ * OFPT_GET_CONFIG_REPLY that reports the datapath's fragment handling mode,
+ * whether 'ofconn' asked for flow expiration messages, and the
+ * miss_send_len of 'ofconn'.  Always returns 0. */
+static int
+handle_get_config_request(struct ofproto *p, struct ofconn *ofconn,
+                          struct ofp_header *oh)
+{
+    struct ofp_switch_config *config;
+    struct ofpbuf *reply;
+    bool drop_frags;
+    uint16_t flags;
+
+    /* Figure out flags. */
+    dpif_get_drop_frags(&p->dpif, &drop_frags);
+    if (drop_frags) {
+        flags = OFPC_FRAG_DROP;
+    } else {
+        flags = OFPC_FRAG_NORMAL;
+    }
+    if (ofconn->send_flow_exp) {
+        flags |= OFPC_SEND_FLOW_EXP;
+    }
+
+    /* Send reply. */
+    config = make_openflow_xid(sizeof *config, OFPT_GET_CONFIG_REPLY,
+                               oh->xid, &reply);
+    config->flags = htons(flags);
+    config->miss_send_len = htons(ofconn->miss_send_len);
+    queue_tx(reply, ofconn);
+
+    return 0;
+}
+
+/* Applies the OFPT_SET_CONFIG message 'osc' received on 'ofconn'.
+ *
+ * Records whether the connection wants flow expiration messages, sets the
+ * datapath's fragment handling mode (an unrecognized mode is logged and
+ * otherwise ignored), and updates the connection's miss_send_len, creating
+ * or destroying its packet buffers when miss_send_len changes between zero
+ * and nonzero.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * check_ofp_message() for a malformed message. */
+static int
+handle_set_config(struct ofproto *p, struct ofconn *ofconn,
+                  struct ofp_switch_config *osc)
+{
+    uint16_t flags;
+    int error;
+
+    error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc);
+    if (error) {
+        return error;
+    }
+    flags = ntohs(osc->flags);
+
+    ofconn->send_flow_exp = (flags & OFPC_SEND_FLOW_EXP) != 0;
+
+    switch (flags & OFPC_FRAG_MASK) {
+    case OFPC_FRAG_NORMAL:
+        dpif_set_drop_frags(&p->dpif, false);
+        break;
+    case OFPC_FRAG_DROP:
+        dpif_set_drop_frags(&p->dpif, true);
+        break;
+    default:
+        /* Log the host-order value; 'osc->flags' is in network order. */
+        VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
+                     flags);
+        break;
+    }
+
+    if ((ntohs(osc->miss_send_len) != 0) != (ofconn->miss_send_len != 0)) {
+        if (ntohs(osc->miss_send_len) != 0) {
+            ofconn->pktbuf = pktbuf_create();
+        } else {
+            /* Clear the pointer so that ofconn_destroy() does not destroy
+             * the same buffers a second time. */
+            pktbuf_destroy(ofconn->pktbuf);
+            ofconn->pktbuf = NULL;
+        }
+    }
+
+    ofconn->miss_send_len = ntohs(osc->miss_send_len);
+
+    return 0;
+}
+
+/* Converts the OpenFlow port number 'ofp_port' to a datapath port number.
+ * OFPP_LOCAL and OFPP_NONE map to ODPP_LOCAL and ODPP_NONE; every other
+ * value is returned unchanged. */
+static uint16_t
+ofp_port_to_odp_port(uint16_t ofp_port)
+{
+    if (ofp_port == OFPP_LOCAL) {
+        return ODPP_LOCAL;
+    }
+    if (ofp_port == OFPP_NONE) {
+        return ODPP_NONE;
+    }
+    return ofp_port;
+}
+
+/* Converts the datapath port number 'odp_port' to an OpenFlow port number.
+ * ODPP_LOCAL and ODPP_NONE map to OFPP_LOCAL and OFPP_NONE; every other
+ * value is returned unchanged. */
+static uint16_t
+odp_port_to_ofp_port(uint16_t odp_port)
+{
+    if (odp_port == ODPP_LOCAL) {
+        return OFPP_LOCAL;
+    }
+    if (odp_port == ODPP_NONE) {
+        return OFPP_NONE;
+    }
+    return odp_port;
+}
+
+/* Initializes 'actions' as an empty action list with no storage
+ * allocated. */
+static void
+init_actions(struct odp_actions *actions)
+{
+    actions->n_actions = 0;
+    actions->allocated_actions = 0;
+    actions->actions = NULL;
+}
+
+/* Releases the action array owned by 'actions'.  The struct itself is not
+ * freed and its fields are left as they were. */
+static void
+free_actions(struct odp_actions *actions)
+{
+    union odp_action *array = actions->actions;
+
+    free(array);
+}
+
+/* Appends a zeroed action of the given 'type' to 'actions', growing the
+ * array first if it is full, and returns a pointer to the new action so
+ * that the caller can fill in its arguments.  The pointer may be invalidated
+ * by the next call. */
+static union odp_action *
+add_action(struct odp_actions *actions, uint16_t type)
+{
+    union odp_action *slot;
+
+    if (actions->n_actions >= actions->allocated_actions) {
+        actions->actions = x2nrealloc(actions->actions,
+                                      &actions->allocated_actions,
+                                      sizeof *actions->actions);
+    }
+
+    slot = &actions->actions[actions->n_actions];
+    actions->n_actions++;
+    memset(slot, 0, sizeof *slot);
+    slot->type = type;
+    return slot;
+}
+
+/* Appends to 'actions' an ODPAT_OUTPUT action that sends the packet to
+ * datapath port 'port'. */
+static void
+add_output_action(struct odp_actions *actions, uint16_t port)
+{
+    union odp_action *action = add_action(actions, ODPAT_OUTPUT);
+
+    action->output.port = port;
+}
+
+/* Appends to 'actions' an ODPAT_OUTPUT_GROUP action that sends the packet
+ * to datapath port group 'group'. */
+static void
+add_output_group_action(struct odp_actions *actions, uint16_t group)
+{
+    union odp_action *action = add_action(actions, ODPAT_OUTPUT_GROUP);
+
+    action->output_group.group = group;
+}
+
+/* Appends to 'actions' an ODPAT_CONTROLLER action for the OpenFlow output
+ * action 'oao'.  Its argument is the host-order 'max_len' of 'oao', or
+ * UINT32_MAX when 'max_len' is zero. */
+static void
+add_controller_action(struct odp_actions *actions,
+                      const struct ofp_action_output *oao)
+{
+    union odp_action *action = add_action(actions, ODPAT_CONTROLLER);
+
+    if (oao->max_len) {
+        action->controller.arg = ntohs(oao->max_len);
+    } else {
+        action->controller.arg = UINT32_MAX;
+    }
+}
+
+/* Translates the OpenFlow output action 'oao' into datapath actions
+ * appended to 'actions'.  'ofp_in_port' is the OpenFlow port on which the
+ * packet arrived.
+ *
+ * The reserved ports map to the input port, a port group, the controller,
+ * or the local port; OFPP_TABLE is not implemented and adds nothing.  Output
+ * to an ordinary port is skipped when that port is the input port.  Always
+ * returns 0. */
+static int
+ofp_to_odp_action_output(struct odp_actions *actions, uint16_t ofp_in_port,
+                         const struct ofp_action_output *oao)
+{
+    uint16_t ofp_port = ntohs(oao->port);
+
+    switch (ofp_port) {
+    case OFPP_IN_PORT:
+        add_output_action(actions, ofp_port_to_odp_port(ofp_in_port));
+        return 0;
+
+    case OFPP_TABLE:
+        /* XXX not implemented */
+        return 0;
+
+    case OFPP_NORMAL: /* XXX */
+    case OFPP_FLOOD:
+        add_output_group_action(actions, DP_GROUP_FLOOD);
+        return 0;
+
+    case OFPP_ALL:
+        add_output_group_action(actions, DP_GROUP_ALL);
+        return 0;
+
+    case OFPP_CONTROLLER:
+        add_controller_action(actions, oao);
+        return 0;
+
+    case OFPP_LOCAL:
+        add_output_action(actions, ODPP_LOCAL);
+        return 0;
+
+    default:
+        if (ofp_port != ofp_in_port) {
+            add_output_action(actions, ofp_port_to_odp_port(ofp_port));
+        }
+        return 0;
+    }
+}
+
+/* Translates the 'n_in' OpenFlow actions starting at 'in_' into datapath
+ * actions.
+ *
+ * 'out' is initialized here, so any previous contents are discarded without
+ * being freed; the caller must release the result with free_actions().
+ * 'ofp_in_port' is the OpenFlow port on which the packet arrived and is
+ * used to translate output actions.
+ *
+ * Field values are copied as found in the OpenFlow action, without byte
+ * swapping, except for the SNAT port, which is converted to host byte
+ * order.  Unknown action types, and vendor actions other than Nicira SNAT,
+ * are skipped. */
+static void
+ofp_actions_to_odp_actions(uint16_t ofp_in_port,
+                           const struct ofp_action_header *in_, size_t n_in,
+                           struct odp_actions *out)
+{
+    union ofp_action *in = (union ofp_action *) in_;
+    struct actions_iterator iter;
+    const union ofp_action *a;
+
+    init_actions(out);
+    for (a = actions_first(&iter, in, n_in); a; a = actions_next(&iter)) {
+        uint16_t type = ntohs(a->type);
+        union odp_action *oa;
+
+        switch (type) {
+        case OFPAT_OUTPUT:
+            ofp_to_odp_action_output(out, ofp_in_port, &a->output);
+            break;
+
+        case OFPAT_SET_VLAN_VID:
+            oa = add_action(out, ODPAT_SET_VLAN_VID);
+            oa->vlan_vid.vlan_vid = a->vlan_vid.vlan_vid;
+            break;
+
+        case OFPAT_SET_VLAN_PCP:
+            oa = add_action(out, ODPAT_SET_VLAN_PCP);
+            oa->vlan_pcp.vlan_pcp = a->vlan_pcp.vlan_pcp;
+            break;
+
+        case OFPAT_STRIP_VLAN:
+            add_action(out, ODPAT_STRIP_VLAN);
+            break;
+
+        case OFPAT_SET_DL_SRC:
+            oa = add_action(out, ODPAT_SET_DL_SRC);
+            memcpy(oa->dl_addr.dl_addr,
+                   ((struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN);
+            break;
+
+        case OFPAT_SET_DL_DST:
+            oa = add_action(out, ODPAT_SET_DL_DST);
+            memcpy(oa->dl_addr.dl_addr,
+                   ((struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN);
+            break;
+
+        case OFPAT_SET_NW_SRC:
+            oa = add_action(out, ODPAT_SET_NW_SRC);
+            oa->nw_addr.nw_addr = a->nw_addr.nw_addr;
+            break;
+
+        /* The destination variants are advertised in the features reply,
+         * so they must be translated rather than dropped as unknown.
+         * NOTE(review): assumes ODPAT_SET_NW_DST and ODPAT_SET_TP_DST are
+         * defined alongside the _SRC variants -- confirm in
+         * openflow/datapath-protocol.h. */
+        case OFPAT_SET_NW_DST:
+            oa = add_action(out, ODPAT_SET_NW_DST);
+            oa->nw_addr.nw_addr = a->nw_addr.nw_addr;
+            break;
+
+        case OFPAT_SET_TP_SRC:
+            oa = add_action(out, ODPAT_SET_TP_SRC);
+            oa->tp_port.tp_port = a->tp_port.tp_port;
+            break;
+
+        case OFPAT_SET_TP_DST:
+            oa = add_action(out, ODPAT_SET_TP_DST);
+            oa->tp_port.tp_port = a->tp_port.tp_port;
+            break;
+
+        case OFPAT_VENDOR:
+            if (a->vendor.vendor == htonl(NX_VENDOR_ID)) {
+                const struct nx_action_snat *nas =
+                    (const struct nx_action_snat *) a;
+                if (nas->subtype == htons(NXAST_SNAT)) {
+                    oa = add_action(out, ODPAT_SNAT);
+                    oa->snat.port = ntohs(nas->port);
+                }
+            }
+            break;
+
+        default:
+            VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type);
+            break;
+        }
+    }
+}
+
+/* Handles the OFPT_PACKET_OUT message 'oh' received on 'ofconn': translates
+ * its action list and asks the datapath to execute it on either the packet
+ * carried in the message or a previously buffered packet.
+ *
+ * Returns 0 on success, otherwise the error produced by
+ * check_ofp_packet_out() or pktbuf_retrieve().  The result of dpif_execute()
+ * is not reported to the caller. */
+static int
+handle_packet_out(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_header *oh)
+{
+ struct ofp_packet_out *opo;
+ struct ofpbuf payload, *buffer;
+ struct odp_actions actions;
+ int n_actions;
+ uint16_t in_port;
+ int error;
+
+ error = check_ofp_packet_out(oh, &payload, &n_actions);
+ if (error) {
+ return error;
+ }
+ opo = (struct ofp_packet_out *) oh;
+
+ /* A buffer_id other than UINT32_MAX selects a buffered packet, which then
+ * replaces the payload found in the message itself.
+ *
+ * NOTE(review): unlike send_buffered(), 'ofconn->pktbuf' is not checked
+ * for NULL before pktbuf_retrieve() -- confirm that pktbuf_retrieve()
+ * copes with a connection that has no buffers. */
+ if (opo->buffer_id != htonl(UINT32_MAX)) {
+ error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
+ &buffer, &in_port);
+ if (error) {
+ return error;
+ }
+ payload = *buffer;
+ } else {
+ buffer = NULL;
+ }
+
+ /* The ingress port always comes from the message; whatever
+ * pktbuf_retrieve() stored in 'in_port' is overwritten here. */
+ in_port = ofp_port_to_odp_port(ntohs(opo->in_port));
+ ofp_actions_to_odp_actions(ntohs(opo->in_port), opo->actions,
+ n_actions, &actions);
+ dpif_execute(&p->dpif, in_port, actions.actions, actions.n_actions,
+ &payload);
+ free_actions(&actions);
+ /* 'buffer' is NULL when the packet came inline in the message; this
+ * presumably relies on ofpbuf_delete() accepting NULL -- TODO confirm. */
+ ofpbuf_delete(buffer);
+
+ return 0;
+}
+
+/* Applies the OpenFlow port configuration bits 'config' to 'port', limited to
+ * the bits selected by 'mask'.  Both arguments are in host byte order (the
+ * caller converts them with ntohl()).
+ *
+ * Only OFPPC_PORT_DOWN, OFPPC_NO_FLOOD and OFPPC_NO_PACKET_IN have an effect;
+ * the branches marked XXX are placeholders that do nothing yet. */
+static void
+update_port_config(struct ofproto *p, struct ofport *port,
+ uint32_t config, uint32_t mask)
+{
+ /* Keep only the masked bits whose requested value differs from the current
+ * configuration, so every test below means "this bit must change". */
+ mask &= config ^ port->opp.config;
+ if (mask & OFPPC_PORT_DOWN) {
+ /* Acts on the network device only; port->opp.config is not modified
+ * in this branch. */
+ if (config & OFPPC_PORT_DOWN) {
+ netdev_turn_flags_off(port->netdev, NETDEV_UP, true);
+ } else {
+ netdev_turn_flags_on(port->netdev, NETDEV_UP, true);
+ }
+ }
+ if (mask & OFPPC_NO_STP) {
+ /* XXX */
+ }
+ if (mask & OFPPC_NO_RECV) {
+ /* XXX */
+ }
+ if (mask & OFPPC_NO_RECV_STP) {
+ /* XXX */
+ }
+ if (mask & OFPPC_NO_FLOOD) {
+ /* The bit is known to differ, so XOR flips it to the requested value.
+ * The flood group is refreshed after the flip. */
+ port->opp.config ^= OFPPC_NO_FLOOD;
+ refresh_port_group(p, DP_GROUP_FLOOD);
+ }
+ if (mask & OFPPC_NO_FWD) {
+ /* XXX */
+ }
+ if (mask & OFPPC_NO_PACKET_IN) {
+ port->opp.config ^= OFPPC_NO_PACKET_IN;
+ }
+}
+
+/* Handles the OFPT_PORT_MOD message 'oh'.
+ *
+ * Returns 0 on success.  Otherwise returns the error from
+ * check_ofp_message(), OFPPMFC_BAD_PORT if the port does not exist, or
+ * OFPPMFC_BAD_HW_ADDR if the hardware address in the message differs from
+ * the port's. */
+static int
+handle_port_mod(struct ofproto *p, struct ofp_header *oh)
+{
+    const struct ofp_port_mod *opm;
+    struct ofport *port;
+    int error = check_ofp_message(oh, OFPT_PORT_MOD, sizeof *opm);
+
+    if (error) {
+        return error;
+    }
+
+    opm = (struct ofp_port_mod *) oh;
+    port = port_array_get(&p->ports,
+                          ofp_port_to_odp_port(ntohs(opm->port_no)));
+    if (!port) {
+        return ofp_mkerr(OFPET_PORT_MOD_FAILED, OFPPMFC_BAD_PORT);
+    }
+    if (memcmp(port->opp.hw_addr, opm->hw_addr, OFP_ETH_ALEN) != 0) {
+        return ofp_mkerr(OFPET_PORT_MOD_FAILED, OFPPMFC_BAD_HW_ADDR);
+    }
+
+    update_port_config(p, port, ntohl(opm->config), ntohl(opm->mask));
+    /* A zero 'advertise' field leaves the advertised features alone. */
+    if (opm->advertise) {
+        netdev_set_advertisements(port->netdev, ntohl(opm->advertise));
+    }
+    return 0;
+}
+
+/* Creates a new OFPT_STATS_REPLY message with transaction ID 'xid' and a
+ * flags field of zero.  'type' is stored in the header exactly as given, so
+ * it must already be in the byte order wanted on the wire.
+ *
+ * 'body_len' only sizes the initial allocation, which is capped at
+ * UINT16_MAX bytes; the body itself is added later by the caller. */
+static struct ofpbuf *
+make_stats_reply(uint32_t xid, uint16_t type, size_t body_len)
+{
+    struct ofp_stats_reply *hdr;
+    struct ofpbuf *reply;
+    size_t initial_size = sizeof *hdr + body_len;
+
+    if (initial_size >= UINT16_MAX) {
+        initial_size = UINT16_MAX;
+    }
+
+    reply = ofpbuf_new(initial_size);
+    hdr = put_openflow_xid(sizeof *hdr, OFPT_STATS_REPLY, xid, reply);
+    hdr->type = type;
+    hdr->flags = htons(0);
+    return reply;
+}
+
+/* Starts the reply to stats request 'request': a new stats reply message
+ * that reuses the request's transaction ID and statistics type, sized for
+ * about 'body_len' bytes of body. */
+static struct ofpbuf *
+start_stats_reply(const struct ofp_stats_request *request, size_t body_len)
+{
+    struct ofpbuf *reply;
+
+    reply = make_stats_reply(request->header.xid, request->type, body_len);
+    return reply;
+}
+
+/* Reserves 'nbytes' uninitialized bytes at the end of the stats reply
+ * '*msgp' and returns a pointer to them.
+ *
+ * If the addition would push the message past UINT16_MAX bytes, the current
+ * message is flagged OFPSF_REPLY_MORE and queued on 'ofconn', and '*msgp' is
+ * replaced by a fresh reply with the same transaction ID and type before the
+ * space is reserved. */
+static void *
+append_stats_reply(size_t nbytes, struct ofconn *ofconn, struct ofpbuf **msgp)
+{
+    struct ofpbuf *current = *msgp;
+
+    assert(nbytes <= UINT16_MAX - sizeof(struct ofp_stats_reply));
+
+    if (nbytes + current->size > UINT16_MAX) {
+        struct ofp_stats_reply *full = current->data;
+
+        full->flags = htons(OFPSF_REPLY_MORE);
+        /* Build the continuation first: it copies fields out of 'full',
+         * which is handed to queue_tx() right afterwards. */
+        *msgp = make_stats_reply(full->header.xid, full->type, nbytes);
+        queue_tx(current, ofconn);
+    }
+
+    return ofpbuf_put_uninit(*msgp, nbytes);
+}
+
+/* Answers an OFPST_DESC request with the manufacturer, hardware, software
+ * and serial number descriptions stored in 'p'.  Always returns 0.
+ *
+ * Each string is copied with strncpy() bounded by the destination field
+ * size, so a description that fills its field completely is sent without a
+ * terminating null byte. */
+static int
+handle_desc_stats_request(struct ofproto *p, struct ofconn *ofconn,
+                          struct ofp_stats_request *request)
+{
+    struct ofp_desc_stats *desc;
+    struct ofpbuf *reply;
+
+    reply = start_stats_reply(request, sizeof *desc);
+    desc = append_stats_reply(sizeof *desc, ofconn, &reply);
+
+    strncpy(desc->mfr_desc, p->mfr_desc, sizeof desc->mfr_desc);
+    strncpy(desc->hw_desc, p->hw_desc, sizeof desc->hw_desc);
+    strncpy(desc->sw_desc, p->sw_desc, sizeof desc->sw_desc);
+    strncpy(desc->serial_num, p->serial_desc, sizeof desc->serial_num);
+
+    queue_tx(reply, ofconn);
+    return 0;
+}
+
+/* Classifier callback: increments the 'int' counter that 'n_subrules_'
+ * points to when 'cls_rule' is a subrule, that is, when its 'super' pointer
+ * is set. */
+static void
+count_subrules(struct cls_rule *cls_rule, void *n_subrules_)
+{
+    const struct rule *rule = rule_from_cls_rule(cls_rule);
+
+    if (rule->super != NULL) {
+        int *counter = n_subrules_;
+
+        ++*counter;
+    }
+}
+
+/* Answers an OFPST_TABLE request with two table entries: "hash", reported
+ * with the datapath's statistics, and "classifier", reported for the rules
+ * that are not exact-match.  Always returns 0. */
+static int
+handle_table_stats_request(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_stats_request *request)
+{
+ struct ofp_table_stats *ots;
+ struct ofpbuf *msg;
+ struct odp_stats dpstats;
+ int n_exact, n_subrules, n_wild;
+
+ msg = start_stats_reply(request, sizeof *ots * 2);
+
+ /* Count rules of various kinds. */
+ /* Subrules are counted separately and subtracted from the exact-match
+ * total, so they are left out of the reported active count. */
+ n_subrules = 0;
+ classifier_for_each_with_wildcards(&p->cls, 0,
+ count_subrules, &n_subrules);
+ n_exact = classifier_count_exact(&p->cls) - n_subrules;
+ n_wild = classifier_count(&p->cls) - classifier_count_exact(&p->cls);
+
+ /* Hash table. */
+ /* NOTE(review): the return value of dpif_get_dp_stats() is not checked;
+ * 'dpstats' is used below whether or not the call succeeded. */
+ dpif_get_dp_stats(&p->dpif, &dpstats);
+ ots = append_stats_reply(sizeof *ots, ofconn, &msg);
+ /* The reserved space is uninitialized, so clear it before filling it. */
+ memset(ots, 0, sizeof *ots);
+ ots->table_id = TABLEID_HASH;
+ strcpy(ots->name, "hash");
+ ots->wildcards = htonl(0);
+ ots->max_entries = htonl(dpstats.max_capacity);
+ ots->active_count = htonl(n_exact);
+ ots->lookup_count = htonll(dpstats.n_frags + dpstats.n_hit +
+ dpstats.n_missed);
+ ots->matched_count = htonll(dpstats.n_hit); /* XXX */
+
+ /* Classifier table. */
+ ots = append_stats_reply(sizeof *ots, ofconn, &msg);
+ memset(ots, 0, sizeof *ots);
+ ots->table_id = TABLEID_CLASSIFIER;
+ strcpy(ots->name, "classifier");
+ ots->wildcards = htonl(OFPFW_ALL);
+ /* The maximum is a hard-coded constant; lookup and match counters are
+ * not tracked for this table and are reported as zero. */
+ ots->max_entries = htonl(65536);
+ ots->active_count = htonl(n_wild);
+ ots->lookup_count = htonll(0); /* XXX */
+ ots->matched_count = htonll(0); /* XXX */
+
+ queue_tx(msg, ofconn);
+ return 0;
+}
+
+/* Answers an OFPST_PORT request with one ofp_port_stats entry for every port
+ * in 'p->ports', using counters read from each port's network device.
+ * Always returns 0. */
+static int
+handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_stats_request *request)
+{
+ struct ofp_port_stats *ops;
+ struct ofpbuf *msg;
+ struct ofport *port;
+ unsigned int port_no;
+
+ /* Room for 16 entries is only the initial allocation hint;
+ * append_stats_reply() is called once per port regardless. */
+ msg = start_stats_reply(request, sizeof *ops * 16);
+ PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
+ struct netdev_stats stats;
+
+ /* Intentionally ignore return value, since errors will set 'stats' to
+ * all-1s, which is correct for OpenFlow, and netdev_get_stats() will
+ * log errors. */
+ netdev_get_stats(port->netdev, &stats);
+
+ ops = append_stats_reply(sizeof *ops, ofconn, &msg);
+ /* 'port_no' is the index in the port array (datapath numbering) and is
+ * converted to the OpenFlow port number for the wire. */
+ ops->port_no = htons(odp_port_to_ofp_port(port_no));
+ memset(ops->pad, 0, sizeof ops->pad);
+ ops->rx_packets = htonll(stats.rx_packets);
+ ops->tx_packets = htonll(stats.tx_packets);
+ ops->rx_bytes = htonll(stats.rx_bytes);
+ ops->tx_bytes = htonll(stats.tx_bytes);
+ ops->rx_dropped = htonll(stats.rx_dropped);
+ ops->tx_dropped = htonll(stats.tx_dropped);
+ ops->rx_errors = htonll(stats.rx_errors);
+ ops->tx_errors = htonll(stats.tx_errors);
+ ops->rx_frame_err = htonll(stats.rx_frame_errors);
+ ops->rx_over_err = htonll(stats.rx_over_errors);
+ ops->rx_crc_err = htonll(stats.rx_crc_errors);
+ ops->collisions = htonll(stats.collisions);
+ }
+
+ queue_tx(msg, ofconn);
+ return 0;
+}
+
+/* State handed to flow_stats_cb() while an OFPST_FLOW reply is built. */
+struct flow_stats_cbdata {
+ struct ofproto *ofproto; /* Switch whose datapath is queried for counters. */
+ struct ofconn *ofconn; /* Connection on which reply messages are queued. */
+ uint16_t out_port; /* Output port filter, copied from the request as-is. */
+ struct ofpbuf *msg; /* Reply being built; replaced when it fills up. */
+};
+
+/* Computes the packet and byte counts of 'rule' and stores them in
+ * '*packet_countp' and '*byte_countp'.
+ *
+ * The totals start from the counters kept in 'rule' itself.  The datapath is
+ * then queried for the flow of 'rule' (exact-match rule) or for the flows of
+ * all the subrules on its list (wildcarded rule); if the query succeeds, the
+ * counters it returns are added on top. */
+static void
+query_stats(struct ofproto *p, struct rule *rule,
+            uint64_t *packet_countp, uint64_t *byte_countp)
+{
+    uint64_t n_packets = rule->packet_count;
+    uint64_t n_bytes = rule->byte_count;
+    struct odp_flow *queries;
+    size_t n_queries;
+    size_t idx;
+
+    /* Build one query per datapath flow that belongs to 'rule'. */
+    if (rule->cr.wc.wildcards) {
+        struct rule *subrule;
+
+        n_queries = list_size(&rule->list);
+        queries = xcalloc(1, n_queries * sizeof *queries);
+        idx = 0;
+        LIST_FOR_EACH (subrule, struct rule, list, &rule->list) {
+            queries[idx].key = subrule->cr.flow;
+            idx++;
+        }
+    } else {
+        n_queries = 1;
+        queries = xcalloc(1, n_queries * sizeof *queries);
+        queries[0].key = rule->cr.flow;
+    }
+
+    /* A failed query simply leaves the totals at the rule's own counters. */
+    if (dpif_flow_query_multiple(&p->dpif, queries, n_queries) == 0) {
+        for (idx = 0; idx < n_queries; idx++) {
+            n_packets += queries[idx].stats.n_packets;
+            n_bytes += queries[idx].stats.n_bytes;
+        }
+    }
+    free(queries);
+
+    *packet_countp = n_packets;
+    *byte_countp = n_bytes;
+}
+
+/* Classifier callback for OFPST_FLOW requests: appends one ofp_flow_stats
+ * entry describing 'rule_' to the reply in 'cbdata_' (a struct
+ * flow_stats_cbdata).  Subrules and rules rejected by rule_has_out_port()
+ * for the requested output port are skipped. */
+static void
+flow_stats_cb(struct cls_rule *rule_, void *cbdata_)
+{
+ struct rule *rule = rule_from_cls_rule(rule_);
+ struct flow_stats_cbdata *cbdata = cbdata_;
+ struct ofp_flow_stats *ofs;
+ uint64_t packet_count, byte_count;
+ size_t act_len, len;
+
+ if (rule->super || !rule_has_out_port(rule, cbdata->out_port)) {
+ return;
+ }
+
+ /* The entry is variable-length: fixed header followed by the rule's
+ * OpenFlow actions. */
+ act_len = sizeof *rule->actions * rule->n_actions;
+ len = offsetof(struct ofp_flow_stats, actions) + act_len;
+
+ query_stats(cbdata->ofproto, rule, &packet_count, &byte_count);
+
+ /* May queue the current message and start a new one in cbdata->msg. */
+ ofs = append_stats_reply(len, cbdata->ofconn, &cbdata->msg);
+ ofs->length = htons(len);
+ /* Wildcarded rules are reported in the classifier table, exact-match
+ * rules in the hash table. */
+ ofs->table_id = rule->cr.wc.wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH;
+ ofs->pad = 0;
+ flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &ofs->match);
+ /* Duration is sent in whole seconds; 'created' is in milliseconds. */
+ ofs->duration = htonl((time_msec() - rule->created) / 1000);
+ ofs->priority = htons(rule->cr.priority);
+ ofs->idle_timeout = htons(rule->idle_timeout);
+ ofs->hard_timeout = htons(rule->hard_timeout);
+ memset(ofs->pad2, 0, sizeof ofs->pad2);
+ ofs->packet_count = htonll(packet_count);
+ ofs->byte_count = htonll(byte_count);
+ /* Actions are copied verbatim, as they are stored in the rule. */
+ memcpy(ofs->actions, rule->actions, act_len);
+}
+
+/* Maps the 'table_id' of a stats request to the CLS_INC_* flags that select
+ * which rules to visit: exact-match rules only for TABLEID_HASH, wildcarded
+ * rules only for TABLEID_CLASSIFIER, and both for any other value. */
+static int
+table_id_to_include(uint8_t table_id)
+{
+    if (table_id == TABLEID_HASH) {
+        return CLS_INC_EXACT;
+    }
+    if (table_id == TABLEID_CLASSIFIER) {
+        return CLS_INC_WILD;
+    }
+    return CLS_INC_EXACT | CLS_INC_WILD;
+}
+
+/* Answers an OFPST_FLOW request.  'arg_size' is the size of the request body
+ * and must equal sizeof(struct ofp_flow_stats_request); otherwise
+ * OFPBRC_BAD_LENGTH is returned.  On success the reply is queued on 'ofconn'
+ * and 0 is returned. */
+static int
+handle_flow_stats_request(struct ofproto *p, struct ofconn *ofconn,
+ const struct ofp_stats_request *osr,
+ size_t arg_size)
+{
+ struct ofp_flow_stats_request *fsr;
+ struct flow_stats_cbdata cbdata;
+ struct cls_rule target;
+
+ if (arg_size != sizeof *fsr) {
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH);
+ }
+ fsr = (struct ofp_flow_stats_request *) osr->body;
+
+ cbdata.ofproto = p;
+ cbdata.ofconn = ofconn;
+ /* Passed on exactly as received, without byte swapping. */
+ cbdata.out_port = fsr->out_port;
+ /* 1024 is only the initial buffer size; entries are appended by the
+ * callback, which starts new messages as needed. */
+ cbdata.msg = start_stats_reply(osr, 1024);
+ cls_rule_from_match(&target, &fsr->match, 0);
+ classifier_for_each_match(&p->cls, &target,
+ table_id_to_include(fsr->table_id),
+ flow_stats_cb, &cbdata);
+ /* Queue the last (possibly only) message of the reply. */
+ queue_tx(cbdata.msg, ofconn);
+ return 0;
+}
+
+/* Accumulator handed to aggregate_stats_cb() for OFPST_AGGREGATE requests. */
+struct aggregate_stats_cbdata {
+ struct ofproto *ofproto; /* Switch whose datapath is queried for counters. */
+ uint16_t out_port; /* Output port filter, copied from the request as-is. */
+ uint64_t packet_count; /* Sum of packet counts of the rules visited. */
+ uint64_t byte_count; /* Sum of byte counts of the rules visited. */
+ uint32_t n_flows; /* Number of rules included in the sums. */
+};
+
+/* Classifier callback for OFPST_AGGREGATE requests: adds the counters of
+ * 'rule_' to the totals in 'cbdata_' (a struct aggregate_stats_cbdata) and
+ * counts the rule.  Subrules and rules rejected by rule_has_out_port() for
+ * the requested output port are left out. */
+static void
+aggregate_stats_cb(struct cls_rule *rule_, void *cbdata_)
+{
+    struct aggregate_stats_cbdata *totals = cbdata_;
+    struct rule *rule = rule_from_cls_rule(rule_);
+
+    if (!rule->super && rule_has_out_port(rule, totals->out_port)) {
+        uint64_t n_packets, n_bytes;
+
+        query_stats(totals->ofproto, rule, &n_packets, &n_bytes);
+        totals->packet_count += n_packets;
+        totals->byte_count += n_bytes;
+        totals->n_flows++;
+    }
+}
+
+/* Answers an OFPST_AGGREGATE request with the number of matching flows and
+ * the sum of their packet and byte counts.
+ *
+ * 'arg_size' is the size of the request body and must equal
+ * sizeof(struct ofp_aggregate_stats_request); otherwise OFPBRC_BAD_LENGTH is
+ * returned.  On success the reply is queued on 'ofconn' and 0 is returned. */
+static int
+handle_aggregate_stats_request(struct ofproto *p, struct ofconn *ofconn,
+                               const struct ofp_stats_request *osr,
+                               size_t arg_size)
+{
+    struct ofp_aggregate_stats_request *asr;
+    struct ofp_aggregate_stats_reply *reply;
+    struct aggregate_stats_cbdata cbdata;
+    struct cls_rule target;
+    struct ofpbuf *msg;
+
+    if (arg_size != sizeof *asr) {
+        return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH);
+    }
+    asr = (struct ofp_aggregate_stats_request *) osr->body;
+
+    cbdata.ofproto = p;
+    /* Passed on exactly as received, without byte swapping. */
+    cbdata.out_port = asr->out_port;
+    cbdata.packet_count = 0;
+    cbdata.byte_count = 0;
+    cbdata.n_flows = 0;
+    cls_rule_from_match(&target, &asr->match, 0);
+    classifier_for_each_match(&p->cls, &target,
+                              table_id_to_include(asr->table_id),
+                              aggregate_stats_cb, &cbdata);
+
+    msg = start_stats_reply(osr, sizeof *reply);
+    reply = append_stats_reply(sizeof *reply, ofconn, &msg);
+    /* append_stats_reply() hands back uninitialized space.  Clear all of it
+     * so that any bytes not assigned below (padding, reserved members) are
+     * sent as zeros rather than as leftover heap contents. */
+    memset(reply, 0, sizeof *reply);
+    reply->flow_count = htonl(cbdata.n_flows);
+    reply->packet_count = htonll(cbdata.packet_count);
+    reply->byte_count = htonll(cbdata.byte_count);
+    queue_tx(msg, ofconn);
+    return 0;
+}
+
+/* Validates the OFPT_STATS_REQUEST message 'oh' and dispatches it to the
+ * handler for its statistics type.
+ *
+ * Returns the handler's result, the error from check_ofp_message_array(),
+ * OFPBRC_BAD_VENDOR for vendor statistics, or OFPBRC_BAD_STAT for any other
+ * unsupported type. */
+static int
+handle_stats_request(struct ofproto *p, struct ofconn *ofconn,
+                     struct ofp_header *oh)
+{
+    struct ofp_stats_request *osr;
+    size_t arg_size;
+    uint16_t stats_type;
+    int error;
+
+    /* An element size of 1 makes 'arg_size' the body length in bytes. */
+    error = check_ofp_message_array(oh, OFPT_STATS_REQUEST, sizeof *osr,
+                                    1, &arg_size);
+    if (error) {
+        return error;
+    }
+
+    osr = (struct ofp_stats_request *) oh;
+    stats_type = ntohs(osr->type);
+
+    switch (stats_type) {
+    case OFPST_DESC:
+        return handle_desc_stats_request(p, ofconn, osr);
+
+    case OFPST_FLOW:
+        return handle_flow_stats_request(p, ofconn, osr, arg_size);
+
+    case OFPST_AGGREGATE:
+        return handle_aggregate_stats_request(p, ofconn, osr, arg_size);
+
+    case OFPST_TABLE:
+        return handle_table_stats_request(p, ofconn, osr);
+
+    case OFPST_PORT:
+        return handle_port_stats_request(p, ofconn, osr);
+
+    case OFPST_VENDOR:
+        return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR);
+
+    default:
+        return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_STAT);
+    }
+}
+
+/* Converts a timestamp given as 'sec' seconds plus 'nsec' nanoseconds into
+ * milliseconds.  A timestamp whose seconds part is zero yields 0 regardless
+ * of 'nsec'. */
+static long long int
+msec_from_nsec(uint64_t sec, uint32_t nsec)
+{
+    if (sec == 0) {
+        return 0;
+    }
+    return sec * 1000 + nsec / 1000000;
+}
+
+/* Moves 'rule->used' forward to the last-used time reported in 'stats'
+ * (converted to milliseconds).  The time is never moved backward. */
+static void
+update_time(struct rule *rule, const struct odp_flow_stats *stats)
+{
+    long long int reported;
+
+    reported = msec_from_nsec(stats->used_sec, stats->used_nsec);
+    if (rule->used < reported) {
+        rule->used = reported;
+    }
+}
+
+/* Folds the datapath statistics 'stats' into 'rule': packet and byte counts
+ * are added, TCP flags are OR-ed in, the IP TOS value is replaced, and the
+ * last-used time is advanced if 'stats' reports a later one. */
+static void
+update_stats(struct rule *rule, const struct odp_flow_stats *stats)
+{
+    rule->packet_count += stats->n_packets;
+    rule->byte_count += stats->n_bytes;
+    rule->tcp_flags |= stats->tcp_flags;
+    rule->ip_tos = stats->ip_tos;
+
+    update_time(rule, stats);
+}
+
+/* If flow_mod 'ofm' names a buffered packet, retrieves it from the packet
+ * buffers of 'ofconn' and has the datapath execute the flow_mod's
+ * 'n_actions' actions on it.
+ *
+ * '*byte_count' is set to the size of the packet when it was executed
+ * successfully and to 0 in every other case.
+ *
+ * Returns 0 if there was no buffered packet to send or the packet was
+ * retrieved (even when executing it failed), otherwise an OpenFlow error
+ * code. */
+static int
+send_buffered(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_flow_mod *ofm, size_t n_actions,
+ int *byte_count)
+{
+ struct odp_actions actions;
+ struct ofpbuf *packet;
+ uint16_t in_port;
+ int error;
+
+ *byte_count = 0;
+ /* A buffer_id of UINT32_MAX means "no buffered packet". */
+ if (ofm->buffer_id == htonl(UINT32_MAX)) {
+ return 0;
+ } else if (!ofconn->pktbuf) {
+ VLOG_WARN_RL(&rl, "attempt to send buffered packet via connection "
+ "without buffers");
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+ }
+
+ error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id),
+ &packet, &in_port);
+ if (error) {
+ return error;
+ }
+
+ /* NOTE(review): 'in_port' is treated as an OpenFlow port number here (it
+ * is passed as 'ofp_in_port' and converted with ofp_port_to_odp_port()),
+ * while send_packet_in_miss() saves msg->port with the buffer -- confirm
+ * which numbering pktbuf_retrieve() hands back. */
+ ofp_actions_to_odp_actions(in_port, ofm->actions, n_actions, &actions);
+ if (!dpif_execute(&p->dpif, ofp_port_to_odp_port(in_port),
+ actions.actions, actions.n_actions, packet)) {
+ *byte_count = packet->size;
+ }
+ free_actions(&actions);
+ ofpbuf_delete(packet);
+
+ return 0;
+}
+
+/* Handles an OFPFC_ADD flow_mod: creates a rule from 'ofm' with its
+ * 'n_actions' actions and inserts it into the classifier, replacing any
+ * identical rule that is already there.
+ *
+ * If 'ofm' names a buffered packet, that packet is sent first with the new
+ * actions, and the new rule's counters start out accounting for it.
+ *
+ * For a wildcarded rule nothing is installed in the datapath unless a rule
+ * was displaced, in which case the displaced rule's subrules are adopted and
+ * their datapath flows rewritten with the new actions.  For an exact-match
+ * rule the flow is installed in the datapath immediately.
+ *
+ * Returns the result of sending the buffered packet (0 if there was none or
+ * it was handled); failures of the datapath calls are not reported. */
+static int
+add_flow(struct ofproto *p, struct ofconn *ofconn,
+         struct ofp_flow_mod *ofm, size_t n_actions)
+{
+    struct rule *rule, *displaced_rule;
+    int byte_count;
+    int buffer_error;
+
+    buffer_error = send_buffered(p, ofconn, ofm, n_actions, &byte_count);
+
+    rule = xmalloc(rule_size(n_actions));
+    cls_rule_from_match(&rule->cr, &ofm->match, ntohs(ofm->priority));
+    rule->idle_timeout = ntohs(ofm->idle_timeout);
+    rule->hard_timeout = ntohs(ofm->hard_timeout);
+    rule->used = rule->created = time_msec();
+    /* At most one buffered packet was sent, so the packet count is 0 or 1. */
+    rule->packet_count = byte_count > 0;
+    rule->byte_count = byte_count;
+    rule->tcp_flags = 0;
+    rule->ip_tos = 0;
+    rule->super = NULL;
+    list_init(&rule->list);
+    rule->n_actions = n_actions;
+    memcpy(rule->actions, ofm->actions, n_actions * sizeof *rule->actions);
+
+    displaced_rule = rule_from_cls_rule(classifier_insert(&p->cls, &rule->cr));
+    if (rule->cr.wc.wildcards) {
+        if (displaced_rule) {
+            /* The displaced rule matches exactly the same packets as the new
+             * rule, and it has exactly the same priority, so we can transfer
+             * all displaced_rule's subrules to the new rule.  The subrule
+             * actions might have changed, so we have to update the datapath
+             * flows, which also has the convenient side effect of zeroing the
+             * counters for those flows. */
+            struct rule *subrule;
+
+            list_splice(&rule->list, displaced_rule->list.next,
+                        &displaced_rule->list);
+            LIST_FOR_EACH (subrule, struct rule, list, &rule->list) {
+                struct odp_actions actions;
+                struct odp_flow odp_flow;
+
+                subrule->super = rule;
+                ofp_actions_to_odp_actions(
+                    odp_port_to_ofp_port(subrule->cr.flow.in_port),
+                    (const struct ofp_action_header *) rule->actions,
+                    rule->n_actions, &actions);
+                odp_flow.key = subrule->cr.flow;
+                odp_flow.actions = actions.actions;
+                odp_flow.n_actions = actions.n_actions;
+                dpif_flow_add(&p->dpif, &odp_flow);
+                free_actions(&actions);
+            }
+            rule_destroy(displaced_rule);
+        }
+    } else {
+        struct odp_flow odp_flow;
+        struct odp_actions actions;
+
+        ofp_actions_to_odp_actions(ntohs(ofm->match.in_port),
+                                   ofm->actions, n_actions, &actions);
+
+        /* Start from an all-zero flow so that 'odp_flow.stats', which is read
+         * below, holds defined values even if dpif_flow_add() fails or does
+         * not fill it in. */
+        memset(&odp_flow, 0, sizeof odp_flow);
+        odp_flow.key = rule->cr.flow;
+        odp_flow.actions = actions.actions;
+        odp_flow.n_actions = actions.n_actions;
+        dpif_flow_add(&p->dpif, &odp_flow);
+        /* The translated actions are no longer needed once the flow has been
+         * handed to the datapath; release them as the wildcard branch does. */
+        free_actions(&actions);
+
+        if (displaced_rule) {
+            /* A displaced subrule credits its statistics to its superrule
+             * before it goes away. */
+            if (displaced_rule->super &&
+                displaced_rule->super != UNKNOWN_SUPER) {
+                update_stats(displaced_rule->super, &odp_flow.stats);
+            }
+            rule_destroy(displaced_rule);
+        }
+    }
+    return buffer_error;
+}
+
+/* Applies 'command' (OFPFC_DELETE, or anything else for "modify") from
+ * flow_mod 'ofm' to 'rule'.
+ *
+ * Subrules are left alone because they are invisible to the controller.
+ * Deleting removes the rule from the classifier and, for an exact-match
+ * rule, its flow from the datapath.  Modifying replaces the rule's actions
+ * with the 'n_actions' actions in 'ofm' and, for an exact-match rule,
+ * reinstalls its datapath flow.  Either operation on a wildcarded rule
+ * schedules revalidation of subrules.
+ *
+ * 'rule' must not be used by the caller afterwards: it is destroyed on
+ * delete and may be reallocated on modify.
+ *
+ * Always returns 0. */
+static int
+modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm,
+            size_t n_actions, uint16_t command, struct rule *rule)
+{
+    bool wildcarded;
+
+    if (rule->super) {
+        /* Subrules are invisible to the controller. */
+        return 0;
+    }
+
+    /* Sample this up front: 'rule' is freed or reallocated below, so it must
+     * not be read once either branch has run. */
+    wildcarded = rule->cr.wc.wildcards != 0;
+
+    if (command == OFPFC_DELETE) {
+        if (!wildcarded) {
+            struct odp_flow odp_flow;
+
+            /* Delete the flow that this rule installed.  The key comes from
+             * the rule rather than from 'ofm->match', because a non-strict
+             * delete request may carry a wildcarded match that only covers
+             * this rule. */
+            memset(&odp_flow, 0, sizeof odp_flow);
+            odp_flow.key = rule->cr.flow;
+            dpif_flow_del(&p->dpif, &odp_flow);
+        }
+        classifier_remove(&p->cls, &rule->cr);
+        rule_destroy(rule);
+    } else {
+        struct cls_rule *old_cr = &rule->cr;
+
+        if (!wildcarded) {
+            struct odp_flow odp_flow;
+            struct odp_actions actions;
+
+            /* The flow key holds a datapath port number, whereas the action
+             * translator expects an OpenFlow one; convert it the same way
+             * add_flow() does for subrules. */
+            ofp_actions_to_odp_actions(
+                odp_port_to_ofp_port(rule->cr.flow.in_port),
+                ofm->actions, n_actions, &actions);
+            memset(&odp_flow, 0, sizeof odp_flow);
+            odp_flow.key = rule->cr.flow;
+            odp_flow.actions = actions.actions;
+            odp_flow.n_actions = actions.n_actions;
+            dpif_flow_add(&p->dpif, &odp_flow);
+            free_actions(&actions);
+
+            update_stats(rule, &odp_flow.stats);
+        }
+
+        /* Resize the rule for the new action list and tell the classifier
+         * about its (possibly new) address. */
+        rule = xrealloc(rule, rule_size(n_actions));
+        rule->n_actions = n_actions;
+        memcpy(rule->actions, ofm->actions, n_actions * sizeof *rule->actions);
+        cls_rule_moved(&p->cls, old_cr, &rule->cr);
+    }
+
+    if (wildcarded) {
+        p->need_revalidate = true;
+    }
+    return 0;
+}
+
+/* Implements the strict variants of modify and delete: looks up the single
+ * rule whose match, wildcards and priority are exactly those in 'ofm' and,
+ * if it exists, applies 'command' to it.
+ *
+ * For OFPFC_DELETE with an 'out_port' other than OFPP_NONE, the rule is only
+ * deleted when rule_has_out_port() accepts it for that port.
+ *
+ * Always returns 0, including when no rule was found. */
+static int
+modify_flows_strict(struct ofproto *p, const struct ofp_flow_mod *ofm,
+                    size_t n_actions, uint16_t command)
+{
+    struct rule *target;
+    uint32_t wildcards;
+    flow_t flow;
+
+    flow_from_match(&flow, &wildcards, &ofm->match);
+    target = rule_from_cls_rule(
+        classifier_find_rule_exactly(&p->cls, &flow, wildcards,
+                                     ntohs(ofm->priority)));
+    if (!target) {
+        return 0;
+    }
+
+    if (command == OFPFC_DELETE
+        && ofm->out_port != htons(OFPP_NONE)
+        && !rule_has_out_port(target, ofm->out_port)) {
+        return 0;
+    }
+
+    modify_flow(p, ofm, n_actions, command, target);
+    return 0;
+}
+
+/* State handed to modify_flows_cb() by modify_flows_loose(). */
+struct modify_flows_cbdata {
+ struct ofproto *ofproto; /* Switch being modified. */
+ const struct ofp_flow_mod *ofm; /* The flow_mod request being applied. */
+ uint16_t out_port; /* Output port filter, stored as received on the wire. */
+ size_t n_actions; /* Number of actions in 'ofm'. */
+ uint16_t command; /* OFPFC_MODIFY or OFPFC_DELETE. */
+};
+
+/* Classifier callback for non-strict modify and delete: applies the command
+ * described by 'cbdata_' (a struct modify_flows_cbdata) to 'rule_'.
+ *
+ * When the request carries an output port filter (any 'out_port' other than
+ * OFPP_NONE, which modify_flows_loose() only sets for OFPFC_DELETE), rules
+ * that rule_has_out_port() rejects for that port are skipped, mirroring the
+ * check that the strict path performs. */
+static void
+modify_flows_cb(struct cls_rule *rule_, void *cbdata_)
+{
+    struct rule *rule = rule_from_cls_rule(rule_);
+    struct modify_flows_cbdata *cbdata = cbdata_;
+
+    if (cbdata->out_port != htons(OFPP_NONE)
+        && !rule_has_out_port(rule, cbdata->out_port)) {
+        return;
+    }
+
+    modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions,
+                cbdata->command, rule);
+}
+
+/* Implements the non-strict variants of modify and delete: visits every
+ * rule, exact-match or wildcarded, that the classifier reports as matching
+ * 'ofm->match' and hands it to modify_flows_cb() together with 'command'.
+ *
+ * The output port filter passed along is 'ofm->out_port' for OFPFC_DELETE
+ * and OFPP_NONE for every other command.  Always returns 0. */
+static int
+modify_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm,
+                   size_t n_actions, uint16_t command)
+{
+    struct modify_flows_cbdata cbdata;
+    struct cls_rule target;
+
+    cbdata.ofproto = p;
+    cbdata.ofm = ofm;
+    cbdata.n_actions = n_actions;
+    cbdata.command = command;
+    if (command == OFPFC_DELETE) {
+        cbdata.out_port = ofm->out_port;
+    } else {
+        cbdata.out_port = htons(OFPP_NONE);
+    }
+
+    cls_rule_from_match(&target, &ofm->match, 0);
+    classifier_for_each_match(&p->cls, &target, CLS_INC_WILD | CLS_INC_EXACT,
+                              modify_flows_cb, &cbdata);
+    return 0;
+}
+
+/* Handles the OFPT_FLOW_MOD message 'ofm' received on 'ofconn': validates
+ * the message and its actions, then dispatches on the command.
+ *
+ * Note that 'ofm' is modified in place: its match is normalized and, for an
+ * exact-match flow, its priority is overwritten.
+ *
+ * Returns 0 on success, otherwise an OpenFlow error code. */
+static int
+handle_flow_mod(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_flow_mod *ofm)
+{
+ size_t n_actions;
+ int error;
+
+ error = check_ofp_message_array(&ofm->header, OFPT_FLOW_MOD, sizeof *ofm,
+ sizeof *ofm->actions, &n_actions);
+ if (error) {
+ return error;
+ }
+
+ normalize_match(&ofm->match);
+ /* Exact-match flows always get the highest priority, whatever the
+ * controller asked for. */
+ if (!ofm->match.wildcards) {
+ ofm->priority = htons(UINT16_MAX);
+ }
+
+ error = validate_actions((const union ofp_action *) ofm->actions,
+ n_actions);
+ if (error) {
+ return error;
+ }
+
+ /* The strict commands share the OFPFC_MODIFY / OFPFC_DELETE command codes
+ * with their non-strict counterparts and differ only in the lookup
+ * function used. */
+ switch (ntohs(ofm->command)) {
+ case OFPFC_ADD:
+ return add_flow(p, ofconn, ofm, n_actions);
+
+ case OFPFC_MODIFY:
+ return modify_flows_loose(p, ofm, n_actions, OFPFC_MODIFY);
+
+ case OFPFC_MODIFY_STRICT:
+ return modify_flows_strict(p, ofm, n_actions, OFPFC_MODIFY);
+
+ case OFPFC_DELETE:
+ return modify_flows_loose(p, ofm, n_actions, OFPFC_DELETE);
+
+ case OFPFC_DELETE_STRICT:
+ return modify_flows_strict(p, ofm, n_actions, OFPFC_DELETE);
+
+ default:
+ return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_BAD_COMMAND);
+ }
+}
+
+/* Handles the OFPT_VENDOR message 'msg' received on 'ofconn'.  Only Nicira
+ * vendor messages are accepted.
+ *
+ * Returns the result of the subtype's handler, or OFPBRC_BAD_LENGTH,
+ * OFPBRC_BAD_VENDOR or OFPBRC_BAD_SUBTYPE when the message is too short,
+ * from another vendor, or of an unsupported subtype. */
+static int
+handle_vendor(struct ofproto *p, struct ofconn *ofconn, void *msg)
+{
+ struct ofp_vendor_header *ovh = msg;
+ struct nicira_header *nh;
+
+ /* The length is checked in two steps so that the vendor ID is only read
+ * once it is known to be present, and the Nicira subtype only once the
+ * message is known to be a Nicira one of sufficient length. */
+ if (ntohs(ovh->header.length) < sizeof(struct ofp_vendor_header)) {
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH);
+ }
+ if (ovh->vendor != htonl(NX_VENDOR_ID)) {
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR);
+ }
+ if (ntohs(ovh->header.length) < sizeof(struct nicira_header)) {
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH);
+ }
+
+ nh = msg;
+ switch (ntohl(nh->subtype)) {
+ case NXT_STATUS_REQUEST:
+ return switch_status_handle_request(p->switch_status, ofconn->rconn,
+ msg);
+
+ /* The two action-configuration subtypes are recognized but not
+ * implemented yet. */
+ case NXT_ACT_SET_CONFIG:
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); /* XXX */
+
+ case NXT_ACT_GET_CONFIG:
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); /* XXX */
+
+ case NXT_COMMAND_REQUEST:
+ /* Without an executer the request falls through to the generic
+ * "bad subtype" error below. */
+ if (p->executer) {
+ return executer_handle_request(p->executer, ofconn->rconn, msg);
+ }
+ break;
+ }
+ return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE);
+}
+
+/* Dispatches the OpenFlow message in 'ofp_msg', received on 'ofconn', to the
+ * handler for its type.  If the handler reports an error, or the type is not
+ * supported, an error reply is sent back on 'ofconn'.  Echo replies are
+ * accepted and ignored. */
+static void
+handle_openflow(struct ofconn *ofconn, struct ofproto *p,
+ struct ofpbuf *ofp_msg)
+{
+ struct ofp_header *oh = ofp_msg->data;
+ int error;
+
+ switch (oh->type) {
+ case OFPT_ECHO_REPLY:
+ error = 0;
+ break;
+
+ case OFPT_FEATURES_REQUEST:
+ error = handle_features_request(p, ofconn, oh);
+ break;
+
+ case OFPT_GET_CONFIG_REQUEST:
+ error = handle_get_config_request(p, ofconn, oh);
+ break;
+
+ case OFPT_SET_CONFIG:
+ error = handle_set_config(p, ofconn, ofp_msg->data);
+ break;
+
+ case OFPT_PACKET_OUT:
+ error = handle_packet_out(p, ofconn, ofp_msg->data);
+ break;
+
+ case OFPT_PORT_MOD:
+ error = handle_port_mod(p, oh);
+ break;
+
+ case OFPT_FLOW_MOD:
+ error = handle_flow_mod(p, ofconn, ofp_msg->data);
+ break;
+
+ case OFPT_STATS_REQUEST:
+ error = handle_stats_request(p, ofconn, oh);
+ break;
+
+ case OFPT_VENDOR:
+ error = handle_vendor(p, ofconn, ofp_msg->data);
+ break;
+
+ default:
+ /* The guard keeps ofp_to_string() from running when its output would
+ * not be logged anyway.
+ *
+ * NOTE(review): the guard tests the WARN level but the message is
+ * logged at DBG level -- confirm which of the two was intended. */
+ if (VLOG_IS_WARN_ENABLED()) {
+ char *s = ofp_to_string(oh, ntohs(oh->length), 2);
+ VLOG_DBG_RL(&rl, "OpenFlow message ignored: %s", s);
+ free(s);
+ }
+ error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_TYPE);
+ break;
+ }
+
+ if (error) {
+ send_error_oh(ofconn, ofp_msg->data, error);
+ }
+}
+\f
+/* Handles 'packet', a message received from the datapath that consists of a
+ * struct odp_msg header followed by the packet data.  This function takes
+ * ownership of 'packet': on every path it is either deleted or handed to
+ * pinsched_send().
+ *
+ * Messages of type _ODPL_ACTION_NR are forwarded toward the controller as
+ * packet-ins caused by an action.  Anything else is treated as a flow miss:
+ *
+ *   - If no rule matches, in-band control and fail-open get a chance to
+ *     handle the packet; otherwise it is sent to the controller as a
+ *     packet-in (unless the port has OFPPC_NO_PACKET_IN set).
+ *
+ *   - If a wildcarded rule matches, an exact-match subrule for this flow is
+ *     created under it and installed in the datapath.
+ *
+ *   - In both matching cases the rule's actions are executed on the packet. */
+static void
+handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
+{
+ struct odp_msg *msg = packet->data;
+ uint16_t in_port = odp_port_to_ofp_port(msg->port);
+ struct rule *rule, *subrule;
+ struct odp_actions actions;
+ struct ofpbuf payload;
+ flow_t flow;
+
+ /* Handle controller actions. */
+ if (msg->type == _ODPL_ACTION_NR) {
+ pinsched_send(p->action_sched, in_port, packet,
+ send_packet_in_action, p);
+ return;
+ }
+
+ /* 'payload' is a view of the packet data that follows the header inside
+ * 'packet'; it owns no memory of its own.  Only its 'data' and 'size'
+ * members are set here. */
+ payload.data = msg + 1;
+ payload.size = msg->length - sizeof *msg;
+ flow_extract(&payload, msg->port, &flow);
+
+ rule = rule_from_cls_rule(classifier_lookup(&p->cls, &flow));
+ if (!rule) {
+ struct ofport *port;
+
+ if (p->in_band && in_band_handle_flow_miss(p->in_band, p, msg->port,
+ &flow, &payload)) {
+ ofpbuf_delete(packet);
+ return;
+ }
+
+ if (p->fail_open && fail_open_handle_flow_miss(p->fail_open, p,
+ msg->port, &flow,
+ &payload)) {
+ ofpbuf_delete(packet);
+ return;
+ }
+
+ /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */
+ port = port_array_get(&p->ports, msg->port);
+ if (port) {
+ if (port->opp.config & OFPPC_NO_PACKET_IN) {
+ /* XXX install 'drop' flow entry */
+ ofpbuf_delete(packet);
+ return;
+ }
+ } else {
+ VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, msg->port);
+ }
+
+ pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+ return;
+ }
+
+ if (rule->cr.wc.wildcards) {
+ struct rule *old_sr;
+ struct odp_flow odp_flow;
+
+ /* Create an exact-match subrule of 'rule' for this flow.  It carries
+ * no actions of its own (n_actions is 0), which is why only the base
+ * structure is allocated. */
+ subrule = xmalloc(sizeof *subrule);
+ cls_rule_from_flow(&subrule->cr, &flow, 0, 0);
+ subrule->idle_timeout = rule->idle_timeout;
+ subrule->hard_timeout = rule->hard_timeout;
+ subrule->used = subrule->created = time_msec();
+ subrule->packet_count = subrule->byte_count = 0;
+ subrule->tcp_flags = 0;
+ subrule->ip_tos = 0;
+ subrule->super = rule;
+ subrule->n_actions = 0;
+
+ old_sr = rule_from_cls_rule(classifier_insert(&p->cls, &subrule->cr));
+ if (old_sr) {
+ if (!old_sr->super) {
+ /* The rule that was displaced is a real exact-match rule, not
+ * a subrule, so it must stay: undo the insertion and run that
+ * rule's actions instead. */
+ /* Put old_sr back. */
+ cls_rule_replace(&p->cls, &subrule->cr, &old_sr->cr);
+ free(subrule);
+
+ /* Execute old_sr on packet. */
+ rule_make_actions(old_sr, &actions);
+ dpif_execute(&p->dpif, msg->port,
+ actions.actions, actions.n_actions, &payload);
+ free_actions(&actions);
+ ofpbuf_delete(packet);
+ return;
+ } else {
+ /* An older subrule for the same flow was displaced: inherit
+ * its counters and discard it. */
+ subrule->packet_count += old_sr->packet_count;
+ subrule->byte_count += old_sr->byte_count;
+ rule_destroy(old_sr);
+ }
+ }
+ list_push_back(&rule->list, &subrule->list);
+ rule->used = time_msec();
+
+ /* Install flow entry into datapath. */
+ /* 'actions' stays allocated here; it is executed and freed by the
+ * common code at the end of the function. */
+ rule_make_actions(subrule, &actions);
+ odp_flow.key = flow;
+ odp_flow.actions = actions.actions;
+ odp_flow.n_actions = actions.n_actions;
+ dpif_flow_add(&p->dpif, &odp_flow);
+ } else {
+ /* XXX This should happen only if a flow got dropped--perhaps a hash
+ * collision? Oh, it could also indicate that the packet was buffered
+ * before we processed another packet from the same flow. */
+ subrule = rule;
+ rule_make_actions(subrule, &actions);
+ }
+
+ /* Execute subrule on packet. */
+ dpif_execute(&p->dpif, msg->port, actions.actions, actions.n_actions,
+ &payload);
+ free_actions(&actions);
+ ofpbuf_delete(packet);
+}
+\f
+/* Classifier callback that re-checks which rule the subrule 'sub_' belongs
+ * to; 'p_' is the struct ofproto.  Rules that are not subrules are ignored.
+ *
+ * The subrule's flow is looked up in the classifier again.  If the result
+ * differs from the recorded superrule, the subrule is either removed (no
+ * rule found) or attached to the rule that was found, with its datapath
+ * actions rewritten accordingly.
+ *
+ * NOTE(review): the lookup uses the subrule's own exact flow while the
+ * subrule itself is in the classifier -- confirm that classifier_lookup()
+ * cannot return 'sub' itself here.
+ *
+ * NOTE(review): in the removal branch the subrule is destroyed without a
+ * preceding classifier_remove(), and in the re-parenting branch it is not
+ * moved between the superrules' lists -- confirm that rule_destroy() and the
+ * callers take care of this. */
+static void
+revalidate_subrule(struct cls_rule *sub_, void *p_)
+{
+ struct rule *sub = rule_from_cls_rule(sub_);
+ struct ofproto *p = p_;
+ struct rule *super;
+
+ if (!sub->super) {
+ /* Not a subrule. */
+ return;
+ }
+
+ super = rule_from_cls_rule(classifier_lookup(&p->cls, &sub->cr.flow));
+ if (super != sub->super) {
+ if (!super) {
+ /* Nothing matches this flow any more: drop it from the datapath
+ * and discard the subrule.  Only the key of 'odp_flow' is set. */
+ struct odp_flow odp_flow;
+ odp_flow.key = sub->cr.flow;
+ dpif_flow_del(&p->dpif, &odp_flow);
+ rule_destroy(sub);
+ } else {
+ struct odp_actions actions;
+
+ /* Adopt the new superrule's timeouts and creation time, and reset
+ * the last-used time. */
+ sub->super = super;
+ sub->hard_timeout = super->hard_timeout;
+ sub->idle_timeout = super->idle_timeout;
+ sub->created = super->created;
+ sub->used = 0;
+
+ rule_make_actions(sub, &actions);
+ dpif_flow_set_actions(&p->dpif, &sub->cr.flow, actions.actions,
+ actions.n_actions);
+ free_actions(&actions);
+ }
+ }
+}
+
+/* Composes and returns an OFPT_FLOW_EXPIRED message for 'rule', which
+ * expired at time 'now' (in milliseconds, like 'rule->created') for the
+ * given OFPER_* 'reason'.
+ *
+ * All multi-byte fields are stored in network byte order, the same way
+ * flow_stats_cb() encodes the corresponding fields of a flow stats reply.
+ * The duration is reported in whole seconds.  The caller owns the returned
+ * buffer. */
+static struct ofpbuf *
+compose_flow_exp(const struct rule *rule, long long int now, uint8_t reason)
+{
+    struct ofp_flow_expired *ofe;
+    struct ofpbuf *buf;
+
+    ofe = make_openflow(sizeof *ofe, OFPT_FLOW_EXPIRED, &buf);
+    flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &ofe->match);
+    ofe->priority = htons(rule->cr.priority);
+    ofe->reason = reason;
+    ofe->duration = htonl((now - rule->created) / 1000);
+    ofe->packet_count = htonll(rule->packet_count);
+    ofe->byte_count = htonll(rule->byte_count);
+
+    return buf;
+}
+
+/* Announces that 'rule' expired at time 'now' for the given OFPER_*
+ * 'reason'.
+ *
+ * A flow-expired message is queued on every connection that has
+ * 'send_flow_exp' set.  The message is composed only if there is at least
+ * one such connection; every receiver but the last gets a clone and the last
+ * one gets the original.
+ *
+ * In addition, if NetFlow is configured and 'rule' is an exact-match rule,
+ * the expiration is reported to NetFlow with statistics rebuilt from the
+ * rule's counters. */
+static void
+send_flow_exp(struct ofproto *p, struct rule *rule,
+              long long int now, uint8_t reason)
+{
+    struct ofconn *pending = NULL;  /* Receiver still waiting for a copy. */
+    struct ofpbuf *msg = NULL;
+    struct ofconn *conn;
+
+    LIST_FOR_EACH (conn, struct ofconn, node, &p->all_conns) {
+        if (conn->send_flow_exp) {
+            if (!pending) {
+                msg = compose_flow_exp(rule, now, reason);
+            } else {
+                queue_tx(ofpbuf_clone(msg), pending);
+            }
+            pending = conn;
+        }
+    }
+    if (pending) {
+        queue_tx(msg, pending);
+    }
+
+    if (p->netflow && !rule->cr.wc.wildcards) {
+        /* XXX this interface isn't so great */
+        struct odp_flow_stats nf_stats;
+
+        nf_stats.n_packets = rule->packet_count;
+        nf_stats.n_bytes = rule->byte_count;
+        /* Split the millisecond timestamp into seconds and nanoseconds. */
+        nf_stats.used_sec = rule->used / 1000;
+        nf_stats.used_nsec = rule->used % 1000 * 1000000;
+        nf_stats.tcp_flags = rule->tcp_flags;
+        nf_stats.ip_tos = rule->ip_tos;
+        nf_stats.reserved = 0;
+        netflow_expire(p->netflow, &rule->cr.flow, &nf_stats, rule->created);
+    }
+}
+
+/* Classifier callback that expires 'cls_rule' if one of its timeouts has
+ * elapsed; 'p_' is the struct ofproto.
+ *
+ * The hard timeout counts from the rule's creation.  The idle timeout counts
+ * from the time the rule was last used and only applies to subrules and to
+ * rules whose subrule list is empty.  A timeout of zero never fires.
+ *
+ * When a rule that is not a subrule expires, its datapath flows are deleted
+ * first and the statistics returned by the datapath are folded into the
+ * rule, so that the expiration notice carries up-to-date counters.  The
+ * notice is then sent and the rule is removed from the classifier and
+ * destroyed. */
+static void
+expire_rule(struct cls_rule *cls_rule, void *p_)
+{
+    struct ofproto *p = p_;
+    struct rule *rule = rule_from_cls_rule(cls_rule);
+    long long int hard_expiration, idle_expiration, expiration, now;
+
+    hard_expiration = (rule->hard_timeout
+                       ? rule->created + rule->hard_timeout * 1000
+                       : LLONG_MAX);
+    idle_expiration = (rule->idle_timeout
+                       && (rule->super || list_is_empty(&rule->list))
+                       ? rule->used + rule->idle_timeout * 1000
+                       : LLONG_MAX);
+    expiration = MIN(hard_expiration, idle_expiration);
+
+    if (expiration == LLONG_MAX) {
+        return;
+    }
+
+    now = time_msec();
+    if (now < expiration) {
+        return;
+    }
+
+    if (!rule->super) {
+        if (rule->cr.wc.wildcards) {
+            /* Update stats.  (This code will be a no-op if the rule expired
+             * due to an idle timeout, because in that case the rule has no
+             * subrules left.) */
+            struct rule *subrule, *next_subrule;
+            LIST_FOR_EACH_SAFE (subrule, next_subrule,
+                                struct rule, list, &rule->list) {
+                struct odp_flow odp_flow;
+                odp_flow.key = subrule->cr.flow;
+                if (!dpif_flow_del(&p->dpif, &odp_flow)) {
+                    update_stats(rule, &odp_flow.stats);
+                }
+                rule_destroy(subrule);
+            }
+        } else {
+            struct odp_flow odp_flow;
+            memset(&odp_flow, 0, sizeof odp_flow);
+            odp_flow.key = rule->cr.flow;
+            /* As for subrules above, credit the final datapath counters to
+             * the rule before it is reported as expired. */
+            if (!dpif_flow_del(&p->dpif, &odp_flow)) {
+                update_stats(rule, &odp_flow.stats);
+            }
+        }
+    }
+
+#if 0
+    printf("expiring rule (%s): ",
+           (now >= hard_expiration ? "hard" : "idle"));
+    flow_print(stdout, &rule->cr.flow);
+    printf("\n");
+#endif
+
+    send_flow_exp(p, rule, now,
+                  (now >= hard_expiration
+                   ? OFPER_HARD_TIMEOUT : OFPER_IDLE_TIMEOUT));
+    classifier_remove(&p->cls, &rule->cr);
+    rule_destroy(rule);
+}
+
+/* Refreshes the last-used time of every exact-match rule from the flows that
+ * the datapath currently reports.  A datapath flow with no corresponding
+ * exact-match rule at priority UINT16_MAX is deleted from the datapath.
+ *
+ * Does nothing if the flows cannot be listed. */
+static void
+update_used(struct ofproto *p)
+{
+ struct odp_flow *flows;
+ size_t n_flows;
+ size_t i;
+ int error;
+
+ error = dpif_flow_list_all(&p->dpif, &flows, &n_flows);
+ if (error) {
+ return;
+ }
+
+ for (i = 0; i < n_flows; i++) {
+ struct odp_flow *f = &flows[i];
+ struct rule *rule;
+
+ /* Wildcards 0 and priority UINT16_MAX select the exact-match rule
+ * for this flow, if there is one. */
+ rule = rule_from_cls_rule(
+ classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX));
+ if (!rule) {
+ /* NOTE(review): this diagnostic goes to stdout with printf()
+ * rather than through the VLOG macros used elsewhere. */
+ printf("deleting mysterious rule from datapath\n");
+ flow_print(stdout, &f->key);
+ printf("\n");
+ dpif_flow_del(&p->dpif, f);
+ continue;
+ }
+
+ /* Only the timestamp is refreshed; counters are left untouched. */
+ update_time(rule, &f->stats);
+ /* XXX update p->next_expiration */
+ }
+ free(flows);
+}
+
+/* Builds an OFPT_PACKET_IN message from 'packet' and queues it on 'ofconn'.
+ *
+ * 'packet' holds a struct odp_msg header immediately followed by the packet
+ * payload.  At most 'send_len' bytes of the payload are copied into the
+ * message, but 'total_len' always reports the full payload length.
+ * 'buffer_id' is stored in the message as given.  The reason is OFPR_ACTION
+ * when the odp_msg type is _ODPL_ACTION_NR and OFPR_NO_MATCH otherwise. */
+static void
+do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id,
+                  const struct ofpbuf *packet, int send_len)
+{
+    struct odp_msg *msg = packet->data;
+    struct ofp_packet_in *opi;
+    struct ofpbuf payload;
+    struct ofpbuf *buf;
+
+    /* The payload starts right after the odp_msg header. */
+    payload.data = msg + 1;
+    payload.size = msg->length - sizeof *msg;
+
+    send_len = MIN(send_len, payload.size);
+
+    buf = ofpbuf_new(sizeof *opi + send_len);
+    opi = put_openflow_xid(offsetof(struct ofp_packet_in, data),
+                           OFPT_PACKET_IN, 0, buf);
+    opi->buffer_id = htonl(buffer_id);
+    opi->total_len = htons(payload.size);
+    opi->in_port = htons(odp_port_to_ofp_port(msg->port));
+    if (msg->type == _ODPL_ACTION_NR) {
+        opi->reason = OFPR_ACTION;
+    } else {
+        opi->reason = OFPR_NO_MATCH;
+    }
+
+    /* 'opi' must not be used past this point: appending may move the
+     * buffer's data. */
+    ofpbuf_put(buf, payload.data, MIN(send_len, payload.size));
+    queue_tx(buf, ofconn);
+}
+
+/* Packet-in callback for an odp_msg in 'packet'; 'p_' is the struct ofproto.
+ *
+ * Sends a packet_in with no buffer ID (UINT32_MAX) to the controller
+ * connection and to every other connection whose miss_send_len is nonzero,
+ * limiting the payload to the odp_msg's 'arg' bytes.  Takes ownership of
+ * 'packet' and deletes it. */
+static void
+send_packet_in_action(struct ofpbuf *packet, void *p_)
+{
+    struct ofproto *ofproto = p_;
+    struct odp_msg *msg = packet->data;
+    struct ofconn *ofconn;
+
+    LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+        if (ofconn != ofproto->controller && !ofconn->miss_send_len) {
+            continue;
+        }
+        do_send_packet_in(ofconn, UINT32_MAX, packet, msg->arg);
+    }
+    ofpbuf_delete(packet);
+}
+
+/* Packet-in callback for an odp_msg in 'packet'; 'p_' is the struct ofproto.
+ *
+ * For every connection whose miss_send_len is nonzero, tries to save the
+ * payload in that connection's packet buffer and then sends a packet_in:
+ *
+ *   - If the payload was buffered, the message carries the buffer ID and at
+ *     most miss_send_len bytes of payload.
+ *
+ *   - If buffering failed (pktbuf_save() returned UINT32_MAX), the message
+ *     carries the whole payload, since there is no other way to get it back.
+ *
+ * Takes ownership of 'packet' and deletes it. */
+static void
+send_packet_in_miss(struct ofpbuf *packet, void *p_)
+{
+    struct ofproto *p = p_;
+    struct ofconn *ofconn;
+    struct ofpbuf payload;
+    struct odp_msg *msg;
+
+    msg = packet->data;
+    payload.data = msg + 1;
+    payload.size = msg->length - sizeof *msg;
+    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+        if (ofconn->miss_send_len) {
+            uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
+                                             msg->port);
+            /* INT_MAX means "no limit": do_send_packet_in() clamps the
+             * length to the payload size.  (UINT32_MAX does not fit in an
+             * 'int', so storing it there is implementation-defined.) */
+            int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
+                            : INT_MAX);
+            do_send_packet_in(ofconn, buffer_id, packet, send_len);
+        }
+    }
+    ofpbuf_delete(packet);
+}
+
+/* Chooses a datapath ID for 'dpif'.
+ *
+ * Prefers the Ethernet address of the datapath's local port (ODPP_LOCAL).
+ * If the port cannot be queried or its address cannot be read, logs a
+ * warning and falls back to a random address whose first three bytes are
+ * overwritten with 00:23:20.  Returns the chosen address as a 64-bit
+ * integer. */
+static uint64_t
+pick_datapath_id(struct dpif *dpif)
+{
+    struct odp_port local_port;
+    uint8_t mac[ETH_ADDR_LEN];
+    int error;
+
+    error = dpif_port_query_by_number(dpif, ODPP_LOCAL, &local_port);
+    if (error) {
+        VLOG_WARN("datapath local port query failed (%s)", strerror(error));
+    } else {
+        error = netdev_nodev_get_etheraddr(local_port.devname, mac);
+        if (!error) {
+            return eth_addr_to_uint64(mac);
+        }
+        VLOG_WARN("could not get MAC address for %s (%s)",
+                  local_port.devname, strerror(error));
+    }
+
+    /* Fallback: random address under the Nicira OUI. */
+    eth_addr_random(mac);
+    mac[0] = 0x00;
+    mac[1] = 0x23;
+    mac[2] = 0x20;
+    return eth_addr_to_uint64(mac);
+}
+
--- /dev/null
+/* Copyright (c) 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#ifndef OFPROTO_H
+#define OFPROTO_H 1
+
+/* Public interface to the OpenFlow protocol engine ("ofproto"). */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "flow.h"
+
+/* Only pointers to these types appear below, so forward declarations are
+ * enough and this header does not depend on what "flow.h" happens to
+ * include. */
+struct dpif;
+struct ofpbuf;
+struct ofproto;
+struct settings;
+struct switch_status;
+union ofp_action;
+
+/* Creation, reconfiguration, and main-loop hooks. */
+struct ofproto *ofproto_create(const struct settings *);
+void ofproto_reconfigure(struct ofproto *);
+void ofproto_run(struct ofproto *);
+void ofproto_wait(struct ofproto *);
+bool ofproto_is_alive(const struct ofproto *p);
+
+/* Entry points that take a flow, an array of 'n_actions' OpenFlow actions,
+ * and a packet. */
+int ofproto_send_packet(struct ofproto *p, const flow_t *,
+                        const union ofp_action *, size_t n_actions,
+                        const struct ofpbuf *);
+void ofproto_setup_exact_flow(struct ofproto *, const flow_t *,
+                              const union ofp_action *, size_t n_actions,
+                              const struct ofpbuf *);
+
+#endif /* ofproto.h */
--- /dev/null
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#include <config.h>
+#include "pinsched.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include "ofpbuf.h"
+#include "openflow/openflow.h"
+#include "poll-loop.h"
+#include "port-array.h"
+#include "queue.h"
+#include "random.h"
+#include "rconn.h"
+#include "secchan.h"
+#include "status.h"
+#include "timeval.h"
+#include "vconn.h"
+
+/* State of a packet-in scheduler: a token bucket that rate-limits packet-in
+ * messages, plus per-port queues that hold the packets waiting for tokens. */
+struct pinsched {
+ /* Client-supplied parameters. */
+ int rate_limit; /* Packets added to bucket per second. */
+ int burst_limit; /* Maximum token bucket size, in packets. */
+
+ /* One queue per physical port. */
+ struct port_array queues; /* Array of "struct ofp_queue *". */
+ int n_queued; /* Sum over queues[*].n. */
+ unsigned int last_tx_port; /* Last port checked in round-robin. */
+
+ /* Token bucket.
+ *
+ * It costs 1000 tokens to send a single packet_in message. A single token
+ * per message would be more straightforward, but this choice lets us avoid
+ * round-off error in refill_bucket()'s calculation of how many tokens to
+ * add to the bucket, since no division step is needed. */
+ long long int last_fill; /* Time at which we last added tokens. */
+ int tokens; /* Current number of tokens. */
+
+ /* Transmission queue. */
+ int n_txq; /* No. of packets waiting in rconn for tx. */
+
+ /* Statistics reporting. */
+ unsigned long long n_normal; /* # txed w/o rate limit queuing. */
+ unsigned long long n_limited; /* # queued for rate limiting. */
+ unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */
+ unsigned long long n_tx_dropped; /* # dropped due to tx overflow. */
+};
+
+/* Removes and returns the packet at the head of 'q', which must be the queue
+ * stored under 'port_no' in 'ps->queues'.  If that empties 'q', frees it and
+ * clears its slot in the port array.  Decrements 'ps->n_queued'. */
+static struct ofpbuf *
+dequeue_packet(struct pinsched *ps, struct ofp_queue *q,
+               unsigned int port_no)
+{
+    struct ofpbuf *head = queue_pop_head(q);
+
+    ps->n_queued--;
+    if (q->n == 0) {
+        port_array_set(&ps->queues, port_no, NULL);
+        free(q);
+    }
+    return head;
+}
+
+/* Drops the packet at the head of the longest queue in 'ps' and counts it in
+ * 'ps->n_queue_dropped'.  If several queues tie for longest, one of them is
+ * chosen uniformly at random.  Does nothing if 'ps' has no queues. */
+static void
+drop_packet(struct pinsched *ps)
+{
+    struct ofp_queue *longest;  /* Queue currently selected as longest. */
+    int n_longest;              /* # of queues of same length as 'longest'. */
+    unsigned int longest_port_no; /* Port whose queue is 'longest'. */
+    unsigned int port_no;
+    struct ofp_queue *q;
+
+    longest = port_array_first(&ps->queues, &port_no);
+    if (!longest) {
+        /* Nothing is queued, so there is nothing to drop. */
+        return;
+    }
+    longest_port_no = port_no;
+    n_longest = 1;
+    while ((q = port_array_next(&ps->queues, &port_no)) != NULL) {
+        if (longest->n < q->n) {
+            /* Keep the port number in step with the queue: dequeue_packet()
+             * uses it to clear the right slot when the queue is freed. */
+            longest = q;
+            longest_port_no = port_no;
+            n_longest = 1;
+        } else if (longest->n == q->n) {
+            n_longest++;
+
+            /* Randomly select one of the longest queues, with a uniform
+             * distribution (Knuth algorithm 3.4.2R). */
+            if (!random_range(n_longest)) {
+                longest = q;
+                longest_port_no = port_no;
+            }
+        }
+    }
+
+    /* FIXME: do we want to pop the tail instead? */
+    ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no));
+    ps->n_queue_dropped++;
+}
+
+/* Removes and returns the next packet to transmit.  Queues are visited in
+ * round-robin order: the search resumes after 'ps->last_tx_port' and wraps
+ * around to the first queue when it runs off the end. */
+static struct ofpbuf *
+get_tx_packet(struct pinsched *ps)
+{
+    struct ofp_queue *next = port_array_next(&ps->queues, &ps->last_tx_port);
+
+    if (next == NULL) {
+        next = port_array_first(&ps->queues, &ps->last_tx_port);
+    }
+    return dequeue_packet(ps, next, ps->last_tx_port);
+}
+
+/* Adds tokens to 'ps''s bucket for the time elapsed since the last fill.
+ *
+ * Each elapsed millisecond is worth 'rate_limit' tokens.  The bucket is only
+ * updated once it would hold at least 1000 tokens (the price of one packet);
+ * until then 'last_fill' is left alone so that the elapsed time keeps
+ * accumulating.  The bucket is capped at 'burst_limit' packets' worth. */
+static void
+refill_bucket(struct pinsched *ps)
+{
+    long long int now = time_msec();
+    long long int elapsed = now - ps->last_fill;
+    long long int total = elapsed * ps->rate_limit + ps->tokens;
+
+    if (total < 1000) {
+        return;
+    }
+    ps->last_fill = now;
+    ps->tokens = MIN(total, ps->burst_limit * 1000);
+}
+
+/* Tries to pay for one packet out of 'ps''s bucket.  If the bucket holds at
+ * least 1000 tokens, removes 1000 and returns true; otherwise leaves the
+ * bucket untouched and returns false. */
+static bool
+get_token(struct pinsched *ps)
+{
+    bool affordable = ps->tokens >= 1000;
+
+    if (affordable) {
+        ps->tokens -= 1000;
+    }
+    return affordable;
+}
+
+/* Submits 'packet', received on 'port_no', to scheduler 'ps'.
+ *
+ * If 'ps' is null, or if nothing is queued and a token is available, the
+ * packet is passed to 'cb' (with 'aux') immediately.  Otherwise it is
+ * appended to the queue for 'port_no', to be sent later by pinsched_run();
+ * if 'burst_limit' packets are already queued, one queued packet is dropped
+ * first to make room. */
+void
+pinsched_send(struct pinsched *ps, uint16_t port_no,
+              struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux)
+{
+    struct ofp_queue *port_queue;
+
+    if (!ps) {
+        /* No scheduler: every packet goes straight through. */
+        cb(packet, aux);
+        return;
+    }
+
+    if (!ps->n_queued && get_token(ps)) {
+        /* In the common case where we are not constrained by the rate limit,
+         * let the packet take the normal path. */
+        ps->n_normal++;
+        cb(packet, aux);
+        return;
+    }
+
+    /* Otherwise queue it up for the periodic callback to drain out. */
+    if (ps->n_queued >= ps->burst_limit) {
+        drop_packet(ps);
+    }
+    port_queue = port_array_get(&ps->queues, port_no);
+    if (!port_queue) {
+        port_queue = xmalloc(sizeof *port_queue);
+        queue_init(port_queue);
+        port_array_set(&ps->queues, port_no, port_queue);
+    }
+    queue_push_tail(port_queue, packet);
+    ps->n_queued++;
+    ps->n_limited++;
+}
+
+/* Status callback: appends the scheduler's four counters to 'sr' as
+ * "name=value" entries.  'ps_' is the struct pinsched being reported on. */
+static void
+pinsched_status_cb(struct status_reply *sr, void *ps_)
+{
+    const struct pinsched *sched = ps_;
+
+    status_reply_put(sr, "normal=%llu", sched->n_normal);
+    status_reply_put(sr, "limited=%llu", sched->n_limited);
+    status_reply_put(sr, "queue-dropped=%llu", sched->n_queue_dropped);
+    status_reply_put(sr, "tx-dropped=%llu", sched->n_tx_dropped);
+}
+
+/* Periodic work for scheduler 'ps' (which may be null, in which case this is
+ * a no-op): refills the token bucket, then passes queued packets to 'cb'
+ * (with 'aux') for as long as packets are queued and tokens are available,
+ * up to 50 packets per call. */
+void
+pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux)
+{
+    if (ps) {
+        int i;
+
+        /* Drain some packets out of the bucket if possible, but limit the
+         * number of iterations to allow other code to get work done too.
+         *
+         * The iteration limit must be tested before get_token(), which
+         * consumes a token as a side effect: testing it afterward would
+         * throw away a token without sending a packet. */
+        refill_bucket(ps);
+        for (i = 0; i < 50 && ps->n_queued && get_token(ps); i++) {
+            cb(get_tx_packet(ps), aux);
+        }
+    }
+}
+
+/* Arranges for the poll loop to wake up when 'ps' has work to do.  Nothing
+ * is registered if 'ps' is null or has no queued packets. */
+void
+pinsched_wait(struct pinsched *ps)
+{
+    if (!ps || !ps->n_queued) {
+        return;
+    }
+
+    if (ps->tokens < 1000) {
+        /* We have to wait for the bucket to re-fill.  We could calculate
+         * the exact amount of time here for increased smoothness. */
+        poll_timer_wait(TIME_UPDATE_INTERVAL / 2);
+    } else {
+        /* We can transmit more packets as soon as we're called again. */
+        poll_immediate_wake();
+    }
+}
+
+/* Creates and returns a scheduler for sending packet-in messages, limited to
+ * 'rate_limit' packets per second with bursts of up to 'burst_limit'
+ * packets.  The bucket starts out holding 'rate_limit' * 100 tokens.  If 'ss'
+ * is nonnull, the scheduler's counters are registered with it under the
+ * "rate-limit" category. */
+struct pinsched *
+pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss)
+{
+    struct pinsched *sched = xcalloc(1, sizeof *sched);
+
+    /* Client-supplied parameters. */
+    sched->rate_limit = rate_limit;
+    sched->burst_limit = burst_limit;
+
+    /* Queues. */
+    port_array_init(&sched->queues);
+    sched->n_queued = 0;
+    sched->last_tx_port = PORT_ARRAY_SIZE;
+
+    /* Token bucket. */
+    sched->last_fill = time_msec();
+    sched->tokens = rate_limit * 100;
+
+    /* Transmission queue and statistics. */
+    sched->n_txq = 0;
+    sched->n_normal = 0;
+    sched->n_limited = 0;
+    sched->n_queue_dropped = 0;
+    sched->n_tx_dropped = 0;
+
+    if (ss) {
+        switch_status_register_category(ss, "rate-limit",
+                                        pinsched_status_cb, sched);
+    }
+
+    return sched;
+}
+
--- /dev/null
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#ifndef PINSCHED_H
+#define PINSCHED_H 1
+
+/* Packet-in scheduler: rate-limits packet-in messages with a token bucket,
+ * queuing packets per port while tokens are unavailable. */
+
+#include <stdint.h>
+
+struct ofpbuf;
+struct switch_status;
+
+/* Callback that receives each packet released by the scheduler, along with
+ * the 'aux' pointer that was passed to pinsched_send() or pinsched_run(). */
+typedef void pinsched_tx_cb(struct ofpbuf *, void *aux);
+
+/* Creates a scheduler.  The 'struct switch_status *' may be null. */
+struct pinsched *pinsched_create(int rate_limit, int burst_limit,
+                                 struct switch_status *);
+
+/* Submits a packet received on 'port_no'.  A null scheduler passes the
+ * packet straight to the callback. */
+void pinsched_send(struct pinsched *, uint16_t port_no, struct ofpbuf *,
+                   pinsched_tx_cb *, void *aux);
+
+/* Main-loop hooks.  Both accept a null scheduler. */
+void pinsched_run(struct pinsched *, pinsched_tx_cb *, void *aux);
+void pinsched_wait(struct pinsched *);
+
+#endif /* pinsched.h */
--- /dev/null
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#include <config.h>
+#include "pktbuf.h"
+#include <inttypes.h>
+#include <stdlib.h>
+#include "ofpbuf.h"
+#include "timeval.h"
+#include "util.h"
+#include "vconn.h"
+
+#define THIS_MODULE VLM_pktbuf
+#include "vlog.h"
+
+/* Buffers are identified by a 32-bit opaque ID. We divide the ID
+ * into a buffer number (low bits) and a cookie (high bits). The buffer number
+ * is an index into an array of buffers. The cookie distinguishes between
+ * different packets that have occupied a single buffer. Thus, the more
+ * buffers we have, the lower-quality the cookie... */
+/* Number of ID bits used for the buffer number, and the derived mask and
+ * slot count. */
+#define PKTBUF_BITS 8
+#define PKTBUF_MASK (PKTBUF_CNT - 1)
+#define PKTBUF_CNT (1u << PKTBUF_BITS)
+
+/* The remaining ID bits hold the cookie. COOKIE_MAX itself is never used as
+ * a cookie value (see pktbuf_save()). */
+#define COOKIE_BITS (32 - PKTBUF_BITS)
+#define COOKIE_MAX ((1u << COOKIE_BITS) - 1)
+
+/* How long, in milliseconds, pktbuf_save() refuses to overwrite a slot that
+ * still holds a packet. */
+#define OVERWRITE_MSECS 5000
+
+/* One buffer slot: a saved packet together with the cookie and input port
+ * needed to hand it back later. */
+struct packet {
+ struct ofpbuf *buffer; /* Saved copy of the packet, or NULL if the slot is empty. */
+ uint32_t cookie; /* Changed on every save; forms the high bits of the ID. */
+ long long int timeout; /* time_msec() value before which the slot is not overwritten. */
+ uint16_t in_port; /* Input port recorded by pktbuf_save(). */
+};
+
+/* A fixed-size set of PKTBUF_CNT packet slots, filled in circular order. */
+struct pktbuf {
+ struct packet packets[PKTBUF_CNT]; /* The slots, indexed by buffer number. */
+ unsigned int buffer_idx; /* Slot that the next pktbuf_save() will try. */
+};
+
+/* Returns the number of packet slots in every pktbuf. */
+int
+pktbuf_capacity(void)
+{
+    const int n_slots = PKTBUF_CNT;
+
+    return n_slots;
+}
+
+/* Allocates and returns a new packet buffer obtained from xcalloc().  The
+ * caller is expected to release it with pktbuf_destroy(). */
+struct pktbuf *
+pktbuf_create(void)
+{
+    struct pktbuf *pb = xcalloc(1, sizeof *pb);
+
+    return pb;
+}
+
+/* Frees 'pb' along with every packet still saved in it.  A null 'pb' is
+ * accepted and ignored. */
+void
+pktbuf_destroy(struct pktbuf *pb)
+{
+    struct packet *slot;
+
+    if (!pb) {
+        return;
+    }
+
+    for (slot = pb->packets; slot < &pb->packets[PKTBUF_CNT]; slot++) {
+        ofpbuf_delete(slot->buffer);
+    }
+    free(pb);
+}
+
+/* Saves a copy of 'buffer', received on 'in_port', in the next slot of 'pb'
+ * and returns the 32-bit ID under which it can be retrieved.
+ *
+ * Slots are used in circular order, and the slot index advances on every
+ * call.  If the chosen slot still holds a packet that was saved less than
+ * OVERWRITE_MSECS ago, nothing is saved and UINT32_MAX is returned; an older
+ * packet in the slot is discarded and replaced. */
+uint32_t
+pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
+{
+    unsigned int slot_no = pb->buffer_idx;
+    struct packet *slot = &pb->packets[slot_no];
+
+    pb->buffer_idx = (slot_no + 1) & PKTBUF_MASK;
+
+    if (slot->buffer) {
+        if (time_msec() < slot->timeout) {
+            return UINT32_MAX;
+        }
+        ofpbuf_delete(slot->buffer);
+    }
+
+    /* Don't use maximum cookie value since all-1-bits ID is special. */
+    slot->cookie++;
+    if (slot->cookie >= COOKIE_MAX) {
+        slot->cookie = 0;
+    }
+
+    slot->buffer = ofpbuf_clone(buffer);
+    slot->timeout = time_msec() + OVERWRITE_MSECS;
+    slot->in_port = in_port;
+    return slot_no | (slot->cookie << PKTBUF_BITS);
+}
+
+/* Looks up the packet saved in 'pb' under 'id'.
+ *
+ * On success, returns 0, stores the packet in '*bufferp' and its input port
+ * in '*in_port', and empties the slot, so the caller now owns the packet.
+ *
+ * On failure, logs a rate-limited warning, stores NULL in '*bufferp' and -1
+ * in '*in_port', and returns an OpenFlow error built by ofp_mkerr():
+ * OFPBRC_BAD_COOKIE if the cookie in 'id' does not match the slot, or
+ * OFPBRC_BUFFER_EMPTY if the slot no longer holds a packet. */
+int
+pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
+                uint16_t *in_port)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 20);
+    struct packet *slot = &pb->packets[id & PKTBUF_MASK];
+    int error;
+
+    if (slot->cookie != id >> PKTBUF_BITS) {
+        VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
+                     id, (id & PKTBUF_MASK) | (slot->cookie << PKTBUF_BITS));
+        error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+    } else if (!slot->buffer) {
+        VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
+        error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
+    } else {
+        *bufferp = slot->buffer;
+        *in_port = slot->in_port;
+        slot->buffer = NULL;
+        return 0;
+    }
+
+    *bufferp = NULL;
+    *in_port = -1;
+    return error;
+}
+
+/* Deletes the packet saved in 'pb' under 'id', if the cookie in 'id' still
+ * matches the slot.  A stale 'id' is silently ignored. */
+void
+pktbuf_discard(struct pktbuf *pb, uint32_t id)
+{
+    struct packet *slot = &pb->packets[id & PKTBUF_MASK];
+
+    if (slot->cookie != id >> PKTBUF_BITS) {
+        return;
+    }
+    ofpbuf_delete(slot->buffer);
+    slot->buffer = NULL;
+}
--- /dev/null
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
+ * Junior University
+ *
+ * We are making the OpenFlow specification and associated documentation
+ * (Software) available for public use and benefit with the expectation
+ * that others will use, modify and enhance the Software and contribute
+ * those enhancements back to the community. However, since we would
+ * like to make the Software available for broadest use, with as few
+ * restrictions as possible permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this Software to deal in
+ * the Software under the copyrights without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * The name and trademarks of copyright holder(s) may NOT be used in
+ * advertising or publicity pertaining to the Software or any
+ * derivatives without specific, written prior permission.
+ */
+
+#ifndef PKTBUF_H
+#define PKTBUF_H 1
+
+/* Packet buffer: keeps copies of packets so that they can later be fetched
+ * back by a 32-bit ID. */
+
+#include <stdint.h>
+
+struct pktbuf;
+struct ofpbuf;
+
+/* Returns the number of packet slots in a pktbuf. */
+int pktbuf_capacity(void);
+
+/* Creation and destruction. pktbuf_destroy() accepts a null pointer. */
+struct pktbuf *pktbuf_create(void);
+void pktbuf_destroy(struct pktbuf *);
+
+/* pktbuf_save() stores a copy of 'buffer' and returns its ID, or UINT32_MAX
+ * if the packet could not be saved. pktbuf_retrieve() returns 0 and hands
+ * the saved packet to the caller, or returns a nonzero error value.
+ * pktbuf_discard() deletes the saved packet if 'id' is still current. */
+uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
+ uint16_t *in_port);
+void pktbuf_discard(struct pktbuf *, uint32_t id);
+
+#endif /* pktbuf.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "port-watcher.h"
-#include <arpa/inet.h>
-#include <assert.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include "dynamic-string.h"
-#include "netdev.h"
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "poll-loop.h"
-#include "port-array.h"
-#include "rconn.h"
-#include "shash.h"
-#include "svec.h"
-#include "timeval.h"
-#include "vconn.h"
-#include "xtoxll.h"
-
-#define THIS_MODULE VLM_port_watcher
-#include "vlog.h"
-
-struct port_watcher_cb {
- port_changed_cb_func *port_changed;
- void *aux;
-};
-
-struct port_watcher_local_cb {
- local_port_changed_cb_func *local_port_changed;
- void *aux;
-};
-
-struct port_watcher {
- struct rconn *local_rconn;
- struct rconn *remote_rconn;
- struct port_array ports;
- time_t last_feature_request;
- bool got_feature_reply;
- uint64_t datapath_id;
- int n_txq;
- struct port_watcher_cb cbs[2];
- int n_cbs;
- struct port_watcher_local_cb local_cbs[4];
- int n_local_cbs;
- char local_port_name[OFP_MAX_PORT_NAME_LEN + 1];
- struct netdev_monitor *mon;
- struct shash port_by_name;
-};
-
-/* Returns the number of fields that differ from 'a' to 'b'. */
-static int
-opp_differs(const struct ofp_phy_port *a, const struct ofp_phy_port *b)
-{
- BUILD_ASSERT_DECL(sizeof *a == 48); /* Trips when we add or remove fields. */
- return ((a->port_no != b->port_no)
- + (memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) != 0)
- + (memcmp(a->name, b->name, sizeof a->name) != 0)
- + (a->config != b->config)
- + (a->state != b->state)
- + (a->curr != b->curr)
- + (a->advertised != b->advertised)
- + (a->supported != b->supported)
- + (a->peer != b->peer));
-}
-
-static void
-sanitize_opp(struct ofp_phy_port *opp)
-{
- size_t i;
-
- for (i = 0; i < sizeof opp->name; i++) {
- char c = opp->name[i];
- if (c && (c < 0x20 || c > 0x7e)) {
- opp->name[i] = '.';
- }
- }
- opp->name[sizeof opp->name - 1] = '\0';
-}
-
-static void
-call_port_changed_callbacks(struct port_watcher *pw, int port_no,
- const struct ofp_phy_port *old,
- const struct ofp_phy_port *new)
-{
- int i;
- for (i = 0; i < pw->n_cbs; i++) {
- port_changed_cb_func *port_changed = pw->cbs[i].port_changed;
- (port_changed)(port_no, old, new, pw->cbs[i].aux);
- }
-}
-
-void
-get_port_name(const struct ofp_phy_port *port, char *name, size_t name_size)
-{
- char *p;
-
- memcpy(name, port->name, MIN(name_size, sizeof port->name));
- name[name_size - 1] = '\0';
- for (p = name; *p != '\0'; p++) {
- if (*p < 32 || *p > 126) {
- *p = '.';
- }
- }
-}
-
-static struct ofp_phy_port *
-lookup_port(const struct port_watcher *pw, uint16_t port_no)
-{
- return port_array_get(&pw->ports, port_no);
-}
-
-static void
-call_local_port_changed_callbacks(struct port_watcher *pw)
-{
- char name[OFP_MAX_PORT_NAME_LEN + 1];
- const struct ofp_phy_port *port;
- int i;
-
- /* Pass the local port to the callbacks, if it exists.
- Pass a null pointer if there is no local port. */
- port = lookup_port(pw, OFPP_LOCAL);
-
- /* Log the name of the local port. */
- if (port) {
- get_port_name(port, name, sizeof name);
- } else {
- name[0] = '\0';
- }
- if (strcmp(pw->local_port_name, name)) {
- if (name[0]) {
- VLOG_INFO("Identified data path local port as \"%s\".", name);
- } else {
- VLOG_WARN("Data path has no local port.");
- }
- strcpy(pw->local_port_name, name);
- }
-
- /* Invoke callbacks. */
- for (i = 0; i < pw->n_local_cbs; i++) {
- local_port_changed_cb_func *cb = pw->local_cbs[i].local_port_changed;
- (cb)(port, pw->local_cbs[i].aux);
- }
-}
-
-static void
-update_phy_port(struct port_watcher *pw, struct ofp_phy_port *opp,
- uint8_t reason)
-{
- struct ofp_phy_port *old;
- uint16_t port_no;
-
- port_no = ntohs(opp->port_no);
- old = lookup_port(pw, port_no);
-
- if (reason == OFPPR_DELETE && old) {
- call_port_changed_callbacks(pw, port_no, old, NULL);
- free(old);
- port_array_set(&pw->ports, port_no, NULL);
- } else if (reason == OFPPR_MODIFY || reason == OFPPR_ADD) {
- if (old) {
- uint32_t s_mask = htonl(OFPPS_STP_MASK);
- opp->state = (opp->state & ~s_mask) | (old->state & s_mask);
- }
- if (!old || opp_differs(opp, old)) {
- struct ofp_phy_port new = *opp;
- sanitize_opp(&new);
- call_port_changed_callbacks(pw, port_no, old, &new);
- if (old) {
- *old = new;
- } else {
- port_array_set(&pw->ports, port_no, xmemdup(&new, sizeof new));
- }
- }
- }
-}
-
-static void
-update_netdev_monitor_devices(struct port_watcher *pw)
-{
- struct ofp_phy_port *p;
- struct svec netdevs;
- unsigned int port_no;
-
- svec_init(&netdevs);
- shash_clear(&pw->port_by_name);
- for (p = port_array_first(&pw->ports, &port_no); p;
- p = port_array_next(&pw->ports, &port_no)) {
- const char *name = (const char *) p->name;
- svec_add(&netdevs, name);
- shash_add(&pw->port_by_name, name, p);
- }
- netdev_monitor_set_devices(pw->mon, netdevs.names, netdevs.n);
- svec_destroy(&netdevs);
-}
-
-static bool
-port_watcher_local_packet_cb(struct relay *r, void *pw_)
-{
- struct port_watcher *pw = pw_;
- struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf;
- struct ofp_header *oh = msg->data;
-
- if (oh->type == OFPT_FEATURES_REPLY
- && msg->size >= offsetof(struct ofp_switch_features, ports)) {
- struct ofp_switch_features *osf = msg->data;
- bool seen[PORT_ARRAY_SIZE];
- struct ofp_phy_port *p;
- unsigned int port_no;
- size_t n_ports;
- size_t i;
-
- pw->got_feature_reply = true;
- if (pw->datapath_id != osf->datapath_id) {
- pw->datapath_id = osf->datapath_id;
- VLOG_INFO("Datapath id is %012"PRIx64, ntohll(pw->datapath_id));
- }
-
- /* Update each port included in the message. */
- memset(seen, false, sizeof seen);
- n_ports = ((msg->size - offsetof(struct ofp_switch_features, ports))
- / sizeof *osf->ports);
- for (i = 0; i < n_ports; i++) {
- struct ofp_phy_port *opp = &osf->ports[i];
- update_phy_port(pw, opp, OFPPR_MODIFY);
- seen[ntohs(opp->port_no)] = true;
- }
-
- /* Delete all the ports not included in the message. */
- for (p = port_array_first(&pw->ports, &port_no); p;
- p = port_array_next(&pw->ports, &port_no)) {
- if (!seen[port_no]) {
- update_phy_port(pw, p, OFPPR_DELETE);
- }
- }
-
- update_netdev_monitor_devices(pw);
-
- call_local_port_changed_callbacks(pw);
- } else if (oh->type == OFPT_PORT_STATUS
- && msg->size >= sizeof(struct ofp_port_status)) {
- struct ofp_port_status *ops = msg->data;
- update_phy_port(pw, &ops->desc, ops->reason);
- if (ops->desc.port_no == htons(OFPP_LOCAL)) {
- call_local_port_changed_callbacks(pw);
- }
- if (ops->reason == OFPPR_ADD || OFPPR_DELETE) {
- update_netdev_monitor_devices(pw);
- }
- }
- return false;
-}
-
-static void
-bring_netdev_up_or_down(const char *name, bool down)
-{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- struct netdev *netdev;
- int retval;
-
- retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev);
- if (!retval) {
- if (down) {
- retval = netdev_turn_flags_off(netdev, NETDEV_UP, true);
- } else {
- retval = netdev_turn_flags_on(netdev, NETDEV_UP, true);
- }
- if (retval) {
- VLOG_WARN_RL(&rl, "failed to bring network device %s %s: %s",
- name, down ? "down" : "up", strerror(retval));
- }
- netdev_close(netdev);
- } else {
- VLOG_WARN_RL(&rl, "failed to open network device %s: %s",
- name, strerror(retval));
- }
-}
-
-static bool
-port_watcher_remote_packet_cb(struct relay *r, void *pw_)
-{
- struct port_watcher *pw = pw_;
- struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf;
- struct ofp_header *oh = msg->data;
-
- if (oh->type == OFPT_PORT_MOD
- && msg->size >= sizeof(struct ofp_port_mod)) {
- struct ofp_port_mod *opm = msg->data;
- uint16_t port_no = ntohs(opm->port_no);
- struct ofp_phy_port *pw_opp = lookup_port(pw, port_no);
- if (pw_opp->port_no != htons(OFPP_NONE)) {
- struct ofp_phy_port old = *pw_opp;
- pw_opp->config = ((pw_opp->config & ~opm->mask)
- | (opm->config & opm->mask));
- call_port_changed_callbacks(pw, port_no, &old, pw_opp);
- if (pw_opp->port_no == htons(OFPP_LOCAL)) {
- call_local_port_changed_callbacks(pw);
- }
-
- if (opm->mask & htonl(OFPPC_PORT_DOWN)) {
- bring_netdev_up_or_down((const char *) pw_opp->name,
- opm->config & htonl(OFPPC_PORT_DOWN));
- }
- }
- }
- return false;
-}
-
-/* Sets 'bit' in '*word' to 0 or 1 according to 'value'. */
-static void
-set_bit(uint32_t bit, bool value, uint32_t *word)
-{
- if (value) {
- *word |= bit;
- } else {
- *word &= ~bit;
- }
-}
-
-static void
-port_watcher_periodic_cb(void *pw_)
-{
- struct port_watcher *pw = pw_;
- const char *name;
-
- if (!pw->got_feature_reply
- && time_now() >= pw->last_feature_request + 5
- && rconn_is_connected(pw->local_rconn)) {
- struct ofpbuf *b;
- make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &b);
- rconn_send_with_limit(pw->local_rconn, b, &pw->n_txq, 1);
- pw->last_feature_request = time_now();
- }
-
- netdev_monitor_run(pw->mon);
- while ((name = netdev_monitor_poll(pw->mon)) != NULL) {
- struct ofp_phy_port *opp;
- struct ofp_phy_port new_opp;
- enum netdev_flags flags;
- int retval;
-
- opp = shash_find_data(&pw->port_by_name, name);
- if (!opp) {
- continue;
- }
-
- retval = netdev_nodev_get_flags(name, &flags);
- if (retval) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_WARN_RL(&rl, "could not get flags for %s", name);
- continue;
- }
-
- new_opp = *opp;
- set_bit(htonl(OFPPC_PORT_DOWN), flags & NETDEV_UP, &new_opp.config);
- set_bit(htonl(OFPPS_LINK_DOWN), flags & NETDEV_CARRIER,
- &new_opp.state);
- if (opp->config != new_opp.config || opp->state != new_opp.state) {
- struct ofp_port_status *ops;
- struct ofpbuf *b;
-
- /* Notify other secchan modules. */
- update_phy_port(pw, &new_opp, OFPPR_MODIFY);
- if (new_opp.port_no == htons(OFPP_LOCAL)) {
- call_local_port_changed_callbacks(pw);
- }
-
- /* Notify the controller that the flags changed. */
- ops = make_openflow(sizeof *ops, OFPT_PORT_STATUS, &b);
- ops->reason = OFPPR_MODIFY;
- ops->desc = new_opp;
- rconn_send(pw->remote_rconn, b, NULL);
- }
- }
-}
-
-static void
-port_watcher_wait_cb(void *pw_)
-{
- struct port_watcher *pw = pw_;
- if (!pw->got_feature_reply && rconn_is_connected(pw->local_rconn)) {
- if (pw->last_feature_request != TIME_MIN) {
- poll_timer_wait(pw->last_feature_request + 5 - time_now());
- } else {
- poll_immediate_wake();
- }
- }
- netdev_monitor_wait(pw->mon);
-}
-
-static void
-put_duplexes(struct ds *ds, const char *name, uint32_t features,
- uint32_t hd_bit, uint32_t fd_bit)
-{
- if (features & (hd_bit | fd_bit)) {
- ds_put_format(ds, " %s", name);
- if (features & hd_bit) {
- ds_put_cstr(ds, "(HD)");
- }
- if (features & fd_bit) {
- ds_put_cstr(ds, "(FD)");
- }
- }
-}
-
-static void
-put_features(struct ds *ds, const char *name, uint32_t features)
-{
- if (features & (OFPPF_10MB_HD | OFPPF_10MB_FD
- | OFPPF_100MB_HD | OFPPF_100MB_FD
- | OFPPF_1GB_HD | OFPPF_1GB_FD | OFPPF_10GB_FD)) {
- ds_put_cstr(ds, name);
- put_duplexes(ds, "10M", features, OFPPF_10MB_HD, OFPPF_10MB_FD);
- put_duplexes(ds, "100M", features,
- OFPPF_100MB_HD, OFPPF_100MB_FD);
- put_duplexes(ds, "1G", features, OFPPF_1GB_HD, OFPPF_1GB_FD);
- if (features & OFPPF_10GB_FD) {
- ds_put_cstr(ds, " 10G");
- }
- if (features & OFPPF_AUTONEG) {
- ds_put_cstr(ds, " AUTO_NEG");
- }
- if (features & OFPPF_PAUSE) {
- ds_put_cstr(ds, " PAUSE");
- }
- if (features & OFPPF_PAUSE_ASYM) {
- ds_put_cstr(ds, " PAUSE_ASYM");
- }
- }
-}
-
-static void
-log_port_status(uint16_t port_no,
- const struct ofp_phy_port *old,
- const struct ofp_phy_port *new,
- void *aux UNUSED)
-{
- if (VLOG_IS_DBG_ENABLED()) {
- if (old && new && (opp_differs(old, new)
- == ((old->config != new->config)
- + (old->state != new->state))))
- {
- /* Don't care if only state or config changed. */
- } else if (!new) {
- if (old) {
- VLOG_DBG("Port %d deleted", port_no);
- }
- } else {
- struct ds ds = DS_EMPTY_INITIALIZER;
- uint32_t curr = ntohl(new->curr);
- uint32_t supported = ntohl(new->supported);
- ds_put_format(&ds, "\"%s\", "ETH_ADDR_FMT, new->name,
- ETH_ADDR_ARGS(new->hw_addr));
- if (curr) {
- put_features(&ds, ", current", curr);
- }
- if (supported) {
- put_features(&ds, ", supports", supported);
- }
- VLOG_DBG("Port %d %s: %s",
- port_no, old ? "changed" : "added", ds_cstr(&ds));
- ds_destroy(&ds);
- }
- }
-}
-
-void
-port_watcher_register_callback(struct port_watcher *pw,
- port_changed_cb_func *port_changed,
- void *aux)
-{
- assert(pw->n_cbs < ARRAY_SIZE(pw->cbs));
- pw->cbs[pw->n_cbs].port_changed = port_changed;
- pw->cbs[pw->n_cbs].aux = aux;
- pw->n_cbs++;
-}
-
-void
-port_watcher_register_local_port_callback(struct port_watcher *pw,
- local_port_changed_cb_func *cb,
- void *aux)
-{
- assert(pw->n_local_cbs < ARRAY_SIZE(pw->local_cbs));
- pw->local_cbs[pw->n_local_cbs].local_port_changed = cb;
- pw->local_cbs[pw->n_local_cbs].aux = aux;
- pw->n_local_cbs++;
-}
-
-uint32_t
-port_watcher_get_config(const struct port_watcher *pw, uint16_t port_no)
-{
- struct ofp_phy_port *p = lookup_port(pw, port_no);
- return p ? ntohl(p->config) : 0;
-}
-
-const char *
-port_watcher_get_name(const struct port_watcher *pw, uint16_t port_no)
-{
- struct ofp_phy_port *p = lookup_port(pw, port_no);
- return p ? (const char *) p->name : NULL;
-}
-
-const uint8_t *
-port_watcher_get_hwaddr(const struct port_watcher *pw, uint16_t port_no)
-{
- struct ofp_phy_port *p = lookup_port(pw, port_no);
- return p ? p->hw_addr : NULL;
-}
-
-void
-port_watcher_set_flags(struct port_watcher *pw, uint16_t port_no,
- uint32_t config, uint32_t c_mask,
- uint32_t state, uint32_t s_mask)
-{
- struct ofp_phy_port old;
- struct ofp_phy_port *p;
- struct ofp_port_mod *opm;
- struct ofp_port_status *ops;
- struct ofpbuf *b;
-
- p = lookup_port(pw, port_no);
- if (!p) {
- return;
- }
-
- if (!((ntohl(p->state) ^ state) & s_mask)
- && (!((ntohl(p->config) ^ config) & c_mask))) {
- return;
- }
- old = *p;
-
- /* Update our idea of the flags. */
- p->config = htonl((ntohl(p->config) & ~c_mask) | (config & c_mask));
- p->state = htonl((ntohl(p->state) & ~s_mask) | (state & s_mask));
- call_port_changed_callbacks(pw, port_no, &old, p);
-
- /* Change the flags in the datapath. */
- opm = make_openflow(sizeof *opm, OFPT_PORT_MOD, &b);
- opm->port_no = p->port_no;
- memcpy(opm->hw_addr, p->hw_addr, OFP_ETH_ALEN);
- opm->config = p->config;
- opm->mask = htonl(c_mask);
- opm->advertise = htonl(0);
- rconn_send(pw->local_rconn, b, NULL);
-
- /* Notify the controller that the flags changed. */
- ops = make_openflow(sizeof *ops, OFPT_PORT_STATUS, &b);
- ops->reason = OFPPR_MODIFY;
- ops->desc = *p;
- rconn_send(pw->remote_rconn, b, NULL);
-}
-
-bool
-port_watcher_is_ready(const struct port_watcher *pw)
-{
- return pw->got_feature_reply;
-}
-
-static const struct hook_class port_watcher_hook_class = {
- port_watcher_local_packet_cb, /* local_packet_cb */
- port_watcher_remote_packet_cb, /* remote_packet_cb */
- port_watcher_periodic_cb, /* periodic_cb */
- port_watcher_wait_cb, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-port_watcher_start(struct secchan *secchan,
- struct rconn *local_rconn, struct rconn *remote_rconn,
- struct port_watcher **pwp)
-{
- struct port_watcher *pw;
- int retval;
-
- pw = *pwp = xcalloc(1, sizeof *pw);
- pw->local_rconn = local_rconn;
- pw->remote_rconn = remote_rconn;
- pw->last_feature_request = TIME_MIN;
- port_array_init(&pw->ports);
- pw->local_port_name[0] = '\0';
- retval = netdev_monitor_create(&pw->mon);
- if (retval) {
- ofp_fatal(retval, "failed to start network device monitoring");
- }
- shash_init(&pw->port_by_name);
- port_watcher_register_callback(pw, log_port_status, NULL);
- add_hook(secchan, &port_watcher_hook_class, pw);
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef PORT_WATCHER_H
-#define PORT_WATCHER_H 1
-
-#include <stdint.h>
-#include "compiler.h"
-#include "secchan.h"
-
-struct ofp_phy_port;
-struct port_watcher;
-struct secchan;
-
-void port_watcher_start(struct secchan *,
- struct rconn *local, struct rconn *remote,
- struct port_watcher **);
-bool port_watcher_is_ready(const struct port_watcher *);
-uint32_t port_watcher_get_config(const struct port_watcher *,
- uint16_t port_no);
-const char *port_watcher_get_name(const struct port_watcher *,
- uint16_t port_no) UNUSED;
-const uint8_t *port_watcher_get_hwaddr(const struct port_watcher *,
- uint16_t port_no);
-void port_watcher_set_flags(struct port_watcher *, uint16_t port_no,
- uint32_t config, uint32_t c_mask,
- uint32_t state, uint32_t s_mask);
-
-typedef void port_changed_cb_func(uint16_t port_no,
- const struct ofp_phy_port *old,
- const struct ofp_phy_port *new,
- void *aux);
-
-void port_watcher_register_callback(struct port_watcher *,
- port_changed_cb_func *port_changed,
- void *aux);
-
-typedef void local_port_changed_cb_func(const struct ofp_phy_port *new,
- void *aux);
-
-void port_watcher_register_local_port_callback(struct port_watcher *pw,
- local_port_changed_cb_func *cb,
- void *aux);
-
-void get_port_name(const struct ofp_phy_port *, char *name, size_t name_size);
-
-#endif /* port-watcher.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "ratelimit.h"
-#include <arpa/inet.h>
-#include <stdlib.h>
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "poll-loop.h"
-#include "queue.h"
-#include "rconn.h"
-#include "secchan.h"
-#include "status.h"
-#include "timeval.h"
-#include "vconn.h"
-
-struct rate_limiter {
- const struct settings *s;
- struct rconn *remote_rconn;
-
- /* One queue per physical port. */
- struct ofp_queue queues[OFPP_MAX];
- int n_queued; /* Sum over queues[*].n. */
- int next_tx_port; /* Next port to check in round-robin. */
-
- /* Token bucket.
- *
- * It costs 1000 tokens to send a single packet_in message. A single token
- * per message would be more straightforward, but this choice lets us avoid
- * round-off error in refill_bucket()'s calculation of how many tokens to
- * add to the bucket, since no division step is needed. */
- long long int last_fill; /* Time at which we last added tokens. */
- int tokens; /* Current number of tokens. */
-
- /* Transmission queue. */
- int n_txq; /* No. of packets waiting in rconn for tx. */
-
- /* Statistics reporting. */
- unsigned long long n_normal; /* # txed w/o rate limit queuing. */
- unsigned long long n_limited; /* # queued for rate limiting. */
- unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */
- unsigned long long n_tx_dropped; /* # dropped due to tx overflow. */
-};
-
-/* Drop a packet from the longest queue in 'rl'. */
-static void
-drop_packet(struct rate_limiter *rl)
-{
- struct ofp_queue *longest; /* Queue currently selected as longest. */
- int n_longest; /* # of queues of same length as 'longest'. */
- struct ofp_queue *q;
-
- longest = &rl->queues[0];
- n_longest = 1;
- for (q = &rl->queues[0]; q < &rl->queues[OFPP_MAX]; q++) {
- if (longest->n < q->n) {
- longest = q;
- n_longest = 1;
- } else if (longest->n == q->n) {
- n_longest++;
-
- /* Randomly select one of the longest queues, with a uniform
- * distribution (Knuth algorithm 3.4.2R). */
- if (!random_range(n_longest)) {
- longest = q;
- }
- }
- }
-
- /* FIXME: do we want to pop the tail instead? */
- ofpbuf_delete(queue_pop_head(longest));
- rl->n_queued--;
-}
-
-/* Remove and return the next packet to transmit (in round-robin order). */
-static struct ofpbuf *
-dequeue_packet(struct rate_limiter *rl)
-{
- unsigned int i;
-
- for (i = 0; i < OFPP_MAX; i++) {
- unsigned int port = (rl->next_tx_port + i) % OFPP_MAX;
- struct ofp_queue *q = &rl->queues[port];
- if (q->n) {
- rl->next_tx_port = (port + 1) % OFPP_MAX;
- rl->n_queued--;
- return queue_pop_head(q);
- }
- }
- NOT_REACHED();
-}
-
-/* Add tokens to the bucket based on elapsed time. */
-static void
-refill_bucket(struct rate_limiter *rl)
-{
- const struct settings *s = rl->s;
- long long int now = time_msec();
- long long int tokens = (now - rl->last_fill) * s->rate_limit + rl->tokens;
- if (tokens >= 1000) {
- rl->last_fill = now;
- rl->tokens = MIN(tokens, s->burst_limit * 1000);
- }
-}
-
-/* Attempts to remove enough tokens from 'rl' to transmit a packet. Returns
- * true if successful, false otherwise. (In the latter case no tokens are
- * removed.) */
-static bool
-get_token(struct rate_limiter *rl)
-{
- if (rl->tokens >= 1000) {
- rl->tokens -= 1000;
- return true;
- } else {
- return false;
- }
-}
-
-static bool
-rate_limit_local_packet_cb(struct relay *r, void *rl_)
-{
- struct rate_limiter *rl = rl_;
- const struct settings *s = rl->s;
- struct ofp_packet_in *opi;
-
- opi = get_ofp_packet_in(r);
- if (!opi) {
- return false;
- }
-
- if (opi->reason == OFPR_ACTION) {
- /* Don't rate-limit 'ofp-packet_in's generated by flows that the
- * controller set up. XXX we should really just rate-limit them
- * *separately* so that no one can flood the controller this way. */
- return false;
- }
-
- if (!rl->n_queued && get_token(rl)) {
- /* In the common case where we are not constrained by the rate limit,
- * let the packet take the normal path. */
- rl->n_normal++;
- return false;
- } else {
- /* Otherwise queue it up for the periodic callback to drain out. */
- struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf;
- int port = ntohs(opi->in_port) % OFPP_MAX;
- if (rl->n_queued >= s->burst_limit) {
- drop_packet(rl);
- }
- queue_push_tail(&rl->queues[port], ofpbuf_clone(msg));
- rl->n_queued++;
- rl->n_limited++;
- return true;
- }
-}
-
-static void
-rate_limit_status_cb(struct status_reply *sr, void *rl_)
-{
- struct rate_limiter *rl = rl_;
-
- status_reply_put(sr, "normal=%llu", rl->n_normal);
- status_reply_put(sr, "limited=%llu", rl->n_limited);
- status_reply_put(sr, "queue-dropped=%llu", rl->n_queue_dropped);
- status_reply_put(sr, "tx-dropped=%llu", rl->n_tx_dropped);
-}
-
-static void
-rate_limit_periodic_cb(void *rl_)
-{
- struct rate_limiter *rl = rl_;
- int i;
-
- /* Drain some packets out of the bucket if possible, but limit the number
- * of iterations to allow other code to get work done too. */
- refill_bucket(rl);
- for (i = 0; rl->n_queued && get_token(rl) && i < 50; i++) {
- /* Use a small, arbitrary limit for the amount of queuing to do here,
- * because the TCP connection is responsible for buffering and there is
- * no point in trying to transmit faster than the TCP connection can
- * handle. */
- struct ofpbuf *b = dequeue_packet(rl);
- if (rconn_send_with_limit(rl->remote_rconn, b, &rl->n_txq, 10)) {
- rl->n_tx_dropped++;
- }
- }
-}
-
-static void
-rate_limit_wait_cb(void *rl_)
-{
- struct rate_limiter *rl = rl_;
- if (rl->n_queued) {
- if (rl->tokens >= 1000) {
- /* We can transmit more packets as soon as we're called again. */
- poll_immediate_wake();
- } else {
- /* We have to wait for the bucket to re-fill. We could calculate
- * the exact amount of time here for increased smoothness. */
- poll_timer_wait(TIME_UPDATE_INTERVAL / 2);
- }
- }
-}
-
-static const struct hook_class rate_limit_hook_class = {
- rate_limit_local_packet_cb, /* local_packet_cb */
- NULL, /* remote_packet_cb */
- rate_limit_periodic_cb, /* periodic_cb */
- rate_limit_wait_cb, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-rate_limit_start(struct secchan *secchan, const struct settings *s,
- struct switch_status *ss, struct rconn *remote)
-{
- struct rate_limiter *rl;
- size_t i;
-
- rl = xcalloc(1, sizeof *rl);
- rl->s = s;
- rl->remote_rconn = remote;
- for (i = 0; i < ARRAY_SIZE(rl->queues); i++) {
- queue_init(&rl->queues[i]);
- }
- rl->last_fill = time_msec();
- rl->tokens = s->rate_limit * 100;
- switch_status_register_category(ss, "rate-limit",
- rate_limit_status_cb, rl);
- add_hook(secchan, &rate_limit_hook_class, rl);
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef RATELIMIT_H
-#define RATELIMIT_H 1
-
-struct rconn;
-struct secchan;
-struct settings;
-struct switch_status;
-
-void rate_limit_start(struct secchan *, const struct settings *,
- struct switch_status *, struct rconn *remote);
-
-#endif /* ratelimit.h */
Listens for connections on Unix domain server socket named \fIfile\fR.
.RE
-.TP
-\fB-m\fR, \fB--monitor=\fImethod\fR
-Configures the switch to additionally listen for incoming OpenFlow
-connections for switch monitoring with \fBdpctl\fR's \fBmonitor\fR
-command. The \fImethod\fR must be given as one of the passive
-OpenFlow connection methods listed above as acceptable for
-\fB--listen\fR.
-
-When \fBdpctl monitor\fR makes a monitoring connection, \fBsecchan\fR
-sends it a copy of every OpenFlow message sent to or received from the
-kernel in the normal course of its operations. It does not send a
-copy of any messages sent to or from the OpenFlow connection to the
-controller. Most of these messages will be seen anyhow, however,
-because \fBsecchan\fR mainly acts as a relay between the controller
-and the kernel. \fBsecchan\fR also does not send a copy of any
-messages sent to or from the OpenFlow connection to the controller.
-Such messages will typically \fBnot\fR be seen, because \fBsecchan\fR
-maintains a separate connection to the kernel for each management
-connection.
-
-Messages are copied to the monitoring connections on a best-effort
-basis. In particular, if the socket buffer of the monitoring
-connection fills up, some messages will be lost.
-
.TP
\fB--in-band\fR, \fB--out-of-band\fR
Configures \fBsecchan\fR to operate in in-band or out-of-band control
#include <assert.h>
#include <errno.h>
#include <getopt.h>
+#include <inttypes.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <signal.h>
#include "daemon.h"
#include "dirs.h"
#include "discovery.h"
-#include "executer.h"
+#include "dpif.h"
#include "fail-open.h"
#include "fault.h"
#include "in-band.h"
#include "leak-checker.h"
#include "list.h"
+#include "netdev.h"
#include "ofpbuf.h"
+#include "ofproto.h"
#include "openflow/openflow.h"
#include "packets.h"
-#include "port-watcher.h"
#include "poll-loop.h"
-#include "ratelimit.h"
#include "rconn.h"
#include "signals.h"
-#ifdef SUPPORT_SNAT
-#include "snat.h"
-#endif
-#include "flow-end.h"
-#include "stp-secchan.h"
#include "status.h"
#include "timeval.h"
#include "util.h"
#include "vlog.h"
#define THIS_MODULE VLM_secchan
-struct hook {
- const struct hook_class *class;
- void *aux;
-};
-
-struct secchan {
- struct hook *hooks;
- size_t n_hooks, allocated_hooks;
-};
-
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
-
-static void reconfigure(struct secchan *);
+static void reconfigure(struct ofproto *);
static void parse_options(int argc, char *argv[], struct settings *);
static void usage(void) NO_RETURN;
-static char *vconn_name_without_subscription(const char *);
-static struct pvconn *open_passive_vconn(const char *name);
-static struct vconn *accept_vconn(struct pvconn *pvconn);
-
-static struct relay *relay_create(struct rconn *async,
- struct rconn *local, struct rconn *remote,
- bool is_mgmt_conn);
-static struct relay *relay_accept(const struct settings *, struct pvconn *);
-static void relay_run(struct relay *, struct secchan *);
-static void relay_wait(struct relay *);
-static void relay_destroy(struct relay *);
-
int
main(int argc, char *argv[])
{
- struct settings s;
-
- struct list relays = LIST_INITIALIZER(&relays);
-
- struct secchan secchan;
-
- struct pvconn *monitor;
-
- struct pvconn *listeners[MAX_MGMT];
- size_t n_listeners;
-
struct signal *sighup;
- char *local_rconn_name;
- struct rconn *async_rconn, *local_rconn, *remote_rconn;
- struct relay *controller_relay;
- struct discovery *discovery;
- struct switch_status *switch_status;
- struct port_watcher *pw;
- int i;
+ struct ofproto *ofproto;
+ struct settings s;
int retval;
set_program_name(argv[0]);
signal(SIGPIPE, SIG_IGN);
sighup = signal_register(SIGHUP);
- secchan.hooks = NULL;
- secchan.n_hooks = 0;
- secchan.allocated_hooks = 0;
-
- /* Start listening for management and monitoring connections. */
- n_listeners = 0;
- for (i = 0; i < s.n_listeners; i++) {
- listeners[n_listeners++] = open_passive_vconn(s.listener_names[i]);
- }
- monitor = s.monitor_name ? open_passive_vconn(s.monitor_name) : NULL;
-
- /* Initialize switch status hook. */
- switch_status_start(&secchan, &s, &switch_status);
-
die_if_already_running();
daemonize();
VLOG_INFO("OpenFlow reference implementation version %s", VERSION BUILDNR);
VLOG_INFO("OpenFlow protocol version 0x%02x", OFP_VERSION);
- /* Check datapath name, to try to catch command-line invocation errors. */
- if (strncmp(s.dp_name, "nl:", 3) && strncmp(s.dp_name, "unix:", 5)
- && !s.controller_name) {
- VLOG_WARN("Controller not specified and datapath is not nl: or "
- "unix:. (Did you forget to specify the datapath?)");
- }
-
- if (!strncmp(s.dp_name, "nl:", 3)) {
- /* Connect to datapath with a subscription for asynchronous events. By
- * separating the connection for asynchronous events from that for
- * request and replies we prevent the socket receive buffer from being
- * filled up by received packet data, which in turn would prevent
- * getting replies to any Netlink messages we send to the kernel. */
- async_rconn = rconn_create(0, s.max_backoff);
- rconn_connect(async_rconn, s.dp_name);
- switch_status_register_category(switch_status, "async",
- rconn_status_cb, async_rconn);
- } else {
- /* No need for a separate asynchronous connection: we must be connected
- * to the user datapath, which is smart enough to discard packet events
- * instead of message replies. In fact, having a second connection
- * would work against us since we'd get double copies of asynchronous
- * event messages (the user datapath provides no way to turn off
- * asynchronous events). */
- async_rconn = NULL;
- }
-
- /* Connect to datapath without a subscription, for requests and replies. */
- local_rconn_name = vconn_name_without_subscription(s.dp_name);
- local_rconn = rconn_create(0, s.max_backoff);
- rconn_connect(local_rconn, local_rconn_name);
- free(local_rconn_name);
- switch_status_register_category(switch_status, "local",
- rconn_status_cb, local_rconn);
-
- /* Connect to controller. */
- remote_rconn = rconn_create(s.probe_interval, s.max_backoff);
- if (s.controller_name) {
- retval = rconn_connect(remote_rconn, s.controller_name);
- if (retval == EAFNOSUPPORT) {
- ofp_fatal(0, "No support for %s vconn", s.controller_name);
- }
- }
- switch_status_register_category(switch_status, "remote",
- rconn_status_cb, remote_rconn);
-
- /* Start relaying. */
- controller_relay = relay_create(async_rconn, local_rconn, remote_rconn,
- false);
- list_push_back(&relays, &controller_relay->node);
-
- /* Set up hooks. */
- port_watcher_start(&secchan, local_rconn, remote_rconn, &pw);
- discovery = s.discovery ? discovery_init(&s, pw, switch_status) : NULL;
-#ifdef SUPPORT_SNAT
- snat_start(&secchan, pw);
-#endif
- flow_end_start(&secchan, &s, local_rconn, remote_rconn);
- if (s.enable_stp) {
- stp_start(&secchan, pw, local_rconn, remote_rconn);
- }
- if (s.in_band) {
- in_band_start(&secchan, &s, switch_status, pw, remote_rconn);
- }
- if (s.fail_mode == FAIL_OPEN) {
- fail_open_start(&secchan, &s, switch_status,
- local_rconn, remote_rconn);
- }
- if (s.rate_limit) {
- rate_limit_start(&secchan, &s, switch_status, remote_rconn);
- }
- if (s.command_acl[0]) {
- executer_start(&secchan, &s);
- }
-
- reconfigure(&secchan);
-
- while (s.discovery || rconn_is_alive(remote_rconn)) {
- struct relay *r, *n;
- size_t i;
+ /* Start OpenFlow processing. */
+ ofproto = ofproto_create(&s);
+ reconfigure(ofproto);
+ while (ofproto_is_alive(ofproto)) {
if (signal_poll(sighup)) {
- reconfigure(&secchan);
+ reconfigure(ofproto);
}
/* Do work. */
- LIST_FOR_EACH_SAFE (r, n, struct relay, node, &relays) {
- relay_run(r, &secchan);
- }
- for (i = 0; i < n_listeners; i++) {
- for (;;) {
- struct relay *r = relay_accept(&s, listeners[i]);
- if (!r) {
- break;
- }
- list_push_back(&relays, &r->node);
- }
- }
- if (monitor) {
- struct vconn *new = accept_vconn(monitor);
- if (new) {
- /* XXX should monitor async_rconn too but rconn_add_monitor()
- * takes ownership of the vconn passed in. */
- rconn_add_monitor(local_rconn, new);
- }
- }
- for (i = 0; i < secchan.n_hooks; i++) {
- if (secchan.hooks[i].class->periodic_cb) {
- secchan.hooks[i].class->periodic_cb(secchan.hooks[i].aux);
- }
- }
- if (s.discovery) {
- char *controller_name;
- if (rconn_is_connectivity_questionable(remote_rconn)) {
- discovery_question_connectivity(discovery);
- }
- if (discovery_run(discovery, &controller_name)) {
- if (controller_name) {
- rconn_connect(remote_rconn, controller_name);
- } else {
- rconn_disconnect(remote_rconn);
- }
- }
- }
+ ofproto_run(ofproto);
/* Wait for something to happen. */
- LIST_FOR_EACH (r, struct relay, node, &relays) {
- relay_wait(r);
- }
- for (i = 0; i < n_listeners; i++) {
- pvconn_wait(listeners[i]);
- }
- if (monitor) {
- pvconn_wait(monitor);
- }
- for (i = 0; i < secchan.n_hooks; i++) {
- if (secchan.hooks[i].class->wait_cb) {
- secchan.hooks[i].class->wait_cb(secchan.hooks[i].aux);
- }
- }
- if (discovery) {
- discovery_wait(discovery);
- }
+ ofproto_wait(ofproto);
signal_wait(sighup);
poll_block();
}
}
static void
-reconfigure(struct secchan *secchan)
+reconfigure(struct ofproto *ofproto)
{
- int i;
-
cfg_read();
- for (i = 0; i < secchan->n_hooks; i++) {
- if (secchan->hooks[i].class->reconfigure_cb) {
- secchan->hooks[i].class->reconfigure_cb(secchan->hooks[i].aux);
- }
- }
-}
-
-static struct pvconn *
-open_passive_vconn(const char *name)
-{
- struct pvconn *pvconn;
- int retval;
-
- retval = pvconn_open(name, &pvconn);
- if (retval && retval != EAGAIN) {
- ofp_fatal(retval, "opening %s", name);
- }
- return pvconn;
-}
-
-static struct vconn *
-accept_vconn(struct pvconn *pvconn)
-{
- struct vconn *new;
- int retval;
-
- retval = pvconn_accept(pvconn, OFP_VERSION, &new);
- if (retval && retval != EAGAIN) {
- VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval));
- }
- return new;
-}
-
-void
-add_hook(struct secchan *secchan, const struct hook_class *class, void *aux)
-{
- struct hook *hook;
-
- if (secchan->n_hooks >= secchan->allocated_hooks) {
- secchan->hooks = x2nrealloc(secchan->hooks, &secchan->allocated_hooks,
- sizeof *secchan->hooks);
- }
- hook = &secchan->hooks[secchan->n_hooks++];
- hook->class = class;
- hook->aux = aux;
-}
-
-struct ofp_packet_in *
-get_ofp_packet_in(struct relay *r)
-{
- struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf;
- struct ofp_header *oh = msg->data;
- if (oh->type == OFPT_PACKET_IN) {
- if (msg->size >= offsetof (struct ofp_packet_in, data)) {
- return msg->data;
- } else {
- VLOG_WARN("packet too short (%zu bytes) for packet_in",
- msg->size);
- }
- }
- return NULL;
-}
-
-bool
-get_ofp_packet_eth_header(struct relay *r, struct ofp_packet_in **opip,
- struct eth_header **ethp)
-{
- const int min_len = offsetof(struct ofp_packet_in, data) + ETH_HEADER_LEN;
- struct ofp_packet_in *opi = get_ofp_packet_in(r);
- if (opi && ntohs(opi->header.length) >= min_len) {
- *opip = opi;
- *ethp = (void *) opi->data;
- return true;
- }
- return false;
-}
-\f
-/* OpenFlow message relaying. */
-
-/* Returns a malloc'd string containing a copy of 'vconn_name' modified not to
- * subscribe to asynchronous messages such as 'ofp_packet_in' events (if
- * possible). */
-static char *
-vconn_name_without_subscription(const char *vconn_name)
-{
- int nl_index;
- if (sscanf(vconn_name, "nl:%d", &nl_index) == 1) {
- /* nl:123 or nl:123:1 opens a netlink connection to local datapath 123.
- * nl:123:0 opens a netlink connection to local datapath 123 without
- * obtaining a subscription for ofp_packet_in or ofp_flow_expired
- * messages. */
- return xasprintf("nl:%d:0", nl_index);
- } else {
- /* We don't have a way to specify not to subscribe to those messages
- * for other transports. (That's a defect: really this should be in
- * the OpenFlow protocol, not the Netlink transport). */
- VLOG_WARN_RL(&rl, "new management connection will receive "
- "asynchronous messages");
- return xstrdup(vconn_name);
- }
-}
-
-static struct relay *
-relay_accept(const struct settings *s, struct pvconn *pvconn)
-{
- struct vconn *new_remote, *new_local;
- struct rconn *r1, *r2;
- char *vconn_name;
- int retval;
-
- new_remote = accept_vconn(pvconn);
- if (!new_remote) {
- return NULL;
- }
-
- vconn_name = vconn_name_without_subscription(s->dp_name);
- retval = vconn_open(vconn_name, OFP_VERSION, &new_local);
- if (retval) {
- VLOG_ERR_RL(&rl, "could not connect to %s (%s)",
- vconn_name, strerror(retval));
- vconn_close(new_remote);
- free(vconn_name);
- return NULL;
- }
-
- /* Create and return relay. */
- r1 = rconn_create(0, 0);
- rconn_connect_unreliably(r1, vconn_name, new_local);
- free(vconn_name);
-
- r2 = rconn_create(0, 0);
- rconn_connect_unreliably(r2, "passive", new_remote);
-
- return relay_create(NULL, r1, r2, true);
-}
-
-static struct relay *
-relay_create(struct rconn *async, struct rconn *local, struct rconn *remote,
- bool is_mgmt_conn)
-{
- struct relay *r = xcalloc(1, sizeof *r);
- r->halves[HALF_LOCAL].rconn = local;
- r->halves[HALF_REMOTE].rconn = remote;
- r->is_mgmt_conn = is_mgmt_conn;
- r->async_rconn = async;
- return r;
-}
-
-static bool
-call_local_packet_cbs(struct secchan *secchan, struct relay *r)
-{
- const struct hook *h;
- for (h = secchan->hooks; h < &secchan->hooks[secchan->n_hooks]; h++) {
- bool (*cb)(struct relay *, void *aux) = h->class->local_packet_cb;
- if (cb && (cb)(r, h->aux)) {
- return true;
- }
- }
- return false;
-}
-
-static bool
-call_remote_packet_cbs(struct secchan *secchan, struct relay *r)
-{
- const struct hook *h;
- for (h = secchan->hooks; h < &secchan->hooks[secchan->n_hooks]; h++) {
- bool (*cb)(struct relay *, void *aux) = h->class->remote_packet_cb;
- if (cb && (cb)(r, h->aux)) {
- return true;
- }
- }
- return false;
-}
-
-static void
-relay_run(struct relay *r, struct secchan *secchan)
-{
- int iteration;
- int i;
-
- if (r->async_rconn) {
- rconn_run(r->async_rconn);
- }
- for (i = 0; i < 2; i++) {
- rconn_run(r->halves[i].rconn);
- }
-
- /* Limit the number of iterations to prevent other tasks from starving. */
- for (iteration = 0; iteration < 50; iteration++) {
- bool progress = false;
- for (i = 0; i < 2; i++) {
- struct half *this = &r->halves[i];
- struct half *peer = &r->halves[!i];
-
- if (!this->rxbuf) {
- this->rxbuf = rconn_recv(this->rconn);
- if (!this->rxbuf && i == HALF_LOCAL && r->async_rconn) {
- this->rxbuf = rconn_recv(r->async_rconn);
- }
- if (this->rxbuf && (i == HALF_REMOTE || !r->is_mgmt_conn)) {
- if (i == HALF_LOCAL
- ? call_local_packet_cbs(secchan, r)
- : call_remote_packet_cbs(secchan, r))
- {
- ofpbuf_delete(this->rxbuf);
- this->rxbuf = NULL;
- progress = true;
- break;
- }
- }
- }
-
- if (this->rxbuf && !this->n_txq) {
- int retval = rconn_send(peer->rconn, this->rxbuf,
- &this->n_txq);
- if (retval != EAGAIN) {
- if (!retval) {
- progress = true;
- } else {
- ofpbuf_delete(this->rxbuf);
- }
- this->rxbuf = NULL;
- }
- }
- }
- if (!progress) {
- break;
- }
- }
-
- if (r->is_mgmt_conn) {
- for (i = 0; i < 2; i++) {
- struct half *this = &r->halves[i];
- if (!rconn_is_alive(this->rconn)) {
- relay_destroy(r);
- return;
- }
- }
- }
-}
-
-static void
-relay_wait(struct relay *r)
-{
- int i;
-
- if (r->async_rconn) {
- rconn_run_wait(r->async_rconn);
- }
- for (i = 0; i < 2; i++) {
- struct half *this = &r->halves[i];
-
- rconn_run_wait(this->rconn);
- if (!this->rxbuf) {
- rconn_recv_wait(this->rconn);
- if (i == HALF_LOCAL && r->async_rconn) {
- rconn_recv_wait(r->async_rconn);
- }
- }
- }
-}
-
-static void
-relay_destroy(struct relay *r)
-{
- int i;
-
- list_remove(&r->node);
- rconn_destroy(r->async_rconn);
- for (i = 0; i < 2; i++) {
- struct half *this = &r->halves[i];
- rconn_destroy(this->rconn);
- ofpbuf_delete(this->rxbuf);
- }
- free(r);
+ ofproto_reconfigure(ofproto);
}
\f
/* User interface. */
parse_options(int argc, char *argv[], struct settings *s)
{
enum {
- OPT_ACCEPT_VCONN = UCHAR_MAX + 1,
+ OPT_DATAPATH_ID = UCHAR_MAX + 1,
+ OPT_MANUFACTURER,
+ OPT_HARDWARE,
+ OPT_SOFTWARE,
+ OPT_SERIAL,
+ OPT_ACCEPT_VCONN,
OPT_NO_RESOLV_CONF,
OPT_BR_NAME,
OPT_FAIL_MODE,
LEAK_CHECKER_OPTION_ENUMS
};
static struct option long_options[] = {
+ {"datapath-id", required_argument, 0, OPT_DATAPATH_ID},
+ {"manufacturer", required_argument, 0, OPT_MANUFACTURER},
+ {"hardware", required_argument, 0, OPT_HARDWARE},
+ {"software", required_argument, 0, OPT_SOFTWARE},
+ {"serial", required_argument, 0, OPT_SERIAL},
{"accept-vconn", required_argument, 0, OPT_ACCEPT_VCONN},
{"no-resolv-conf", no_argument, 0, OPT_NO_RESOLV_CONF},
{"config", required_argument, 0, 'F'},
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
{"max-backoff", required_argument, 0, OPT_MAX_BACKOFF},
{"listen", required_argument, 0, 'l'},
- {"monitor", required_argument, 0, 'm'},
{"rate-limit", optional_argument, 0, OPT_RATE_LIMIT},
{"burst-limit", required_argument, 0, OPT_BURST_LIMIT},
{"stp", no_argument, 0, OPT_STP},
{0, 0, 0, 0},
};
char *short_options = long_options_to_short_options(long_options);
- char *accept_re = NULL;
- int retval;
/* Set defaults that we can figure out before parsing options. */
+ s->datapath_id = 0;
+ s->mfr_desc = "Nicira Networks, Inc.";
+ s->hw_desc = "Reference Implementation";
+ s->sw_desc = VERSION BUILDNR;
+ s->serial_desc = "None";
s->n_listeners = 0;
- s->monitor_name = NULL;
s->fail_mode = FAIL_OPEN;
s->max_idle = 15;
s->probe_interval = 15;
s->update_resolv_conf = true;
s->rate_limit = 0;
s->burst_limit = 0;
+ s->accept_controller_re = NULL;
s->enable_stp = false;
s->in_band = true;
s->command_acl = "";
}
switch (c) {
+ case OPT_DATAPATH_ID:
+ if (strlen(optarg) != 12
+ || strspn(optarg, "0123456789abcdefABCDEF") != 12) {
+ ofp_fatal(0, "argument to --datapath-id must be "
+ "exactly 12 hex digits");
+ }
+ s->datapath_id = strtoll(optarg, NULL, 16);
+ if (!s->datapath_id) {
+ ofp_fatal(0, "argument to --datapath-id must be nonzero");
+ }
+ break;
+
+ case OPT_MANUFACTURER:
+ s->mfr_desc = optarg;
+ break;
+
+ case OPT_HARDWARE:
+ s->hw_desc = optarg;
+ break;
+
+ case OPT_SOFTWARE:
+ s->sw_desc = optarg;
+ break;
+
+ case OPT_SERIAL:
+ s->serial_desc = optarg;
+ break;
+
case OPT_ACCEPT_VCONN:
- accept_re = optarg[0] == '^' ? optarg : xasprintf("^%s", optarg);
+ s->accept_controller_re = (optarg[0] == '^' ? optarg
+ : xasprintf("^%s", optarg));
break;
case OPT_BR_NAME:
s->listener_names[s->n_listeners++] = optarg;
break;
- case 'm':
- if (s->monitor_name) {
- ofp_fatal(0, "-m or --monitor may only be specified once");
- }
- s->monitor_name = optarg;
- break;
-
case 'h':
usage();
s->controller_name = argc > 1 ? xstrdup(argv[1]) : NULL;
/* Set accept_controller_regex. */
- if (!accept_re) {
- accept_re = vconn_ssl_is_configured() ? "^ssl:.*" : ".*";
+ if (!s->accept_controller_re) {
+ s->accept_controller_re = vconn_ssl_is_configured() ? "^ssl:.*" : ".*";
}
- retval = regcomp(&s->accept_controller_regex, accept_re,
- REG_NOSUB | REG_EXTENDED);
- if (retval) {
- size_t length = regerror(retval, &s->accept_controller_regex, NULL, 0);
- char *buffer = xmalloc(length);
- regerror(retval, &s->accept_controller_regex, buffer, length);
- ofp_fatal(0, "%s: %s", accept_re, buffer);
- }
- s->accept_controller_re = accept_re;
/* Mode of operation. */
s->discovery = s->controller_name == NULL;
{
printf("%s: secure channel, a relay for OpenFlow messages.\n"
"usage: %s [OPTIONS] DATAPATH [CONTROLLER]\n"
- "DATAPATH is an active connection method to a local datapath.\n"
+ "DATAPATH is a local datapath (e.g. \"dp0\").\n"
"CONTROLLER is an active OpenFlow connection method; if it is\n"
"omitted, then secchan performs controller discovery.\n",
program_name, program_name);
printf("\nConfiguration options:\n"
" -F, --config=FILE|DIR reads configuration from FILE or DIR\n"
" --br-name=NAME bridge name to use for configuration\n"
+ "\nOpenFlow options:\n"
+ " --datapath-id=ID Use ID as the OpenFlow switch ID\n"
+ " (ID must consist of 12 hex digits)\n"
+ " --manufacturer=MFR Identify manufacturer as MFR\n"
+ " --hardware=HW Identify hardware as HW\n"
+ " --software=SW Identify software as SW\n"
+ " --serial=SERIAL Identify serial number as SERIAL\n"
"\nController discovery options:\n"
" --accept-vconn=REGEX accept matching discovered controllers\n"
" --no-resolv-conf do not update /etc/resolv.conf\n"
" attempts (default: 15 seconds)\n"
" -l, --listen=METHOD allow management connections on METHOD\n"
" (a passive OpenFlow connection method)\n"
- " -m, --monitor=METHOD copy traffic to/from kernel to METHOD\n"
- " (a passive OpenFlow connection method)\n"
" --out-of-band controller connection is out-of-band\n"
" --stp enable 802.1D Spanning Tree Protocol\n"
" --no-stp disable 802.1D Spanning Tree Protocol\n"
#ifndef SECCHAN_H
#define SECCHAN_H 1
-#include <regex.h>
#include <stdbool.h>
#include <stddef.h>
#include "list.h"
-#include "packets.h"
-
-struct secchan;
/* Behavior when the connection to the controller fails. */
enum fail_mode {
bool discovery; /* Discover the controller automatically? */
bool in_band; /* Connect to controller in-band? */
+ /* Datapath. */
+ uint64_t datapath_id; /* Datapath ID. */
+ const char *dp_name; /* Name of local datapath. */
+
+ /* Description strings. */
+ const char *mfr_desc; /* Manufacturer. */
+ const char *hw_desc; /* Hardware. */
+ const char *sw_desc; /* Software version. */
+ const char *serial_desc; /* Serial number. */
+
/* Related vconns and network devices. */
- const char *dp_name; /* Local datapath. */
const char *controller_name; /* Controller (if not discovery mode). */
const char *listener_names[MAX_MGMT]; /* Listen for mgmt connections. */
size_t n_listeners; /* Number of mgmt connection listeners. */
- const char *monitor_name; /* Listen for traffic monitor connections. */
/* Failure behavior. */
enum fail_mode fail_mode; /* Act as learning switch if no controller? */
int burst_limit; /* Maximum number token bucket size. */
/* Discovery behavior. */
- regex_t accept_controller_regex; /* Controller vconns to accept. */
- const char *accept_controller_re; /* String version of regex. */
+ const char *accept_controller_re; /* Controller vconns to accept. */
bool update_resolv_conf; /* Update /etc/resolv.conf? */
/* Spanning tree protocol. */
char *command_dir; /* Directory that contains commands. */
};
-struct half {
- struct rconn *rconn;
- struct ofpbuf *rxbuf;
- int n_txq; /* No. of packets queued for tx on 'rconn'. */
-};
-
-struct relay {
- struct list node;
-
-#define HALF_LOCAL 0
-#define HALF_REMOTE 1
- struct half halves[2];
-
- /* The secchan has a primary connection (relay) to an OpenFlow controller.
- * This primary connection actually makes two connections to the datapath:
- * one for OpenFlow requests and responses, and one that is only used for
- * receiving asynchronous events such as 'ofp_packet_in' events. This
- * design keeps replies to OpenFlow requests from being dropped by the
- * kernel due to a flooded network device.
- *
- * The secchan may also have any number of secondary "management"
- * connections (relays). These connections do not receive asychronous
- * events and thus have a null 'async_rconn'. */
- bool is_mgmt_conn; /* Is this a management connection? */
- struct rconn *async_rconn; /* For receiving asynchronous events. */
-};
-
-struct hook_class {
- bool (*local_packet_cb)(struct relay *, void *aux);
- bool (*remote_packet_cb)(struct relay *, void *aux);
- void (*periodic_cb)(void *aux);
- void (*wait_cb)(void *aux);
- void (*closing_cb)(struct relay *, void *aux);
- void (*reconfigure_cb)(void *aux);
-};
-
-void add_hook(struct secchan *, const struct hook_class *, void *);
-
-struct ofp_packet_in *get_ofp_packet_in(struct relay *);
-bool get_ofp_packet_eth_header(struct relay *, struct ofp_packet_in **,
- struct eth_header **);
-void get_ofp_packet_payload(struct ofp_packet_in *, struct ofpbuf *);
-
-
#endif /* secchan.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "snat.h"
-#include <arpa/inet.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include "openflow/nicira-ext.h"
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "port-watcher.h"
-
-#define THIS_MODULE VLM_snat
-#include "vlog.h"
-
-struct snat_port_conf {
- struct list node;
- struct nx_snat_config config;
-};
-
-struct snat_data {
- struct port_watcher *pw;
- struct list port_list;
-};
-
-
-/* Source-NAT configuration monitor. */
-#define SNAT_CMD_LEN 1024
-
-/* Commands to configure iptables. There is no programmatic interface
- * to iptables from the kernel, so we're stuck making command-line calls
- * in user-space. */
-#define SNAT_FLUSH_ALL_CMD "/sbin/iptables -t nat -F"
-#define SNAT_FLUSH_CHAIN_CMD "/sbin/iptables -t nat -F of-snat-%s"
-
-#define SNAT_ADD_CHAIN_CMD "/sbin/iptables -t nat -N of-snat-%s"
-#define SNAT_CONF_CHAIN_CMD "/sbin/iptables -t nat -A POSTROUTING -o %s -j of-snat-%s"
-
-#define SNAT_ADD_IP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT --to %s-%s"
-#define SNAT_ADD_TCP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p TCP --to %s-%s:%d-%d"
-#define SNAT_ADD_UDP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p UDP --to %s-%s:%d-%d"
-
-#define SNAT_UNSET_CHAIN_CMD "/sbin/iptables -t nat -D POSTROUTING -o %s -j of-snat-%s"
-#define SNAT_DEL_CHAIN_CMD "/sbin/iptables -t nat -X of-snat-%s"
-
-static void
-snat_add_rules(const struct nx_snat_config *sc, const uint8_t *dev_name)
-{
- char command[SNAT_CMD_LEN];
- char ip_str_start[16];
- char ip_str_end[16];
-
-
- snprintf(ip_str_start, sizeof ip_str_start, IP_FMT,
- IP_ARGS(&sc->ip_addr_start));
- snprintf(ip_str_end, sizeof ip_str_end, IP_FMT,
- IP_ARGS(&sc->ip_addr_end));
-
- /* We always attempt to remove existing entries, so that we know
- * there's a pristine state for SNAT on the interface. We just ignore
- * the results of these calls, since iptables will complain about
- * any non-existent entries. */
-
- /* Flush the chain that does the SNAT. */
- snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name);
- system(command);
-
- /* We always try to create the a new chain. */
- snprintf(command, sizeof(command), SNAT_ADD_CHAIN_CMD, dev_name);
- system(command);
-
- /* Disassociate any old SNAT chain from the POSTROUTING chain. */
- snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name,
- dev_name);
- system(command);
-
- /* Associate the new chain with the POSTROUTING hook. */
- snprintf(command, sizeof(command), SNAT_CONF_CHAIN_CMD, dev_name,
- dev_name);
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem flushing chain for add");
- return;
- }
-
- /* If configured, restrict TCP source port ranges. */
- if ((sc->tcp_start != 0) && (sc->tcp_end != 0)) {
- snprintf(command, sizeof(command), SNAT_ADD_TCP_CMD,
- dev_name, ip_str_start, ip_str_end,
- ntohs(sc->tcp_start), ntohs(sc->tcp_end));
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem adding TCP rule");
- return;
- }
- }
-
- /* If configured, restrict UDP source port ranges. */
- if ((sc->udp_start != 0) && (sc->udp_end != 0)) {
- snprintf(command, sizeof(command), SNAT_ADD_UDP_CMD,
- dev_name, ip_str_start, ip_str_end,
- ntohs(sc->udp_start), ntohs(sc->udp_end));
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem adding UDP rule");
- return;
- }
- }
-
- /* Add a rule that covers all IP traffic that would not be covered
- * by the prior TCP or UDP ranges. */
- snprintf(command, sizeof(command), SNAT_ADD_IP_CMD,
- dev_name, ip_str_start, ip_str_end);
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem adding base rule");
- return;
- }
-}
-
-static void
-snat_del_rules(const uint8_t *dev_name)
-{
- char command[SNAT_CMD_LEN];
-
- /* Flush the chain that does the SNAT. */
- snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name);
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem flushing chain for deletion");
- return;
- }
-
- /* Disassociate the SNAT chain from the POSTROUTING chain. */
- snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name,
- dev_name);
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem unsetting chain");
- return;
- }
-
- /* Now we can finally delete our SNAT chain. */
- snprintf(command, sizeof(command), SNAT_DEL_CHAIN_CMD, dev_name);
- if (system(command) != 0) {
- VLOG_ERR("SNAT: problem deleting chain");
- return;
- }
-}
-
-static void
-snat_config(const struct nx_snat_config *sc, struct snat_data *snat)
-{
- struct snat_port_conf *c, *spc=NULL;
- const uint8_t *netdev_name;
-
- netdev_name = (const uint8_t *) port_watcher_get_name(snat->pw,
- ntohs(sc->port));
- if (!netdev_name) {
- return;
- }
-
- LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) {
- if (c->config.port == sc->port) {
- spc = c;
- break;
- }
- }
-
- if (sc->command == NXSC_ADD) {
- if (!spc) {
- spc = xmalloc(sizeof(*c));
- if (!spc) {
- VLOG_ERR("SNAT: no memory for new entry");
- return;
- }
- list_push_back(&snat->port_list, &spc->node);
- }
- memcpy(&spc->config, sc, sizeof(spc->config));
- snat_add_rules(sc, netdev_name);
- } else if (spc) {
- snat_del_rules(netdev_name);
- list_remove(&spc->node);
- }
-}
-
-static bool
-snat_remote_packet_cb(struct relay *r, void *snat_)
-{
- struct snat_data *snat = snat_;
- struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf;
- struct nicira_header *request = msg->data;
- struct nx_act_config *nac = msg->data;
- int n_configs, i;
-
-
- if (msg->size < sizeof(struct nx_act_config)) {
- return false;
- }
- request = msg->data;
- if (request->header.type != OFPT_VENDOR
- || request->vendor != htonl(NX_VENDOR_ID)
- || request->subtype != htonl(NXT_ACT_SET_CONFIG)) {
- return false;
- }
-
- /* We're only interested in attempts to configure SNAT */
- if (nac->type != htons(NXAST_SNAT)) {
- return false;
- }
-
- n_configs = (msg->size - sizeof *nac) / sizeof *nac->snat;
- for (i=0; i<n_configs; i++) {
- snat_config(&nac->snat[i], snat);
- }
-
- return false;
-}
-
-static void
-snat_port_changed_cb(uint16_t port_no UNUSED,
- const struct ofp_phy_port *old,
- const struct ofp_phy_port *new,
- void *snat_)
-{
- struct snat_data *snat = snat_;
- struct snat_port_conf *c;
-
- /* We're only interested in ports that went away */
- if (old && !new) {
- return;
- }
-
- LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) {
- if (c->config.port == old->port_no) {
- snat_del_rules(old->name);
- list_remove(&c->node);
- return;
- }
- }
-}
-
-static const struct hook_class snat_hook_class = {
- NULL, /* local_packet_cb */
- snat_remote_packet_cb, /* remote_packet_cb */
- NULL, /* periodic_cb */
- NULL, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-snat_start(struct secchan *secchan, struct port_watcher *pw)
-{
- int ret;
- struct snat_data *snat;
-
- ret = system(SNAT_FLUSH_ALL_CMD);
- if (ret != 0) {
- VLOG_ERR("SNAT: problem flushing tables");
- }
-
- snat = xcalloc(1, sizeof *snat);
- snat->pw = pw;
- list_init(&snat->port_list);
-
- port_watcher_register_callback(pw, snat_port_changed_cb, snat);
- add_hook(secchan, &snat_hook_class, snat);
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef SNAT_H
-#define SNAT_H 1
-
-#include "secchan.h"
-
-struct port_watcher;
-struct secchan;
-
-void snat_start(struct secchan *, struct port_watcher *);
-
-#endif /* snat.h */
#include <errno.h>
#include <unistd.h>
#include "dynamic-string.h"
-#include "openflow/nicira-ext.h"
#include "ofpbuf.h"
-#include "openflow/openflow.h"
+#include "openflow/nicira-ext.h"
#include "rconn.h"
+#include "secchan.h"
#include "timeval.h"
#include "vconn.h"
};
struct switch_status {
- const struct settings *s;
time_t booted;
struct switch_status_category *categories;
size_t n_categories, allocated_categories;
struct ds output;
};
-static bool
-switch_status_remote_packet_cb(struct relay *r, void *ss_)
+int
+switch_status_handle_request(struct switch_status *ss, struct rconn *rconn,
+ struct nicira_header *request)
{
- struct switch_status *ss = ss_;
- struct rconn *rc = r->halves[HALF_REMOTE].rconn;
- struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf;
struct switch_status_category *c;
- struct nicira_header *request;
struct nicira_header *reply;
struct status_reply sr;
struct ofpbuf *b;
int retval;
- if (msg->size < sizeof(struct nicira_header)) {
- return false;
- }
- request = msg->data;
- if (request->header.type != OFPT_VENDOR
- || request->vendor != htonl(NX_VENDOR_ID)
- || request->subtype != htonl(NXT_STATUS_REQUEST)) {
- return false;
- }
-
sr.request.string = (void *) (request + 1);
- sr.request.length = msg->size - sizeof *request;
+ sr.request.length = ntohs(request->header.length) - sizeof *request;
ds_init(&sr.output);
for (c = ss->categories; c < &ss->categories[ss->n_categories]; c++) {
if (!memcmp(c->name, sr.request.string,
reply->vendor = htonl(NX_VENDOR_ID);
reply->subtype = htonl(NXT_STATUS_REPLY);
memcpy(reply + 1, sr.output.string, sr.output.length);
- retval = rconn_send(rc, b, NULL);
+ retval = rconn_send(rconn, b, NULL);
if (retval && retval != EAGAIN) {
VLOG_WARN("send failed (%s)", strerror(retval));
}
ds_destroy(&sr.output);
- return true;
+ return 0;
}
void
status_reply_put(sr, "pid=%ld", (long int) getpid());
}
-static const struct hook_class switch_status_hook_class = {
- NULL, /* local_packet_cb */
- switch_status_remote_packet_cb, /* remote_packet_cb */
- NULL, /* periodic_cb */
- NULL, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-switch_status_start(struct secchan *secchan, const struct settings *s,
- struct switch_status **ssp)
+struct switch_status *
+switch_status_create(const struct settings *settings)
{
struct switch_status *ss = xcalloc(1, sizeof *ss);
- ss->s = s;
ss->booted = time_now();
switch_status_register_category(ss, "config",
- config_status_cb, (void *) s);
+ config_status_cb, (void *) settings);
switch_status_register_category(ss, "switch", switch_status_cb, ss);
- *ssp = ss;
- add_hook(secchan, &switch_status_hook_class, ss);
+ return ss;
}
void
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
+/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
* Junior University
*
* We are making the OpenFlow specification and associated documentation
#ifndef STATUS_H
#define STATUS_H 1
-#include "secchan.h"
+#include "compiler.h"
+struct nicira_header;
+struct rconn;
struct secchan;
+struct settings;
struct status_reply;
-struct switch_status;
-void switch_status_start(struct secchan *, const struct settings *,
- struct switch_status **);
+struct switch_status *switch_status_create(const struct settings *);
+
+int switch_status_handle_request(struct switch_status *, struct rconn *,
+ struct nicira_header *);
+
void switch_status_register_category(struct switch_status *,
const char *category,
void (*cb)(struct status_reply *,
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "stp-secchan.h"
-#include <arpa/inet.h>
-#include <inttypes.h>
-#include "flow.h"
-#include "secchan.h"
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "poll-loop.h"
-#include "port-watcher.h"
-#include "rconn.h"
-#include "stp.h"
-#include "timeval.h"
-#include "vconn.h"
-
-#define THIS_MODULE VLM_stp_secchan
-#include "vlog.h"
-
-struct stp_data {
- struct stp *stp;
- struct port_watcher *pw;
- struct rconn *local_rconn;
- struct rconn *remote_rconn;
- long long int last_tick;
- int n_txq;
-};
-
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
-
-static bool
-stp_local_packet_cb(struct relay *r, void *stp_)
-{
- struct ofpbuf *msg = r->halves[HALF_LOCAL].rxbuf;
- struct ofp_header *oh;
- struct stp_data *stp = stp_;
- struct ofp_packet_in *opi;
- struct eth_header *eth;
- struct llc_header *llc;
- struct ofpbuf payload;
- uint16_t port_no;
- struct flow flow;
-
- oh = msg->data;
- if (oh->type == OFPT_FEATURES_REPLY
- && msg->size >= offsetof(struct ofp_switch_features, ports)) {
- struct ofp_switch_features *osf = msg->data;
- osf->capabilities |= htonl(OFPC_STP);
- return false;
- }
-
- if (!get_ofp_packet_eth_header(r, &opi, ð)
- || !eth_addr_equals(eth->eth_dst, stp_eth_addr)) {
- return false;
- }
-
- port_no = ntohs(opi->in_port);
- if (port_no >= STP_MAX_PORTS) {
- /* STP only supports 255 ports. */
- return false;
- }
- if (port_watcher_get_config(stp->pw, port_no) & OFPPC_NO_STP) {
- /* We're not doing STP on this port. */
- return false;
- }
-
- if (opi->reason == OFPR_ACTION) {
- /* The controller set up a flow for this, so we won't intercept it. */
- return false;
- }
-
- get_ofp_packet_payload(opi, &payload);
- flow_extract(&payload, port_no, &flow);
- if (flow.dl_type != htons(OFP_DL_TYPE_NOT_ETH_TYPE)) {
- VLOG_DBG("non-LLC frame received on STP multicast address");
- return false;
- }
- llc = ofpbuf_at_assert(&payload, sizeof *eth, sizeof *llc);
- if (llc->llc_dsap != STP_LLC_DSAP) {
- VLOG_DBG("bad DSAP 0x%02"PRIx8" received on STP multicast address",
- llc->llc_dsap);
- return false;
- }
-
- /* Trim off padding on payload. */
- if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
- payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN;
- }
- if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
- struct stp_port *p = stp_get_port(stp->stp, port_no);
- stp_received_bpdu(p, payload.data, payload.size);
- }
-
- return true;
-}
-
-static void
-stp_periodic_cb(void *stp_)
-{
- struct stp_data *stp = stp_;
- long long int now = time_msec();
- long long int elapsed = now - stp->last_tick;
- struct stp_port *p;
-
- if (!port_watcher_is_ready(stp->pw)) {
- /* Can't start STP until we know port flags, because port flags can
- * disable STP. */
- return;
- }
- if (elapsed <= 0) {
- return;
- }
-
- stp_tick(stp->stp, MIN(INT_MAX, elapsed));
- stp->last_tick = now;
-
- while (stp_get_changed_port(stp->stp, &p)) {
- int port_no = stp_port_no(p);
- enum stp_state s_state = stp_port_get_state(p);
-
- if (s_state != STP_DISABLED) {
- VLOG_INFO("STP: Port %d entered %s state",
- port_no, stp_state_name(s_state));
- }
- if (!(port_watcher_get_config(stp->pw, port_no) & OFPPC_NO_STP)) {
- uint32_t p_config = 0;
- uint32_t p_state;
- switch (s_state) {
- case STP_LISTENING:
- p_state = OFPPS_STP_LISTEN;
- break;
- case STP_LEARNING:
- p_state = OFPPS_STP_LEARN;
- break;
- case STP_DISABLED:
- case STP_FORWARDING:
- p_state = OFPPS_STP_FORWARD;
- break;
- case STP_BLOCKING:
- p_state = OFPPS_STP_BLOCK;
- break;
- default:
- VLOG_DBG_RL(&rl, "STP: Port %d has bad state %x",
- port_no, s_state);
- p_state = OFPPS_STP_FORWARD;
- break;
- }
- if (!stp_forward_in_state(s_state)) {
- p_config = OFPPC_NO_FLOOD;
- }
- port_watcher_set_flags(stp->pw, port_no,
- p_config, OFPPC_NO_FLOOD,
- p_state, OFPPS_STP_MASK);
- } else {
- /* We don't own those flags. */
- }
- }
-}
-
-static void
-stp_wait_cb(void *stp_ UNUSED)
-{
- poll_timer_wait(1000);
-}
-
-static void
-send_bpdu(struct ofpbuf *pkt, int port_no, void *stp_)
-{
- struct stp_data *stp = stp_;
- const uint8_t *port_mac = port_watcher_get_hwaddr(stp->pw, port_no);
- if (port_mac) {
- struct eth_header *eth = pkt->l2;
- struct ofpbuf *opo;
-
- memcpy(eth->eth_src, port_mac, ETH_ADDR_LEN);
- opo = make_unbuffered_packet_out(pkt, OFPP_NONE, port_no);
-
- rconn_send_with_limit(stp->local_rconn, opo, &stp->n_txq, OFPP_MAX);
- } else {
- VLOG_WARN_RL(&rl, "cannot send BPDU on missing port %d", port_no);
- }
- ofpbuf_delete(pkt);
-}
-
-static bool
-stp_is_port_supported(uint16_t port_no)
-{
- return port_no < STP_MAX_PORTS;
-}
-
-static void
-stp_port_changed_cb(uint16_t port_no,
- const struct ofp_phy_port *old UNUSED,
- const struct ofp_phy_port *new,
- void *stp_)
-{
- struct stp_data *stp = stp_;
- struct stp_port *p;
-
- if (!stp_is_port_supported(port_no)) {
- return;
- }
-
- p = stp_get_port(stp->stp, port_no);
- if (!new
- || new->config & htonl(OFPPC_NO_STP | OFPPC_PORT_DOWN)
- || new->state & htonl(OFPPS_LINK_DOWN)) {
- stp_port_disable(p);
- } else {
- int speed = 0;
- stp_port_enable(p);
- if (new->curr & (OFPPF_10MB_HD | OFPPF_10MB_FD)) {
- speed = 10;
- } else if (new->curr & (OFPPF_100MB_HD | OFPPF_100MB_FD)) {
- speed = 100;
- } else if (new->curr & (OFPPF_1GB_HD | OFPPF_1GB_FD)) {
- speed = 1000;
- } else if (new->curr & OFPPF_10GB_FD) {
- speed = 10000;
- }
- stp_port_set_speed(p, speed);
- }
-}
-
-static void
-stp_local_port_changed_cb(const struct ofp_phy_port *port, void *stp_)
-{
- struct stp_data *stp = stp_;
- if (port) {
- stp_set_bridge_id(stp->stp, eth_addr_to_uint64(port->hw_addr));
- }
-}
-
-static const struct hook_class stp_hook_class = {
- stp_local_packet_cb, /* local_packet_cb */
- NULL, /* remote_packet_cb */
- stp_periodic_cb, /* periodic_cb */
- stp_wait_cb, /* wait_cb */
- NULL, /* closing_cb */
- NULL, /* reconfigure_cb */
-};
-
-void
-stp_start(struct secchan *secchan, struct port_watcher *pw,
- struct rconn *local, struct rconn *remote)
-{
- uint8_t dpid[ETH_ADDR_LEN];
- struct stp_data *stp;
-
- stp = xcalloc(1, sizeof *stp);
- eth_addr_random(dpid);
- stp->stp = stp_create("stp", eth_addr_to_uint64(dpid), send_bpdu, stp);
- stp->pw = pw;
- stp->local_rconn = local;
- stp->remote_rconn = remote;
- stp->last_tick = time_msec();
-
- port_watcher_register_callback(pw, stp_port_changed_cb, stp);
- port_watcher_register_local_port_callback(pw, stp_local_port_changed_cb,
- stp);
- add_hook(secchan, &stp_hook_class, stp);
-}
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef STP_SECCHAN_H
-#define STP_SECCHAN_H 1
-
-/* Extra time, in seconds, at boot before going into fail-open, to give the
- * spanning tree protocol time to figure out the network layout. */
-#define STP_EXTRA_BOOT_TIME 30
-
-struct port_watcher;
-struct rconn;
-struct secchan;
-
-void stp_start(struct secchan *, struct port_watcher *,
- struct rconn *local, struct rconn *remote);
-
-#endif /* stp-secchan.h */
}
static bool
-match(const struct cls_rule *wild, const struct flow *fixed)
+match(const struct cls_rule *wild, const flow_t *fixed)
{
int f_idx;
}
static struct cls_rule *
-tcls_lookup(const struct tcls *cls, const struct flow *flow)
+tcls_lookup(const struct tcls *cls, const flow_t *flow)
{
size_t i;
for (i = 0; i < N_FLOW_VALUES; i++) {
struct cls_rule *cr0, *cr1;
- struct flow flow;
+ flow_t flow;
unsigned int x;
x = i;
const struct cls_field *f;
struct test_rule *rule;
uint32_t wildcards;
- struct flow flow;
+ flow_t flow;
wildcards = 0;
memset(&flow, 0, sizeof flow);
while (fread(&expected_match, sizeof expected_match, 1, flows)) {
struct ofpbuf *packet;
struct ofp_match extracted_match;
- struct flow flow;
+ flow_t flow;
n++;
+++ /dev/null
-/Makefile
-/Makefile.in
-/udatapath
-/udatapath.8
+++ /dev/null
-bin_PROGRAMS += udatapath/udatapath
-man_MANS += udatapath/udatapath.8
-
-udatapath_udatapath_SOURCES = \
- udatapath/chain.c \
- udatapath/chain.h \
- udatapath/crc32.c \
- udatapath/crc32.h \
- udatapath/datapath.c \
- udatapath/datapath.h \
- udatapath/dp_act.c \
- udatapath/dp_act.h \
- udatapath/nx_act.c \
- udatapath/nx_act.h \
- udatapath/nx_msg.c \
- udatapath/nx_msg.h \
- udatapath/udatapath.c \
- udatapath/switch-flow.c \
- udatapath/switch-flow.h \
- udatapath/table.h \
- udatapath/table-hash.c \
- udatapath/table-linear.c
-
-udatapath_udatapath_LDADD = lib/libopenflow.a $(SSL_LIBS) $(FAULT_LIBS)
-
-EXTRA_DIST += udatapath/udatapath.8.in
-DISTCLEANFILES += udatapath/udatapath.8
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "chain.h"
-#include <assert.h>
-#include <errno.h>
-#include <stdlib.h>
-#include "switch-flow.h"
-#include "table.h"
-
-#define THIS_MODULE VLM_chain
-#include "vlog.h"
-
-/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or
- * negative error. If 'table' is null it is assumed that table creation failed
- * due to out-of-memory. */
-static int add_table(struct sw_chain *chain, struct sw_table *table)
-{
- if (table == NULL)
- return -ENOMEM;
- if (chain->n_tables >= CHAIN_MAX_TABLES) {
- VLOG_ERR("too many tables in chain\n");
- table->destroy(table);
- return -ENOBUFS;
- }
- chain->tables[chain->n_tables++] = table;
- return 0;
-}
-
-/* Creates and returns a new chain. Returns NULL if the chain cannot be
- * created. */
-struct sw_chain *chain_create(struct datapath *dp)
-{
- struct sw_chain *chain = calloc(1, sizeof *chain);
- if (chain == NULL)
- return NULL;
-
- chain->dp = dp;
- if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS,
- 0x741B8CD7, TABLE_HASH_MAX_FLOWS))
- || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) {
- chain_destroy(chain);
- return NULL;
- }
-
- return chain;
-}
-
-/* Searches 'chain' for a flow matching 'key', which must not have any wildcard
- * fields. Returns the flow if successful, otherwise a null pointer. */
-struct sw_flow *
-chain_lookup(struct sw_chain *chain, const struct sw_flow_key *key)
-{
- int i;
-
- assert(!key->wildcards);
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- struct sw_flow *flow = t->lookup(t, key);
- t->n_lookup++;
- if (flow) {
- t->n_matched++;
- return flow;
- }
- }
- return NULL;
-}
-
-/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if
- * successful or a negative error.
- *
- * If successful, 'flow' becomes owned by the chain, otherwise it is retained
- * by the caller. */
-int
-chain_insert(struct sw_chain *chain, struct sw_flow *flow)
-{
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- if (t->insert(t, flow))
- return 0;
- }
-
- return -ENOBUFS;
-}
-
-/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards
- * and priority must match. Returns the number of flows that were modified.
- *
- * Expensive in the general case as currently implemented, since it requires
- * iterating through the entire contents of each table for keys that contain
- * wildcards. Relatively cheap for fully specified keys. */
-int
-chain_modify(struct sw_chain *chain, const struct sw_flow_key *key,
- uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- count += t->modify(t, key, priority, strict, actions, actions_len);
- }
-
- return count;
-}
-
-/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port'
- * is not OFPP_NONE, then matching entries must have that port as an
- * argument for an output action. If 'strict" is set, then wildcards and
- * priority must match. Returns the number of flows that were deleted.
- *
- * Expensive in the general case as currently implemented, since it requires
- * iterating through the entire contents of each table for keys that contain
- * wildcards. Relatively cheap for fully specified keys. */
-int
-chain_delete(struct sw_chain *chain, const struct sw_flow_key *key,
- uint16_t out_port, uint16_t priority, int strict)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- count += t->delete(chain->dp, t, key, out_port, priority, strict);
- }
-
- return count;
-
-}
-
-/* Deletes timed-out flow entries from all the tables in 'chain' and appends
- * the deleted flows to 'deleted'.
- *
- * Expensive as currently implemented, since it iterates through the entire
- * contents of each table. */
-void
-chain_timeout(struct sw_chain *chain, struct list *deleted)
-{
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- t->timeout(t, deleted);
- }
-}
-
-/* Destroys 'chain', which must not have any users. */
-void
-chain_destroy(struct sw_chain *chain)
-{
- int i;
-
- for (i = 0; i < chain->n_tables; i++) {
- struct sw_table *t = chain->tables[i];
- t->destroy(t);
- }
- free(chain);
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef CHAIN_H
-#define CHAIN_H 1
-
-#include <stddef.h>
-#include <stdint.h>
-
-struct sw_flow;
-struct sw_flow_key;
-struct ofp_action_header;
-struct list;
-struct datapath;
-
-#define TABLE_LINEAR_MAX_FLOWS 100
-#define TABLE_HASH_MAX_FLOWS 65536
-#define TABLE_MAC_MAX_FLOWS 1024
-#define TABLE_MAC_NUM_BUCKETS 1024
-
-/* Set of tables chained together in sequence from cheap to expensive. */
-#define CHAIN_MAX_TABLES 4
-struct sw_chain {
- int n_tables;
- struct sw_table *tables[CHAIN_MAX_TABLES];
-
- struct datapath *dp;
-};
-
-struct sw_chain *chain_create(struct datapath *);
-struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *);
-int chain_insert(struct sw_chain *, struct sw_flow *);
-int chain_modify(struct sw_chain *, const struct sw_flow_key *,
- uint16_t, int, const struct ofp_action_header *, size_t);
-int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t,
- uint16_t, int);
-void chain_timeout(struct sw_chain *, struct list *deleted);
-void chain_destroy(struct sw_chain *);
-
-#endif /* chain.h */
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "crc32.h"
-
-void
-crc32_init(struct crc32 *crc, unsigned int polynomial)
-{
- int i;
-
- for (i = 0; i < CRC32_TABLE_SIZE; ++i) {
- unsigned int reg = i << 24;
- int j;
- for (j = 0; j < CRC32_TABLE_BITS; j++) {
- int topBit = (reg & 0x80000000) != 0;
- reg <<= 1;
- if (topBit)
- reg ^= polynomial;
- }
- crc->table[i] = reg;
- }
-}
-
-unsigned int
-crc32_calculate(const struct crc32 *crc, const void *data_, size_t n_bytes)
-{
- const uint8_t *data = data_;
- unsigned int result = 0;
- size_t i;
-
- for (i = 0; i < n_bytes; i++) {
- unsigned int top = result >> 24;
- top ^= data[i];
- result = (result << 8) ^ crc->table[top];
- }
- return result;
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef CRC32_H
-#define CRC32_H 1
-
-#include <stdint.h>
-#include <stddef.h>
-
-#define CRC32_TABLE_BITS 8
-#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS)
-
-struct crc32 {
- unsigned int table[CRC32_TABLE_SIZE];
-};
-
-void crc32_init(struct crc32 *, unsigned int polynomial);
-unsigned int crc32_calculate(const struct crc32 *, const void *, size_t);
-
-#endif /* crc32.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include "datapath.h"
-#include <arpa/inet.h>
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <string.h>
-#include "chain.h"
-#include "csum.h"
-#include "flow.h"
-#include "netdev.h"
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "openflow/nicira-ext.h"
-#include "packets.h"
-#include "poll-loop.h"
-#include "rconn.h"
-#include "stp.h"
-#include "switch-flow.h"
-#include "table.h"
-#include "vconn.h"
-#include "xtoxll.h"
-#include "nx_msg.h"
-#include "dp_act.h"
-
-#define THIS_MODULE VLM_datapath
-#include "vlog.h"
-
-extern char mfr_desc;
-extern char hw_desc;
-extern char sw_desc;
-extern char serial_num;
-
-/* Capabilities supported by this implementation. */
-#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \
- | OFPC_TABLE_STATS \
- | OFPC_PORT_STATS \
- | OFPC_MULTI_PHY_TX )
-
-/* Actions supported by this implementation. */
-#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \
- | (1 << OFPAT_SET_VLAN_VID) \
- | (1 << OFPAT_SET_VLAN_PCP) \
- | (1 << OFPAT_STRIP_VLAN) \
- | (1 << OFPAT_SET_DL_SRC) \
- | (1 << OFPAT_SET_DL_DST) \
- | (1 << OFPAT_SET_NW_SRC) \
- | (1 << OFPAT_SET_NW_DST) \
- | (1 << OFPAT_SET_TP_SRC) \
- | (1 << OFPAT_SET_TP_DST) )
-
-/* The origin of a received OpenFlow message, to enable sending a reply. */
-struct sender {
- struct remote *remote; /* The device that sent the message. */
- uint32_t xid; /* The OpenFlow transaction ID. */
-};
-
-/* A connection to a secure channel. */
-struct remote {
- struct list node;
- struct rconn *rconn;
-#define TXQ_LIMIT 128 /* Max number of packets to queue for tx. */
- int n_txq; /* Number of packets queued for tx on rconn. */
-
- /* Support for reliable, multi-message replies to requests.
- *
- * If an incoming request needs to have a reliable reply that might
- * require multiple messages, it can use remote_start_dump() to set up
- * a callback that will be called as buffer space for replies. */
- int (*cb_dump)(struct datapath *, void *aux);
- void (*cb_done)(void *aux);
- void *cb_aux;
-};
-
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
-
-static struct remote *remote_create(struct datapath *, struct rconn *);
-static void remote_run(struct datapath *, struct remote *);
-static void remote_wait(struct remote *);
-static void remote_destroy(struct remote *);
-
-static void update_port_flags(struct datapath *, const struct ofp_port_mod *);
-static void send_port_status(struct sw_port *p, uint8_t status);
-
-/* Buffers are identified by a 31-bit opaque ID. We divide the ID
- * into a buffer number (low bits) and a cookie (high bits). The buffer number
- * is an index into an array of buffers. The cookie distinguishes between
- * different packets that have occupied a single buffer. Thus, the more
- * buffers we have, the lower-quality the cookie... */
-#define PKT_BUFFER_BITS 8
-#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS)
-#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1)
-
-#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS)
-
-int run_flow_through_tables(struct datapath *, struct ofpbuf *,
- struct sw_port *);
-void fwd_port_input(struct datapath *, struct ofpbuf *, struct sw_port *);
-int fwd_control_input(struct datapath *, const struct sender *,
- const void *, size_t);
-
-uint32_t save_buffer(struct ofpbuf *);
-static struct ofpbuf *retrieve_buffer(uint32_t id);
-static void discard_buffer(uint32_t id);
-
-static struct sw_port *
-lookup_port(struct datapath *dp, uint16_t port_no)
-{
- return (port_no < DP_MAX_PORTS ? &dp->ports[port_no]
- : port_no == OFPP_LOCAL ? dp->local_port
- : NULL);
-}
-
-/* Generates and returns a random datapath id. */
-static uint64_t
-gen_datapath_id(void)
-{
- uint8_t ea[ETH_ADDR_LEN];
- eth_addr_random(ea);
- ea[0] = 0x00; /* Set Nicira OUI. */
- ea[1] = 0x23;
- ea[2] = 0x20;
- return eth_addr_to_uint64(ea);
-}
-
-int
-dp_new(struct datapath **dp_, uint64_t dpid)
-{
- struct datapath *dp;
-
- dp = calloc(1, sizeof *dp);
- if (!dp) {
- return ENOMEM;
- }
-
- dp->last_timeout = time_now();
- list_init(&dp->remotes);
- dp->listeners = NULL;
- dp->n_listeners = 0;
- dp->id = dpid <= UINT64_C(0xffffffffffff) ? dpid : gen_datapath_id();
- dp->chain = chain_create(dp);
- if (!dp->chain) {
- VLOG_ERR("could not create chain");
- free(dp);
- return ENOMEM;
- }
-
- list_init(&dp->port_list);
- dp->flags = 0;
- dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
- *dp_ = dp;
- return 0;
-}
-
-static int
-new_port(struct datapath *dp, struct sw_port *port, uint16_t port_no,
- const char *netdev_name, const uint8_t *new_mac)
-{
- struct netdev *netdev;
- struct in6_addr in6;
- struct in_addr in4;
- int error;
-
- error = netdev_open(netdev_name, NETDEV_ETH_TYPE_ANY, &netdev);
- if (error) {
- return error;
- }
- if (new_mac && !eth_addr_equals(netdev_get_etheraddr(netdev), new_mac)) {
- /* Generally the device has to be down before we change its hardware
- * address. Don't bother to check for an error because it's really
- * the netdev_set_etheraddr() call below that we care about. */
- netdev_set_flags(netdev, 0, false);
- error = netdev_set_etheraddr(netdev, new_mac);
- if (error) {
- VLOG_WARN("failed to change %s Ethernet address "
- "to "ETH_ADDR_FMT": %s",
- netdev_name, ETH_ADDR_ARGS(new_mac), strerror(error));
- }
- }
- error = netdev_set_flags(netdev, NETDEV_UP | NETDEV_PROMISC, false);
- if (error) {
- VLOG_ERR("failed to set promiscuous mode on %s device", netdev_name);
- netdev_close(netdev);
- return error;
- }
- if (netdev_get_in4(netdev, &in4)) {
- VLOG_ERR("%s device has assigned IP address %s",
- netdev_name, inet_ntoa(in4));
- }
- if (netdev_get_in6(netdev, &in6)) {
- char in6_name[INET6_ADDRSTRLEN + 1];
- inet_ntop(AF_INET6, &in6, in6_name, sizeof in6_name);
- VLOG_ERR("%s device has assigned IPv6 address %s",
- netdev_name, in6_name);
- }
-
- memset(port, '\0', sizeof *port);
-
- port->dp = dp;
- port->netdev = netdev;
- port->port_no = port_no;
- list_push_back(&dp->port_list, &port->node);
-
- /* Notify the ctlpath that this port has been added */
- send_port_status(port, OFPPR_ADD);
-
- return 0;
-}
-
-int
-dp_add_port(struct datapath *dp, const char *netdev)
-{
- int port_no;
- for (port_no = 0; port_no < DP_MAX_PORTS; port_no++) {
- struct sw_port *port = &dp->ports[port_no];
- if (!port->netdev) {
- return new_port(dp, port, port_no, netdev, NULL);
- }
- }
- return EXFULL;
-}
-
-int
-dp_add_local_port(struct datapath *dp, const char *netdev)
-{
- if (!dp->local_port) {
- uint8_t ea[ETH_ADDR_LEN];
- struct sw_port *port;
- int error;
-
- port = xcalloc(1, sizeof *port);
- eth_addr_from_uint64(dp->id, ea);
- error = new_port(dp, port, OFPP_LOCAL, netdev, ea);
- if (!error) {
- dp->local_port = port;
- } else {
- free(port);
- }
- return error;
- } else {
- return EXFULL;
- }
-}
-
-void
-dp_add_pvconn(struct datapath *dp, struct pvconn *pvconn)
-{
- dp->listeners = xrealloc(dp->listeners,
- sizeof *dp->listeners * (dp->n_listeners + 1));
- dp->listeners[dp->n_listeners++] = pvconn;
-}
-
-void
-dp_run(struct datapath *dp)
-{
- time_t now = time_now();
- struct sw_port *p, *pn;
- struct remote *r, *rn;
- struct ofpbuf *buffer = NULL;
- size_t i;
-
- if (now != dp->last_timeout) {
- struct list deleted = LIST_INITIALIZER(&deleted);
- struct sw_flow *f, *n;
-
- chain_timeout(dp->chain, &deleted);
- LIST_FOR_EACH_SAFE (f, n, struct sw_flow, node, &deleted) {
- dp_send_flow_end(dp, f, f->reason);
- list_remove(&f->node);
- flow_free(f);
- }
- dp->last_timeout = now;
- }
- poll_timer_wait(1000);
-
- LIST_FOR_EACH_SAFE (p, pn, struct sw_port, node, &dp->port_list) {
- int error;
-
- if (!buffer) {
- /* Allocate buffer with some headroom to add headers in forwarding
- * to the controller or adding a vlan tag, plus an extra 2 bytes to
- * allow IP headers to be aligned on a 4-byte boundary. */
- const int headroom = 128 + 2;
- const int hard_header = VLAN_ETH_HEADER_LEN;
- const int mtu = netdev_get_mtu(p->netdev);
- buffer = ofpbuf_new(headroom + hard_header + mtu);
- buffer->data = (char*)buffer->data + headroom;
- }
- error = netdev_recv(p->netdev, buffer);
- if (!error) {
- p->rx_packets++;
- p->rx_bytes += buffer->size;
- fwd_port_input(dp, buffer, p);
- buffer = NULL;
- } else if (error != EAGAIN) {
- VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
- netdev_get_name(p->netdev), strerror(error));
- }
- }
- ofpbuf_delete(buffer);
-
- /* Talk to remotes. */
- LIST_FOR_EACH_SAFE (r, rn, struct remote, node, &dp->remotes) {
- remote_run(dp, r);
- }
-
- for (i = 0; i < dp->n_listeners; ) {
- struct pvconn *pvconn = dp->listeners[i];
- struct vconn *new_vconn;
- int retval = pvconn_accept(pvconn, OFP_VERSION, &new_vconn);
- if (!retval) {
- remote_create(dp, rconn_new_from_vconn("passive", new_vconn));
- } else if (retval != EAGAIN) {
- VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval));
- dp->listeners[i] = dp->listeners[--dp->n_listeners];
- continue;
- }
- i++;
- }
-}
-
-static void
-remote_run(struct datapath *dp, struct remote *r)
-{
- int i;
-
- rconn_run(r->rconn);
-
- /* Do some remote processing, but cap it at a reasonable amount so that
- * other processing doesn't starve. */
- for (i = 0; i < 50; i++) {
- if (!r->cb_dump) {
- struct ofpbuf *buffer;
- struct ofp_header *oh;
-
- buffer = rconn_recv(r->rconn);
- if (!buffer) {
- break;
- }
-
- if (buffer->size >= sizeof *oh) {
- struct sender sender;
-
- oh = buffer->data;
- sender.remote = r;
- sender.xid = oh->xid;
- fwd_control_input(dp, &sender, buffer->data, buffer->size);
- } else {
- VLOG_WARN_RL(&rl, "received too-short OpenFlow message");
- }
- ofpbuf_delete(buffer);
- } else {
- if (r->n_txq < TXQ_LIMIT) {
- int error = r->cb_dump(dp, r->cb_aux);
- if (error <= 0) {
- if (error) {
- VLOG_WARN_RL(&rl, "dump callback error: %s",
- strerror(-error));
- }
- r->cb_done(r->cb_aux);
- r->cb_dump = NULL;
- }
- } else {
- break;
- }
- }
- }
-
- if (!rconn_is_alive(r->rconn)) {
- remote_destroy(r);
- }
-}
-
-static void
-remote_wait(struct remote *r)
-{
- rconn_run_wait(r->rconn);
- rconn_recv_wait(r->rconn);
-}
-
-static void
-remote_destroy(struct remote *r)
-{
- if (r) {
- if (r->cb_dump && r->cb_done) {
- r->cb_done(r->cb_aux);
- }
- list_remove(&r->node);
- rconn_destroy(r->rconn);
- free(r);
- }
-}
-
-static struct remote *
-remote_create(struct datapath *dp, struct rconn *rconn)
-{
- struct remote *remote = xmalloc(sizeof *remote);
- list_push_back(&dp->remotes, &remote->node);
- remote->rconn = rconn;
- remote->cb_dump = NULL;
- remote->n_txq = 0;
- return remote;
-}
-
-/* Starts a callback-based, reliable, possibly multi-message reply to a
- * request made by 'remote'.
- *
- * 'dump' designates a function that will be called when the 'remote' send
- * queue has an empty slot. It should compose a message and send it on
- * 'remote'. On success, it should return 1 if it should be called again when
- * another send queue slot opens up, 0 if its transmissions are complete, or a
- * negative errno value on failure.
- *
- * 'done' designates a function to clean up any resources allocated for the
- * dump. It must handle being called before the dump is complete (which will
- * happen if 'remote' is closed unexpectedly).
- *
- * 'aux' is passed to 'dump' and 'done'. */
-static void
-remote_start_dump(struct remote *remote,
- int (*dump)(struct datapath *, void *),
- void (*done)(void *),
- void *aux)
-{
- assert(!remote->cb_dump);
- remote->cb_dump = dump;
- remote->cb_done = done;
- remote->cb_aux = aux;
-}
-
-void
-dp_wait(struct datapath *dp)
-{
- struct sw_port *p;
- struct remote *r;
- size_t i;
-
- LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
- netdev_recv_wait(p->netdev);
- }
- LIST_FOR_EACH (r, struct remote, node, &dp->remotes) {
- remote_wait(r);
- }
- for (i = 0; i < dp->n_listeners; i++) {
- pvconn_wait(dp->listeners[i]);
- }
-}
-
-/* Send packets out all the ports except the originating one. If the
- * "flood" argument is set, don't send out ports with flooding disabled.
- */
-static int
-output_all(struct datapath *dp, struct ofpbuf *buffer, int in_port, int flood)
-{
- struct sw_port *p;
- int prev_port;
-
- prev_port = -1;
- LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
- if (p->port_no == in_port) {
- continue;
- }
- if (flood && p->config & OFPPC_NO_FLOOD) {
- continue;
- }
- if (prev_port != -1) {
- dp_output_port(dp, ofpbuf_clone(buffer), in_port, prev_port,
- false);
- }
- prev_port = p->port_no;
- }
- if (prev_port != -1)
- dp_output_port(dp, buffer, in_port, prev_port, false);
- else
- ofpbuf_delete(buffer);
-
- return 0;
-}
-
-static void
-output_packet(struct datapath *dp, struct ofpbuf *buffer, uint16_t out_port)
-{
- struct sw_port *p = lookup_port(dp, out_port);
- if (p && p->netdev != NULL) {
- if (!(p->config & OFPPC_PORT_DOWN)) {
- if (!netdev_send(p->netdev, buffer)) {
- p->tx_packets++;
- p->tx_bytes += buffer->size;
- } else {
- p->tx_dropped++;
- }
- }
- ofpbuf_delete(buffer);
- return;
- }
-
- ofpbuf_delete(buffer);
- VLOG_DBG_RL(&rl, "can't forward to bad port %d\n", out_port);
-}
-
-/* Takes ownership of 'buffer' and transmits it to 'out_port' on 'dp'.
- */
-void
-dp_output_port(struct datapath *dp, struct ofpbuf *buffer,
- int in_port, int out_port, bool ignore_no_fwd)
-{
-
- assert(buffer);
- switch (out_port) {
- case OFPP_IN_PORT:
- output_packet(dp, buffer, in_port);
- break;
-
- case OFPP_TABLE: {
- struct sw_port *p = lookup_port(dp, in_port);
- if (run_flow_through_tables(dp, buffer, p)) {
- ofpbuf_delete(buffer);
- }
- break;
- }
-
- case OFPP_FLOOD:
- output_all(dp, buffer, in_port, 1);
- break;
-
- case OFPP_ALL:
- output_all(dp, buffer, in_port, 0);
- break;
-
- case OFPP_CONTROLLER:
- dp_output_control(dp, buffer, in_port, 0, OFPR_ACTION);
- break;
-
- case OFPP_LOCAL:
- default:
- if (in_port == out_port) {
- VLOG_DBG_RL(&rl, "can't directly forward to input port");
- return;
- }
- output_packet(dp, buffer, out_port);
- break;
- }
-}
-
-static void *
-make_openflow_reply(size_t openflow_len, uint8_t type,
- const struct sender *sender, struct ofpbuf **bufferp)
-{
- return make_openflow_xid(openflow_len, type, sender ? sender->xid : 0,
- bufferp);
-}
-
-static int
-send_openflow_buffer_to_remote(struct ofpbuf *buffer, struct remote *remote)
-{
- int retval = rconn_send_with_limit(remote->rconn, buffer, &remote->n_txq,
- TXQ_LIMIT);
- if (retval) {
- VLOG_WARN_RL(&rl, "send to %s failed: %s",
- rconn_get_name(remote->rconn), strerror(retval));
- }
- return retval;
-}
-
-static int
-send_openflow_buffer(struct datapath *dp, struct ofpbuf *buffer,
- const struct sender *sender)
-{
- update_openflow_length(buffer);
- if (sender) {
- /* Send back to the sender. */
- return send_openflow_buffer_to_remote(buffer, sender->remote);
- } else {
- /* Broadcast to all remotes. */
- struct remote *r, *prev = NULL;
- LIST_FOR_EACH (r, struct remote, node, &dp->remotes) {
- if (prev) {
- send_openflow_buffer_to_remote(ofpbuf_clone(buffer), prev);
- }
- prev = r;
- }
- if (prev) {
- send_openflow_buffer_to_remote(buffer, prev);
- } else {
- ofpbuf_delete(buffer);
- }
- return 0;
- }
-}
-
-/* Takes ownership of 'buffer' and transmits it to 'dp''s controller. If the
- * packet can be saved in a buffer, then only the first max_len bytes of
- * 'buffer' are sent; otherwise, all of 'buffer' is sent. 'reason' indicates
- * why 'buffer' is being sent. 'max_len' sets the maximum number of bytes that
- * the caller wants to be sent; a value of 0 indicates the entire packet should
- * be sent. */
-void
-dp_output_control(struct datapath *dp, struct ofpbuf *buffer, int in_port,
- size_t max_len, int reason)
-{
- struct ofp_packet_in *opi;
- size_t total_len;
- uint32_t buffer_id;
-
- buffer_id = save_buffer(buffer);
- total_len = buffer->size;
- if (buffer_id != UINT32_MAX && max_len && buffer->size > max_len) {
- buffer->size = max_len;
- }
-
- opi = ofpbuf_push_uninit(buffer, offsetof(struct ofp_packet_in, data));
- opi->header.version = OFP_VERSION;
- opi->header.type = OFPT_PACKET_IN;
- opi->header.length = htons(buffer->size);
- opi->header.xid = htonl(0);
- opi->buffer_id = htonl(buffer_id);
- opi->total_len = htons(total_len);
- opi->in_port = htons(in_port);
- opi->reason = reason;
- opi->pad = 0;
- send_openflow_buffer(dp, buffer, NULL);
-}
-
-static void
-fill_port_desc(struct sw_port *p, struct ofp_phy_port *desc)
-{
- uint32_t curr, advertised, supported, peer;
-
- desc->port_no = htons(p->port_no);
- strncpy((char *) desc->name, netdev_get_name(p->netdev),
- sizeof desc->name);
- desc->name[sizeof desc->name - 1] = '\0';
- memcpy(desc->hw_addr, netdev_get_etheraddr(p->netdev), ETH_ADDR_LEN);
- desc->config = htonl(p->config);
- desc->state = htonl(p->state);
- netdev_get_features(p->netdev, &curr, &advertised, &supported, &peer);
- desc->curr = htonl(curr);
- desc->supported = htonl(supported);
- desc->advertised = htonl(advertised);
- desc->peer = htonl(peer);
-}
-
-static void
-dp_send_features_reply(struct datapath *dp, const struct sender *sender)
-{
- struct ofpbuf *buffer;
- struct ofp_switch_features *ofr;
- struct sw_port *p;
-
- ofr = make_openflow_reply(sizeof *ofr, OFPT_FEATURES_REPLY,
- sender, &buffer);
- ofr->datapath_id = htonll(dp->id);
- ofr->n_tables = dp->chain->n_tables;
- ofr->n_buffers = htonl(N_PKT_BUFFERS);
- ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES);
- ofr->actions = htonl(OFP_SUPPORTED_ACTIONS);
- LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
- struct ofp_phy_port *opp = ofpbuf_put_uninit(buffer, sizeof *opp);
- memset(opp, 0, sizeof *opp);
- fill_port_desc(p, opp);
- }
- send_openflow_buffer(dp, buffer, sender);
-}
-
-void
-update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
-{
- struct sw_port *p = lookup_port(dp, ntohs(opm->port_no));
-
- /* Make sure the port id hasn't changed since this was sent */
- if (!p || memcmp(opm->hw_addr, netdev_get_etheraddr(p->netdev),
- ETH_ADDR_LEN) != 0) {
- return;
- }
-
-
- if (opm->mask) {
- uint32_t config_mask = ntohl(opm->mask);
- p->config &= ~config_mask;
- p->config |= ntohl(opm->config) & config_mask;
- }
-}
-
-static void
-send_port_status(struct sw_port *p, uint8_t status)
-{
- struct ofpbuf *buffer;
- struct ofp_port_status *ops;
- ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &buffer);
- ops->reason = status;
- memset(ops->pad, 0, sizeof ops->pad);
- fill_port_desc(p, &ops->desc);
-
- send_openflow_buffer(p->dp, buffer, NULL);
-}
-
-void
-dp_send_flow_end(struct datapath *dp, struct sw_flow *flow,
- enum nx_flow_end_reason reason)
-{
- struct ofpbuf *buffer;
- struct nx_flow_end *nfe;
-
- if (!dp->send_flow_end) {
- return;
- }
-
- nfe = make_openflow_xid(sizeof *nfe, OFPT_VENDOR, 0, &buffer);
- if (!nfe) {
- return;
- }
- nfe->header.vendor = htonl(NX_VENDOR_ID);
- nfe->header.subtype = htonl(NXT_FLOW_END);
-
- flow_to_match(&flow->key.flow, flow->key.wildcards, &nfe->match);
-
- nfe->priority = htons(flow->priority);
- nfe->reason = reason;
-
- nfe->tcp_flags = flow->tcp_flags;
- nfe->ip_tos = flow->ip_tos;
-
- memset(nfe->pad, 0, sizeof nfe->pad);
-
- nfe->init_time = htonll(flow->created);
- nfe->used_time = htonll(flow->used);
- nfe->end_time = htonll(time_msec());
-
- nfe->packet_count = htonll(flow->packet_count);
- nfe->byte_count = htonll(flow->byte_count);
-
- send_openflow_buffer(dp, buffer, NULL);
-}
-
-void
-dp_send_error_msg(struct datapath *dp, const struct sender *sender,
- uint16_t type, uint16_t code, const void *data, size_t len)
-{
- struct ofpbuf *buffer;
- struct ofp_error_msg *oem;
- oem = make_openflow_reply(sizeof(*oem)+len, OFPT_ERROR, sender, &buffer);
- oem->type = htons(type);
- oem->code = htons(code);
- memcpy(oem->data, data, len);
- send_openflow_buffer(dp, buffer, sender);
-}
-
-static void
-fill_flow_stats(struct ofpbuf *buffer, struct sw_flow *flow,
- int table_idx, uint64_t now)
-{
- struct ofp_flow_stats *ofs;
- int length = sizeof *ofs + flow->sf_acts->actions_len;
- ofs = ofpbuf_put_uninit(buffer, length);
- ofs->length = htons(length);
- ofs->table_id = table_idx;
- ofs->pad = 0;
- ofs->match.wildcards = htonl(flow->key.wildcards);
- ofs->match.in_port = flow->key.flow.in_port;
- memcpy(ofs->match.dl_src, flow->key.flow.dl_src, ETH_ADDR_LEN);
- memcpy(ofs->match.dl_dst, flow->key.flow.dl_dst, ETH_ADDR_LEN);
- ofs->match.dl_vlan = flow->key.flow.dl_vlan;
- ofs->match.dl_type = flow->key.flow.dl_type;
- ofs->match.nw_src = flow->key.flow.nw_src;
- ofs->match.nw_dst = flow->key.flow.nw_dst;
- ofs->match.nw_proto = flow->key.flow.nw_proto;
- ofs->match.pad = 0;
- ofs->match.tp_src = flow->key.flow.tp_src;
- ofs->match.tp_dst = flow->key.flow.tp_dst;
- ofs->duration = htonl((now - flow->created) / 1000);
- ofs->priority = htons(flow->priority);
- ofs->idle_timeout = htons(flow->idle_timeout);
- ofs->hard_timeout = htons(flow->hard_timeout);
- memset(ofs->pad2, 0, sizeof ofs->pad2);
- ofs->packet_count = htonll(flow->packet_count);
- ofs->byte_count = htonll(flow->byte_count);
- memcpy(ofs->actions, flow->sf_acts->actions, flow->sf_acts->actions_len);
-}
-
-\f
-/* 'buffer' was received on 'p', which may be a a physical switch port or a
- * null pointer. Process it according to 'dp''s flow table. Returns 0 if
- * successful, in which case 'buffer' is destroyed, or -ESRCH if there is no
- * matching flow, in which case 'buffer' still belongs to the caller. */
-int run_flow_through_tables(struct datapath *dp, struct ofpbuf *buffer,
- struct sw_port *p)
-{
- struct sw_flow_key key;
- struct sw_flow *flow;
-
- key.wildcards = 0;
- if (flow_extract(buffer, p ? p->port_no : OFPP_NONE, &key.flow)
- && (dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) {
- /* Drop fragment. */
- ofpbuf_delete(buffer);
- return 0;
- }
- if (p && p->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP)
- && p->config & (!eth_addr_equals(key.flow.dl_dst, stp_eth_addr)
- ? OFPPC_NO_RECV : OFPPC_NO_RECV_STP)) {
- ofpbuf_delete(buffer);
- return 0;
- }
-
- flow = chain_lookup(dp->chain, &key);
- if (flow != NULL) {
- flow_used(flow, buffer);
- execute_actions(dp, buffer, &key, flow->sf_acts->actions,
- flow->sf_acts->actions_len, false);
- return 0;
- } else {
- return -ESRCH;
- }
-}
-
-/* 'buffer' was received on 'p', which may be a a physical switch port or a
- * null pointer. Process it according to 'dp''s flow table, sending it up to
- * the controller if no flow matches. Takes ownership of 'buffer'. */
-void fwd_port_input(struct datapath *dp, struct ofpbuf *buffer,
- struct sw_port *p)
-{
- if (run_flow_through_tables(dp, buffer, p)) {
- dp_output_control(dp, buffer, p->port_no,
- dp->miss_send_len, OFPR_NO_MATCH);
- }
-}
-
-static int
-recv_features_request(struct datapath *dp, const struct sender *sender,
- const void *msg UNUSED)
-{
- dp_send_features_reply(dp, sender);
- return 0;
-}
-
-static int
-recv_get_config_request(struct datapath *dp, const struct sender *sender,
- const void *msg UNUSED)
-{
- struct ofpbuf *buffer;
- struct ofp_switch_config *osc;
-
- osc = make_openflow_reply(sizeof *osc, OFPT_GET_CONFIG_REPLY,
- sender, &buffer);
-
- osc->flags = htons(dp->flags);
- osc->miss_send_len = htons(dp->miss_send_len);
-
- return send_openflow_buffer(dp, buffer, sender);
-}
-
-static int
-recv_set_config(struct datapath *dp, const struct sender *sender UNUSED,
- const void *msg)
-{
- const struct ofp_switch_config *osc = msg;
- int flags;
-
- flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK);
- if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL
- && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) {
- flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP;
- }
- dp->flags = flags;
- dp->miss_send_len = ntohs(osc->miss_send_len);
- return 0;
-}
-
-static int
-recv_packet_out(struct datapath *dp, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_packet_out *opo = msg;
- struct sw_flow_key key;
- uint16_t v_code;
- struct ofpbuf *buffer;
- size_t actions_len = ntohs(opo->actions_len);
-
- if (actions_len > (ntohs(opo->header.length) - sizeof *opo)) {
- VLOG_DBG_RL(&rl, "message too short for number of actions");
- return -EINVAL;
- }
-
- if (ntohl(opo->buffer_id) == (uint32_t) -1) {
- /* FIXME: can we avoid copying data here? */
- int data_len = ntohs(opo->header.length) - sizeof *opo - actions_len;
- buffer = ofpbuf_new(data_len);
- ofpbuf_put(buffer, (uint8_t *)opo->actions + actions_len, data_len);
- } else {
- buffer = retrieve_buffer(ntohl(opo->buffer_id));
- if (!buffer) {
- return -ESRCH;
- }
- }
-
- flow_extract(buffer, ntohs(opo->in_port), &key.flow);
-
- v_code = dp_validate_actions(dp, &key, opo->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
- msg, ntohs(opo->header.length));
- goto error;
- }
-
- execute_actions(dp, buffer, &key, opo->actions, actions_len, true);
-
- return 0;
-
-error:
- ofpbuf_delete(buffer);
- return -EINVAL;
-}
-
-static int
-recv_port_mod(struct datapath *dp, const struct sender *sender UNUSED,
- const void *msg)
-{
- const struct ofp_port_mod *opm = msg;
-
- update_port_flags(dp, opm);
-
- return 0;
-}
-
-static int
-add_flow(struct datapath *dp, const struct sender *sender,
- const struct ofp_flow_mod *ofm)
-{
- int error = -ENOMEM;
- uint16_t v_code;
- struct sw_flow *flow;
- size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm;
-
- /* Allocate memory. */
- flow = flow_alloc(actions_len);
- if (flow == NULL)
- goto error;
-
- flow_extract_match(&flow->key, &ofm->match);
-
- v_code = dp_validate_actions(dp, &flow->key, ofm->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
- ofm, ntohs(ofm->header.length));
- goto error_free_flow;
- }
-
- /* Fill out flow. */
- flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1;
- flow->idle_timeout = ntohs(ofm->idle_timeout);
- flow->hard_timeout = ntohs(ofm->hard_timeout);
- flow->used = flow->created = time_msec();
- flow->sf_acts->actions_len = actions_len;
- flow->byte_count = 0;
- flow->packet_count = 0;
- flow->tcp_flags = 0;
- flow->ip_tos = 0;
- memcpy(flow->sf_acts->actions, ofm->actions, actions_len);
-
- /* Act. */
- error = chain_insert(dp->chain, flow);
- if (error == -ENOBUFS) {
- dp_send_error_msg(dp, sender, OFPET_FLOW_MOD_FAILED,
- OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length));
- goto error_free_flow;
- } else if (error) {
- goto error_free_flow;
- }
- error = 0;
- if (ntohl(ofm->buffer_id) != UINT32_MAX) {
- struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id));
- if (buffer) {
- struct sw_flow_key key;
- uint16_t in_port = ntohs(ofm->match.in_port);
- flow_extract(buffer, in_port, &key.flow);
- flow_used(flow, buffer);
- execute_actions(dp, buffer, &key,
- ofm->actions, actions_len, false);
- } else {
- error = -ESRCH;
- }
- }
- return error;
-
-error_free_flow:
- flow_free(flow);
-error:
- if (ntohl(ofm->buffer_id) != (uint32_t) -1)
- discard_buffer(ntohl(ofm->buffer_id));
- return error;
-}
-
-static int
-mod_flow(struct datapath *dp, const struct sender *sender,
- const struct ofp_flow_mod *ofm)
-{
- int error = -ENOMEM;
- uint16_t v_code;
- size_t actions_len;
- struct sw_flow_key key;
- uint16_t priority;
- int strict;
-
- flow_extract_match(&key, &ofm->match);
-
- actions_len = ntohs(ofm->header.length) - sizeof *ofm;
-
- v_code = dp_validate_actions(dp, &key, ofm->actions, actions_len);
- if (v_code != ACT_VALIDATION_OK) {
- dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
- ofm, ntohs(ofm->header.length));
- goto error;
- }
-
- priority = key.wildcards ? ntohs(ofm->priority) : -1;
- strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 1 : 0;
- chain_modify(dp->chain, &key, priority, strict, ofm->actions, actions_len);
-
- if (ntohl(ofm->buffer_id) != UINT32_MAX) {
- struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id));
- if (buffer) {
- struct sw_flow_key skb_key;
- uint16_t in_port = ntohs(ofm->match.in_port);
- flow_extract(buffer, in_port, &skb_key.flow);
- execute_actions(dp, buffer, &skb_key,
- ofm->actions, actions_len, false);
- } else {
- error = -ESRCH;
- }
- }
- return error;
-
-error:
- if (ntohl(ofm->buffer_id) != (uint32_t) -1)
- discard_buffer(ntohl(ofm->buffer_id));
- return error;
-}
-
-static int
-recv_flow(struct datapath *dp, const struct sender *sender,
- const void *msg)
-{
- const struct ofp_flow_mod *ofm = msg;
- uint16_t command = ntohs(ofm->command);
-
- if (command == OFPFC_ADD) {
- return add_flow(dp, sender, ofm);
- } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) {
- return mod_flow(dp, sender, ofm);
- } else if (command == OFPFC_DELETE) {
- struct sw_flow_key key;
- flow_extract_match(&key, &ofm->match);
- return chain_delete(dp->chain, &key, ofm->out_port, 0, 0) ? 0 : -ESRCH;
- } else if (command == OFPFC_DELETE_STRICT) {
- struct sw_flow_key key;
- uint16_t priority;
- flow_extract_match(&key, &ofm->match);
- priority = key.wildcards ? ntohs(ofm->priority) : -1;
- return chain_delete(dp->chain, &key, ofm->out_port,
- priority, 1) ? 0 : -ESRCH;
- } else {
- return -ENODEV;
- }
-}
-
-static int
-desc_stats_dump(struct datapath *dp UNUSED, void *state UNUSED,
- struct ofpbuf *buffer)
-{
- struct ofp_desc_stats *ods = ofpbuf_put_uninit(buffer, sizeof *ods);
-
- strncpy(ods->mfr_desc, &mfr_desc, sizeof ods->mfr_desc);
- strncpy(ods->hw_desc, &hw_desc, sizeof ods->hw_desc);
- strncpy(ods->sw_desc, &sw_desc, sizeof ods->sw_desc);
- strncpy(ods->serial_num, &serial_num, sizeof ods->serial_num);
-
- return 0;
-}
-
-struct flow_stats_state {
- int table_idx;
- struct sw_table_position position;
- struct ofp_flow_stats_request rq;
- uint64_t now; /* Current time in milliseconds */
-
- struct ofpbuf *buffer;
-};
-
-#define MAX_FLOW_STATS_BYTES 4096
-
-static int
-flow_stats_init(const void *body, int body_len UNUSED, void **state)
-{
- const struct ofp_flow_stats_request *fsr = body;
- struct flow_stats_state *s = xmalloc(sizeof *s);
- s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id;
- memset(&s->position, 0, sizeof s->position);
- s->rq = *fsr;
- *state = s;
- return 0;
-}
-
-static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
-{
- struct flow_stats_state *s = private;
- fill_flow_stats(s->buffer, flow, s->table_idx, s->now);
- return s->buffer->size >= MAX_FLOW_STATS_BYTES;
-}
-
-static int flow_stats_dump(struct datapath *dp, void *state,
- struct ofpbuf *buffer)
-{
- struct flow_stats_state *s = state;
- struct sw_flow_key match_key;
-
- flow_extract_match(&match_key, &s->rq.match);
- s->buffer = buffer;
- s->now = time_msec();
- while (s->table_idx < dp->chain->n_tables
- && (s->rq.table_id == 0xff || s->rq.table_id == s->table_idx))
- {
- struct sw_table *table = dp->chain->tables[s->table_idx];
-
- if (table->iterate(table, &match_key, s->rq.out_port,
- &s->position, flow_stats_dump_callback, s))
- break;
-
- s->table_idx++;
- memset(&s->position, 0, sizeof s->position);
- }
- return s->buffer->size >= MAX_FLOW_STATS_BYTES;
-}
-
-static void flow_stats_done(void *state)
-{
- free(state);
-}
-
-struct aggregate_stats_state {
- struct ofp_aggregate_stats_request rq;
-};
-
-static int
-aggregate_stats_init(const void *body, int body_len UNUSED, void **state)
-{
- const struct ofp_aggregate_stats_request *rq = body;
- struct aggregate_stats_state *s = xmalloc(sizeof *s);
- s->rq = *rq;
- *state = s;
- return 0;
-}
-
-static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private)
-{
- struct ofp_aggregate_stats_reply *rpy = private;
- rpy->packet_count += flow->packet_count;
- rpy->byte_count += flow->byte_count;
- rpy->flow_count++;
- return 0;
-}
-
-static int aggregate_stats_dump(struct datapath *dp, void *state,
- struct ofpbuf *buffer)
-{
- struct aggregate_stats_state *s = state;
- struct ofp_aggregate_stats_request *rq = &s->rq;
- struct ofp_aggregate_stats_reply *rpy;
- struct sw_table_position position;
- struct sw_flow_key match_key;
- int table_idx;
-
- rpy = ofpbuf_put_uninit(buffer, sizeof *rpy);
- memset(rpy, 0, sizeof *rpy);
-
- flow_extract_match(&match_key, &rq->match);
- table_idx = rq->table_id == 0xff ? 0 : rq->table_id;
- memset(&position, 0, sizeof position);
- while (table_idx < dp->chain->n_tables
- && (rq->table_id == 0xff || rq->table_id == table_idx))
- {
- struct sw_table *table = dp->chain->tables[table_idx];
- int error;
-
- error = table->iterate(table, &match_key, rq->out_port, &position,
- aggregate_stats_dump_callback, rpy);
- if (error)
- return error;
-
- table_idx++;
- memset(&position, 0, sizeof position);
- }
-
- rpy->packet_count = htonll(rpy->packet_count);
- rpy->byte_count = htonll(rpy->byte_count);
- rpy->flow_count = htonl(rpy->flow_count);
- return 0;
-}
-
-static void aggregate_stats_done(void *state)
-{
- free(state);
-}
-
-static int
-table_stats_dump(struct datapath *dp, void *state UNUSED,
- struct ofpbuf *buffer)
-{
- int i;
- for (i = 0; i < dp->chain->n_tables; i++) {
- struct ofp_table_stats *ots = ofpbuf_put_uninit(buffer, sizeof *ots);
- struct sw_table_stats stats;
- dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
- strncpy(ots->name, stats.name, sizeof ots->name);
- ots->table_id = i;
- ots->wildcards = htonl(stats.wildcards);
- memset(ots->pad, 0, sizeof ots->pad);
- ots->max_entries = htonl(stats.max_flows);
- ots->active_count = htonl(stats.n_flows);
- ots->lookup_count = htonll(stats.n_lookup);
- ots->matched_count = htonll(stats.n_matched);
- }
- return 0;
-}
-
-struct port_stats_state {
- int port;
-};
-
-static int
-port_stats_init(const void *body UNUSED, int body_len UNUSED, void **state)
-{
- struct port_stats_state *s = xmalloc(sizeof *s);
- s->port = 0;
- *state = s;
- return 0;
-}
-
-static void
-dump_port_stats(struct sw_port *port, struct ofpbuf *buffer)
-{
- struct ofp_port_stats *ops = ofpbuf_put_uninit(buffer, sizeof *ops);
- ops->port_no = htons(port->port_no);
- memset(ops->pad, 0, sizeof ops->pad);
- ops->rx_packets = htonll(port->rx_packets);
- ops->tx_packets = htonll(port->tx_packets);
- ops->rx_bytes = htonll(port->rx_bytes);
- ops->tx_bytes = htonll(port->tx_bytes);
- ops->rx_dropped = htonll(-1);
- ops->tx_dropped = htonll(port->tx_dropped);
- ops->rx_errors = htonll(-1);
- ops->tx_errors = htonll(-1);
- ops->rx_frame_err = htonll(-1);
- ops->rx_over_err = htonll(-1);
- ops->rx_crc_err = htonll(-1);
- ops->collisions = htonll(-1);
-}
-
-static int port_stats_dump(struct datapath *dp, void *state,
- struct ofpbuf *buffer)
-{
- struct port_stats_state *s = state;
- int i;
-
- for (i = s->port; i < DP_MAX_PORTS; i++) {
- struct sw_port *p = &dp->ports[i];
- if (p->netdev) {
- dump_port_stats(p, buffer);
- }
- }
- s->port = i;
-
- if (dp->local_port) {
- dump_port_stats(dp->local_port, buffer);
- s->port = OFPP_LOCAL + 1;
- }
- return 0;
-}
-
-static void port_stats_done(void *state)
-{
- free(state);
-}
-
-struct stats_type {
- /* Value for 'type' member of struct ofp_stats_request. */
- int type;
-
- /* Minimum and maximum acceptable number of bytes in body member of
- * struct ofp_stats_request. */
- size_t min_body, max_body;
-
- /* Prepares to dump some kind of datapath statistics. 'body' and
- * 'body_len' are the 'body' member of the struct ofp_stats_request.
- * Returns zero if successful, otherwise a negative error code.
- * May initialize '*state' to state information. May be null if no
- * initialization is required.*/
- int (*init)(const void *body, int body_len, void **state);
-
- /* Appends statistics for 'dp' to 'buffer', which initially contains a
- * struct ofp_stats_reply. On success, it should return 1 if it should be
- * called again later with another buffer, 0 if it is done, or a negative
- * errno value on failure. */
- int (*dump)(struct datapath *dp, void *state, struct ofpbuf *buffer);
-
- /* Cleans any state created by the init or dump functions. May be null
- * if no cleanup is required. */
- void (*done)(void *state);
-};
-
-static const struct stats_type stats[] = {
- {
- OFPST_DESC,
- 0,
- 0,
- NULL,
- desc_stats_dump,
- NULL
- },
- {
- OFPST_FLOW,
- sizeof(struct ofp_flow_stats_request),
- sizeof(struct ofp_flow_stats_request),
- flow_stats_init,
- flow_stats_dump,
- flow_stats_done
- },
- {
- OFPST_AGGREGATE,
- sizeof(struct ofp_aggregate_stats_request),
- sizeof(struct ofp_aggregate_stats_request),
- aggregate_stats_init,
- aggregate_stats_dump,
- aggregate_stats_done
- },
- {
- OFPST_TABLE,
- 0,
- 0,
- NULL,
- table_stats_dump,
- NULL
- },
- {
- OFPST_PORT,
- 0,
- 0,
- port_stats_init,
- port_stats_dump,
- port_stats_done
- },
-};
-
-/* State carried across the successive stats_dump() calls that answer one
- * stats request.  Allocated by recv_stats_request(). */
-struct stats_dump_cb {
- bool done; /* Set once the handler's dump function has returned 0. */
- struct ofp_stats_request *rq; /* Heap copy (xmemdup) of the request. */
- struct sender sender; /* Copy of the requester's sender information. */
- const struct stats_type *s; /* Table entry for the request's type. */
- void *state; /* State filled in by s->init, or NULL. */
-};
-
-/* Dump callback for one stats request: builds a single OFPT_STATS_REPLY,
- * lets the per-type handler append its data, and sends the reply to the
- * requester.
- *
- * 'cb_' is the struct stats_dump_cb prepared by recv_stats_request().
- *
- * Returns 0 when the dump is complete (or was already complete), the
- * handler's positive result when it must be called again, the handler's
- * negative error if it failed, or the nonzero result of
- * send_openflow_buffer() if sending failed. */
-static int
-stats_dump(struct datapath *dp, void *cb_)
-{
-    struct stats_dump_cb *cb = cb_;
-    struct ofp_stats_reply *osr;
-    struct ofpbuf *buffer;
-    int send_err;
-    int err;
-
-    if (cb->done) {
-        return 0;
-    }
-
-    osr = make_openflow_reply(sizeof *osr, OFPT_STATS_REPLY, &cb->sender,
-                              &buffer);
-    osr->type = htons(cb->s->type);
-    osr->flags = 0;
-
-    err = cb->s->dump(dp, cb->state, buffer);
-    if (err < 0) {
-        /* The reply is never handed to send_openflow_buffer() on this
-         * path, so it must be released here or it leaks. */
-        ofpbuf_delete(buffer);
-        return err;
-    }
-
-    if (!err) {
-        cb->done = true;
-    } else {
-        /* Buffer might have been reallocated, so find our data again. */
-        osr = ofpbuf_at_assert(buffer, 0, sizeof *osr);
-        /* Host-to-network conversion: this field goes onto the wire. */
-        osr->flags = htons(OFPSF_REPLY_MORE);
-    }
-
-    send_err = send_openflow_buffer(dp, buffer, &cb->sender);
-    return send_err ? send_err : err;
-}
-
-/* Completion callback for a stats dump started by recv_stats_request().
- *
- * Runs the per-type 'done' hook, if any, on the handler state and then
- * releases the callback structure together with the copy of the request it
- * owns.  'cb_' may be null, in which case nothing is done. */
-static void
-stats_done(void *cb_)
-{
-    struct stats_dump_cb *cb = cb_;
-
-    if (!cb) {
-        return;
-    }
-
-    if (cb->s->done) {
-        cb->s->done(cb->state);
-    }
-    /* cb->rq was duplicated with xmemdup() when the dump was set up and is
-     * owned by 'cb'; without this it leaks on every completed request. */
-    free(cb->rq);
-    free(cb);
-}
-
-/* Handles an OFPT_STATS_REQUEST.
- *
- * Looks up the handler for the request's type in 'stats', checks the body
- * length against the handler's limits, runs the handler's optional 'init'
- * hook, and then starts a dump on the sender's remote with stats_dump() and
- * stats_done() as callbacks.
- *
- * Returns 0 once the dump has been started, -EINVAL for an unknown type or
- * a bad body length, or the error returned by the 'init' hook. */
-static int
-recv_stats_request(struct datapath *dp UNUSED, const struct sender *sender,
-                   const void *oh)
-{
-    const struct ofp_stats_request *rq = oh;
-    size_t rq_len = ntohs(rq->header.length);
-    int type = ntohs(rq->type);
-    const struct stats_type *st = NULL;
-    struct stats_dump_cb *cb;
-    int body_len;
-    int err = 0;
-    size_t i;
-
-    /* Find the table entry for this statistics type. */
-    for (i = 0; i < ARRAY_SIZE(stats); i++) {
-        if (stats[i].type == type) {
-            st = &stats[i];
-            break;
-        }
-    }
-    if (!st) {
-        VLOG_WARN_RL(&rl, "received stats request of unknown type %d",
-                     type);
-        return -EINVAL;
-    }
-
-    cb = xmalloc(sizeof *cb);
-    cb->done = false;
-    cb->rq = xmemdup(rq, rq_len);
-    cb->sender = *sender;
-    cb->s = st;
-    cb->state = NULL;
-
-    body_len = rq_len - offsetof(struct ofp_stats_request, body);
-    if (body_len < st->min_body || body_len > st->max_body) {
-        VLOG_WARN_RL(&rl, "stats request type %d with bad body length %d",
-                     type, body_len);
-        err = -EINVAL;
-    } else if (st->init) {
-        err = st->init(rq->body, body_len, &cb->state);
-        if (err) {
-            VLOG_WARN_RL(&rl,
-                         "failed initialization of stats request type %d: %s",
-                         type, strerror(-err));
-        }
-    }
-
-    if (err) {
-        /* Nothing was started, so undo the allocations made above. */
-        free(cb->rq);
-        free(cb);
-        return err;
-    }
-
-    remote_start_dump(sender->remote, stats_dump, stats_done, cb);
-    return 0;
-}
-
-/* Handles OFPT_ECHO_REQUEST: builds the echo reply for 'oh' and sends it
- * back to 'sender'.  Returns the result of send_openflow_buffer(). */
-static int
-recv_echo_request(struct datapath *dp, const struct sender *sender,
-                  const void *oh)
-{
-    struct ofpbuf *reply = make_echo_reply(oh);
-
-    return send_openflow_buffer(dp, reply, sender);
-}
-
-/* Handles OFPT_ECHO_REPLY.  The message is accepted and ignored; always
- * returns 0. */
-static int
-recv_echo_reply(struct datapath *dp UNUSED,
-                const struct sender *sender UNUSED,
-                const void *oh UNUSED)
-{
-    return 0;
-}
-
-/* Handles OFPT_VENDOR.  Messages carrying the Nicira vendor id are passed
- * to nx_recv_msg(); for any other vendor a warning is logged, an
- * OFPET_BAD_REQUEST/OFPBRC_BAD_VENDOR error is sent to 'sender', and
- * -EINVAL is returned. */
-static int
-recv_vendor(struct datapath *dp, const struct sender *sender,
-            const void *oh)
-{
-    const struct ofp_vendor_header *ovh = oh;
-
-    if (ntohl(ovh->vendor) == NX_VENDOR_ID) {
-        return nx_recv_msg(dp, sender, oh);
-    }
-
-    VLOG_WARN_RL(&rl, "unknown vendor: 0x%x\n", ntohl(ovh->vendor));
-    dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST,
-                      OFPBRC_BAD_VENDOR, oh, ntohs(ovh->header.length));
-    return -EINVAL;
-}
-
-/* Dispatches one OpenFlow message received from the control path.
- *
- * 'msg' points to 'length' received bytes that begin with a struct
- * ofp_header.  The message type selects a handler and the minimum size of
- * the fixed part that handler requires.
- *
- * Returns the handler's result, or:
- *   -EINVAL if fewer bytes than an OpenFlow header were received, if the
- *           length declared in the header exceeds 'length', or if the
- *           message type is unknown (an OFPBRC_BAD_TYPE error is sent to
- *           'sender' in that last case);
- *   -EFAULT if the message is shorter than the fixed part for its type. */
-int
-fwd_control_input(struct datapath *dp, const struct sender *sender,
-                  const void *msg, size_t length)
-{
-    int (*handler)(struct datapath *, const struct sender *, const void *);
-    const struct ofp_header *oh = msg;
-    size_t min_size;
-
-    /* The header must be fully present before any of its fields is read. */
-    if (length < sizeof *oh) {
-        return -EINVAL;
-    }
-
-    /* Check encapsulated length. */
-    if (ntohs(oh->length) > length) {
-        return -EINVAL;
-    }
-    /* NOTE(review): this aborts on a version mismatch; confirm that callers
-     * filter out messages of other versions before they get here. */
-    assert(oh->version == OFP_VERSION);
-
-    /* Figure out how to handle it. */
-    switch (oh->type) {
-    case OFPT_FEATURES_REQUEST:
-        min_size = sizeof(struct ofp_header);
-        handler = recv_features_request;
-        break;
-    case OFPT_GET_CONFIG_REQUEST:
-        min_size = sizeof(struct ofp_header);
-        handler = recv_get_config_request;
-        break;
-    case OFPT_SET_CONFIG:
-        min_size = sizeof(struct ofp_switch_config);
-        handler = recv_set_config;
-        break;
-    case OFPT_PACKET_OUT:
-        min_size = sizeof(struct ofp_packet_out);
-        handler = recv_packet_out;
-        break;
-    case OFPT_FLOW_MOD:
-        min_size = sizeof(struct ofp_flow_mod);
-        handler = recv_flow;
-        break;
-    case OFPT_PORT_MOD:
-        min_size = sizeof(struct ofp_port_mod);
-        handler = recv_port_mod;
-        break;
-    case OFPT_STATS_REQUEST:
-        min_size = sizeof(struct ofp_stats_request);
-        handler = recv_stats_request;
-        break;
-    case OFPT_ECHO_REQUEST:
-        min_size = sizeof(struct ofp_header);
-        handler = recv_echo_request;
-        break;
-    case OFPT_ECHO_REPLY:
-        min_size = sizeof(struct ofp_header);
-        handler = recv_echo_reply;
-        break;
-    case OFPT_VENDOR:
-        min_size = sizeof(struct ofp_vendor_header);
-        handler = recv_vendor;
-        break;
-    default:
-        dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST, OFPBRC_BAD_TYPE,
-                          msg, length);
-        return -EINVAL;
-    }
-
-    /* Handle it.  Handlers take their sizes from the length declared in the
-     * header, so that length (which is known to be <= 'length') must cover
-     * the fixed part of the message too. */
-    if (length < min_size || ntohs(oh->length) < min_size) {
-        return -EFAULT;
-    }
-    return handler(dp, sender, msg);
-}
-\f
-/* Packet buffering.
- *
- * Packets are kept in a fixed ring of N_PKT_BUFFERS slots.  A buffer id
- * combines the slot index (low PKT_BUFFER_BITS bits) with the slot's cookie
- * (shifted left by PKT_BUFFER_BITS), so an id can be checked against the
- * slot's current cookie when it is looked up later. */
-
-/* Seconds added to the current time to form a slot's 'timeout'; an occupied
- * slot is not overwritten before that time. */
-#define OVERWRITE_SECS 1
-
-struct packet_buffer {
- struct ofpbuf *buffer; /* Stored packet clone, or NULL if slot is empty. */
- uint32_t cookie; /* Incremented each time the slot is filled. */
- time_t timeout; /* Time before which the slot is not overwritten. */
-};
-
-/* The slots, and the index of the slot used by the latest save_buffer(). */
-static struct packet_buffer buffers[N_PKT_BUFFERS];
-static unsigned int buffer_idx;
-
-/* Stores a clone of 'buffer' in the next slot of the packet buffer ring and
- * returns the id under which it can be retrieved later.
- *
- * If that slot still holds a packet whose timeout has not passed, nothing
- * is stored and the all-bits-1 id (the result of returning -1 as a
- * uint32_t) is returned.  An expired packet in the slot is deleted. */
-uint32_t
-save_buffer(struct ofpbuf *buffer)
-{
-    struct packet_buffer *slot;
-
-    buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK;
-    slot = &buffers[buffer_idx];
-
-    if (slot->buffer) {
-        /* Don't buffer packet if existing entry is less than
-         * OVERWRITE_SECS old. */
-        if (time_now() < slot->timeout) { /* FIXME */
-            return (uint32_t) -1;
-        }
-        ofpbuf_delete(slot->buffer);
-    }
-
-    /* Don't use maximum cookie value since the all-bits-1 id is
-     * special. */
-    slot->cookie++;
-    if (slot->cookie >= (1u << PKT_COOKIE_BITS) - 1) {
-        slot->cookie = 0;
-    }
-
-    slot->buffer = ofpbuf_clone(buffer); /* FIXME */
-    slot->timeout = time_now() + OVERWRITE_SECS; /* FIXME */
-
-    return buffer_idx | (slot->cookie << PKT_BUFFER_BITS);
-}
-
-/* Takes the packet saved under 'id' out of its slot and returns it; the
- * slot is left empty.  Returns NULL, after printing a diagnostic, if the
- * cookie in 'id' does not match the slot's current cookie.  The returned
- * pointer is also NULL if the slot matched but held no packet. */
-static struct ofpbuf *
-retrieve_buffer(uint32_t id)
-{
-    struct packet_buffer *slot = &buffers[id & PKT_BUFFER_MASK];
-    struct ofpbuf *packet;
-
-    if (slot->cookie != id >> PKT_BUFFER_BITS) {
-        printf("cookie mismatch: %x != %x\n",
-               id >> PKT_BUFFER_BITS, slot->cookie);
-        return NULL;
-    }
-
-    packet = slot->buffer;
-    slot->buffer = NULL;
-    return packet;
-}
-
-/* Deletes the packet saved under 'id' and empties its slot.  Does nothing
- * if the cookie in 'id' does not match the slot's current cookie. */
-static void
-discard_buffer(uint32_t id)
-{
-    struct packet_buffer *slot = &buffers[id & PKT_BUFFER_MASK];
-
-    if (slot->cookie != id >> PKT_BUFFER_BITS) {
-        return;
-    }
-
-    ofpbuf_delete(slot->buffer);
-    slot->buffer = NULL;
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-/* Interface exported by OpenFlow module. */
-
-#ifndef DATAPATH_H
-#define DATAPATH_H 1
-
-#include <stdbool.h>
-#include <stdint.h>
-#include "openflow/nicira-ext.h"
-#include "ofpbuf.h"
-#include "timeval.h"
-#include "list.h"
-
-struct rconn;
-struct pvconn;
-struct sw_flow;
-struct sender;
-
-/* One switch port of a datapath.  struct datapath embeds an array of these
- * ('ports') and also points to one as its 'local_port'. */
-struct sw_port {
- uint32_t config; /* Some subset of OFPPC_* flags. */
- uint32_t state; /* Some subset of OFPPS_* flags. */
- struct datapath *dp; /* Presumably the owning datapath; confirm. */
- struct netdev *netdev; /* Presumably the underlying device; confirm. */
- struct list node; /* Element in datapath.ports. */
- /* NOTE(review): datapath.ports is an array and datapath.port_list is the
- * struct list, so 'node' presumably links into port_list; confirm. */
- unsigned long long int rx_packets, tx_packets;
- unsigned long long int rx_bytes, tx_bytes;
- unsigned long long int tx_dropped;
- uint16_t port_no;
-};
-
-/* Size of the 'ports' array in struct datapath.  The build-time assertion
- * keeps it from exceeding OFPP_MAX. */
-#define DP_MAX_PORTS 255
-BUILD_ASSERT_DECL(DP_MAX_PORTS <= OFPP_MAX);
-
-/* A userspace OpenFlow datapath: its remote connections, controller-set
- * configuration, flow chain, and switch ports. */
-struct datapath {
- /* Remote connections. */
- struct list remotes; /* All connections (including controller). */
-
- /* Listeners. */
- struct pvconn **listeners;
- size_t n_listeners;
-
- time_t last_timeout;
-
- /* Unique identifier for this datapath */
- uint64_t id;
-
- struct sw_chain *chain; /* Forwarding rules. */
-
- /* Configuration set from controller. */
- uint16_t flags;
- uint16_t miss_send_len;
-
- /* Flag controlling whether Flow End messages are generated.  Set from
- * the 'enable' member of an NXT_FLOW_END_CONFIG vendor message. */
- uint8_t send_flow_end;
-
- /* Switch ports. */
- struct sw_port ports[DP_MAX_PORTS];
- struct sw_port *local_port; /* OFPP_LOCAL port, if any. */
- struct list port_list; /* All ports, including local_port. */
-};
-
-/* Datapath creation, port and listener setup, and the run/wait pair.
- * NOTE(review): the implementations are not visible here; the int-returning
- * functions presumably report 0 or an errno value -- confirm. */
-int dp_new(struct datapath **, uint64_t dpid);
-int dp_add_port(struct datapath *, const char *netdev);
-int dp_add_local_port(struct datapath *, const char *netdev);
-void dp_add_pvconn(struct datapath *, struct pvconn *);
-void dp_run(struct datapath *);
-void dp_wait(struct datapath *);
-
-/* Sends an OpenFlow error to a sender.  Callers pass the error type, the
- * error code, and then the offending message and its length. */
-void dp_send_error_msg(struct datapath *, const struct sender *,
- uint16_t, uint16_t, const void *, size_t);
-void dp_send_flow_end(struct datapath *, struct sw_flow *,
- enum nx_flow_end_reason);
-
-/* Packet output.  The action code uses dp_output_control() when the output
- * port is OFPP_CONTROLLER and dp_output_port() for every other port. */
-void dp_output_port(struct datapath *, struct ofpbuf *, int in_port,
- int out_port, bool ignore_no_fwd);
-void dp_output_control(struct datapath *, struct ofpbuf *, int in_port,
- size_t max_len, int reason);
-
-#endif /* datapath.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-/* Functions for executing OpenFlow actions. */
-
-#include <arpa/inet.h>
-#include "csum.h"
-#include "packets.h"
-#include "dp_act.h"
-#include "openflow/nicira-ext.h"
-#include "nx_act.h"
-
-
-/* Validates an OFPAT_OUTPUT action.
- *
- * Output to OFPP_NONE is rejected.  To prevent a packet from being sent
- * back out the port it arrived on, output to the flow's own input port is
- * rejected as well, but only when the flow does not wildcard the input
- * port.  Port numbers are compared in network byte order.
- *
- * Returns OFPBAC_BAD_OUT_PORT for a rejected port, otherwise
- * ACT_VALIDATION_OK. */
-static uint16_t
-validate_output(struct datapath *dp UNUSED, const struct sw_flow_key *key,
-                const struct ofp_action_header *ah)
-{
-    struct ofp_action_output *oa = (struct ofp_action_output *)ah;
-
-    if (oa->port == htons(OFPP_NONE)) {
-        return OFPBAC_BAD_OUT_PORT;
-    }
-    if (!(key->wildcards & OFPFW_IN_PORT)
-        && oa->port == key->flow.in_port) {
-        return OFPBAC_BAD_OUT_PORT;
-    }
-    return ACT_VALIDATION_OK;
-}
-
-/* Sends 'buffer' out of 'out_port'.  OFPP_CONTROLLER is delivered through
- * dp_output_control() with reason OFPR_ACTION and at most 'max_len' bytes;
- * every other port goes through dp_output_port(). */
-static void
-do_output(struct datapath *dp, struct ofpbuf *buffer, int in_port,
-          size_t max_len, int out_port, bool ignore_no_fwd)
-{
-    if (out_port == OFPP_CONTROLLER) {
-        dp_output_control(dp, buffer, in_port, max_len, OFPR_ACTION);
-        return;
-    }
-
-    dp_output_port(dp, buffer, in_port, out_port, ignore_no_fwd);
-}
-
-/* Modify vlan tag control information (TCI). Only sets the TCI bits
- * indicated by 'mask'. If no vlan tag is present, one is added.
- *
- * 'tci' and 'mask' are in host byte order.  Whether a tag is present is
- * decided from 'key' (dl_vlan != OFP_VLAN_NONE), not by inspecting the
- * packet.  On return key->flow.dl_vlan holds the VLAN id bits of the
- * resulting TCI, in network byte order.
- *
- * NOTE(review): 'tci' itself is OR'ed in without being masked by 'mask', so
- * callers are relied upon to pass only bits covered by 'mask'.
- */
-static void
-modify_vlan_tci(struct ofpbuf *buffer, struct sw_flow_key *key,
- uint16_t tci, uint16_t mask)
-{
- struct vlan_eth_header *veh;
-
- if (key->flow.dl_vlan != htons(OFP_VLAN_NONE)) {
- /* Modify vlan id, but maintain other TCI values */
- veh = buffer->l2;
- veh->veth_tci &= ~htons(mask);
- veh->veth_tci |= htons(tci);
- } else {
- /* Insert new vlan id. */
- struct eth_header *eh = buffer->l2;
- struct vlan_eth_header tmp;
- /* Build the tagged header in 'tmp' first: 'eh' is read here, before
- * the push below changes the buffer. */
- memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN);
- memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN);
- tmp.veth_type = htons(ETH_TYPE_VLAN);
- tmp.veth_tci = htons(tci);
- tmp.veth_next_type = eh->eth_type;
-
- /* Grow the front of the packet by one VLAN header, write the tagged
- * header there, and move the L2 pointer back by the same amount. */
- veh = ofpbuf_push_uninit(buffer, VLAN_HEADER_LEN);
- memcpy(veh, &tmp, sizeof tmp);
- buffer->l2 = (char*)buffer->l2 - VLAN_HEADER_LEN;
- }
-
- key->flow.dl_vlan = veh->veth_tci & htons(VLAN_VID_MASK);
-}
-
-
-/* Remove an existing vlan header if it exists.
- *
- * The packet is left untouched unless the Ethernet type at buffer->l2 is
- * ETH_TYPE_VLAN.  Otherwise the packet shrinks by VLAN_HEADER_LEN bytes at
- * the front and an untagged Ethernet header is written at the new start. */
-static void
-vlan_pull_tag(struct ofpbuf *buffer)
-{
- struct vlan_eth_header *veh = buffer->l2;
-
- if (veh->veth_type == htons(ETH_TYPE_VLAN)) {
- struct eth_header tmp;
-
- /* Save the addresses and the encapsulated type before the header
- * area is overwritten below. */
- memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN);
- memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN);
- tmp.eth_type = veh->veth_next_type;
-
- /* Drop VLAN_HEADER_LEN bytes from the front, then lay the untagged
- * header over the new first bytes of the packet. */
- buffer->size -= VLAN_HEADER_LEN;
- buffer->data = (char*)buffer->data + VLAN_HEADER_LEN;
- buffer->l2 = (char*)buffer->l2 + VLAN_HEADER_LEN;
- memcpy(buffer->data, &tmp, sizeof tmp);
- }
-}
-
-/* Executes OFPAT_SET_VLAN_VID: writes the action's VLAN id, converted to
- * host byte order, into the packet's TCI under VLAN_VID_MASK. */
-static void
-set_vlan_vid(struct ofpbuf *buffer, struct sw_flow_key *key,
-             const struct ofp_action_header *ah)
-{
-    struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah;
-
-    modify_vlan_tci(buffer, key, ntohs(va->vlan_vid), VLAN_VID_MASK);
-}
-
-/* Executes OFPAT_SET_VLAN_PCP: shifts the action's priority value into
- * bits 13 and up of the TCI and writes it under VLAN_PCP_MASK. */
-static void
-set_vlan_pcp(struct ofpbuf *buffer, struct sw_flow_key *key,
-             const struct ofp_action_header *ah)
-{
-    struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah;
-    uint16_t pcp_bits;
-
-    pcp_bits = (uint16_t)va->vlan_pcp << 13;
-    modify_vlan_tci(buffer, key, pcp_bits, VLAN_PCP_MASK);
-}
-
-/* Executes OFPAT_STRIP_VLAN: records OFP_VLAN_NONE in the flow key and
- * removes the VLAN header from the packet, if it has one. */
-static void
-strip_vlan(struct ofpbuf *buffer, struct sw_flow_key *key,
-           const struct ofp_action_header *ah UNUSED)
-{
-    key->flow.dl_vlan = htons(OFP_VLAN_NONE);
-    vlan_pull_tag(buffer);
-}
-
-/* Executes OFPAT_SET_DL_SRC and OFPAT_SET_DL_DST: overwrites the Ethernet
- * source address when the action type is OFPAT_SET_DL_SRC, and the
- * destination address for any other type routed here. */
-static void
-set_dl_addr(struct ofpbuf *buffer, struct sw_flow_key *key UNUSED,
-            const struct ofp_action_header *ah)
-{
-    struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah;
-    struct eth_header *eh = buffer->l2;
-
-    /* The action type is still in network byte order here. */
-    if (da->type != htons(OFPAT_SET_DL_SRC)) {
-        memcpy(eh->eth_dst, da->dl_addr, sizeof eh->eth_dst);
-        return;
-    }
-
-    memcpy(eh->eth_src, da->dl_addr, sizeof eh->eth_src);
-}
-
-/* Executes OFPAT_SET_NW_SRC and OFPAT_SET_NW_DST on an IP packet.
- *
- * Replaces the IP source address (OFPAT_SET_NW_SRC) or destination address
- * (any other type routed here) with the action's 'nw_addr' and updates the
- * IP header checksum incrementally.  For TCP and UDP the transport checksum
- * is updated as well; a UDP checksum of zero is left untouched, and a UDP
- * checksum that would become zero is stored as 0xffff.
- *
- * Packets whose flow key is not of Ethernet type IP are left unchanged. */
-static void
-set_nw_addr(struct ofpbuf *buffer, struct sw_flow_key *key,
-            const struct ofp_action_header *ah)
-{
-    struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah;
-    uint16_t eth_proto = ntohs(key->flow.dl_type);
-
-    if (eth_proto == ETH_TYPE_IP) {
-        struct ip_header *nh = buffer->l3;
-        uint8_t nw_proto = key->flow.nw_proto;
-        uint32_t new, *field;
-
-        new = na->nw_addr;
-        /* 'type' is in network byte order in the action, so the constant
-         * must be converted before comparing (as set_dl_addr() does).
-         * Comparing against the host-order value selected the destination
-         * address for OFPAT_SET_NW_SRC on little-endian hosts. */
-        field = na->type == htons(OFPAT_SET_NW_SRC)
-                ? &nh->ip_src : &nh->ip_dst;
-        if (nw_proto == IP_TYPE_TCP) {
-            struct tcp_header *th = buffer->l4;
-            th->tcp_csum = recalc_csum32(th->tcp_csum, *field, new);
-        } else if (nw_proto == IP_TYPE_UDP) {
-            struct udp_header *th = buffer->l4;
-            if (th->udp_csum) {
-                th->udp_csum = recalc_csum32(th->udp_csum, *field, new);
-                if (!th->udp_csum) {
-                    th->udp_csum = 0xffff;
-                }
-            }
-        }
-        nh->ip_csum = recalc_csum32(nh->ip_csum, *field, new);
-        *field = new;
-    }
-}
-
-/* Executes OFPAT_SET_TP_SRC and OFPAT_SET_TP_DST on a TCP or UDP packet.
- *
- * Replaces the transport source port (OFPAT_SET_TP_SRC) or destination port
- * (any other type routed here) with the action's 'tp_port' and updates the
- * transport checksum incrementally.  As in set_nw_addr(), a UDP checksum of
- * zero is left untouched and a UDP checksum that would become zero is
- * stored as 0xffff.
- *
- * Packets whose flow key is not IP, or whose IP protocol is neither TCP nor
- * UDP, are left unchanged. */
-static void
-set_tp_port(struct ofpbuf *buffer, struct sw_flow_key *key,
-            const struct ofp_action_header *ah)
-{
-    struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah;
-    uint16_t eth_proto = ntohs(key->flow.dl_type);
-
-    if (eth_proto == ETH_TYPE_IP) {
-        uint8_t nw_proto = key->flow.nw_proto;
-        /* 'type' is in network byte order in the action, so the constant
-         * must be converted before comparing (as set_dl_addr() does). */
-        int set_src = ta->type == htons(OFPAT_SET_TP_SRC);
-        uint16_t new, *field;
-
-        new = ta->tp_port;
-        if (nw_proto == IP_TYPE_TCP) {
-            struct tcp_header *th = buffer->l4;
-            field = set_src ? &th->tcp_src : &th->tcp_dst;
-            th->tcp_csum = recalc_csum16(th->tcp_csum, *field, new);
-            *field = new;
-        } else if (nw_proto == IP_TYPE_UDP) {
-            struct udp_header *th = buffer->l4;
-            field = set_src ? &th->udp_src : &th->udp_dst;
-            /* A zero UDP checksum means "no checksum"; keep it that way
-             * rather than turning it into a bogus nonzero value. */
-            if (th->udp_csum) {
-                th->udp_csum = recalc_csum16(th->udp_csum, *field, new);
-                if (!th->udp_csum) {
-                    th->udp_csum = 0xffff;
-                }
-            }
-            *field = new;
-        }
-    }
-}
-
-/* Describes one built-in OpenFlow action type: the acceptable range for the
- * action's length and optional hooks to validate and execute it. */
-struct openflow_action {
- size_t min_size; /* Smallest acceptable action length, in bytes. */
- size_t max_size; /* Largest acceptable action length, in bytes. */
- /* Optional extra validation; returns ACT_VALIDATION_OK or an
- * OFPET_BAD_ACTION error code.  May be null. */
- uint16_t (*validate)(struct datapath *dp,
- const struct sw_flow_key *key,
- const struct ofp_action_header *ah);
- /* Applies the action to 'buffer' and 'key'.  May be null. */
- void (*execute)(struct ofpbuf *buffer,
- struct sw_flow_key *key,
- const struct ofp_action_header *ah);
-};
-
-/* Built-in action types, indexed by OFPAT_* value.  Every entry accepts
- * exactly one length, the size of its action structure. */
-static const struct openflow_action of_actions[] = {
-    [OFPAT_OUTPUT] = {
-        .min_size = sizeof(struct ofp_action_output),
-        .max_size = sizeof(struct ofp_action_output),
-        .validate = validate_output,
-        .execute = NULL, /* This is optimized into execute_actions */
-    },
-    [OFPAT_SET_VLAN_VID] = {
-        .min_size = sizeof(struct ofp_action_vlan_vid),
-        .max_size = sizeof(struct ofp_action_vlan_vid),
-        .validate = NULL,
-        .execute = set_vlan_vid,
-    },
-    [OFPAT_SET_VLAN_PCP] = {
-        .min_size = sizeof(struct ofp_action_vlan_pcp),
-        .max_size = sizeof(struct ofp_action_vlan_pcp),
-        .validate = NULL,
-        .execute = set_vlan_pcp,
-    },
-    [OFPAT_STRIP_VLAN] = {
-        .min_size = sizeof(struct ofp_action_header),
-        .max_size = sizeof(struct ofp_action_header),
-        .validate = NULL,
-        .execute = strip_vlan,
-    },
-    [OFPAT_SET_DL_SRC] = {
-        .min_size = sizeof(struct ofp_action_dl_addr),
-        .max_size = sizeof(struct ofp_action_dl_addr),
-        .validate = NULL,
-        .execute = set_dl_addr,
-    },
-    [OFPAT_SET_DL_DST] = {
-        .min_size = sizeof(struct ofp_action_dl_addr),
-        .max_size = sizeof(struct ofp_action_dl_addr),
-        .validate = NULL,
-        .execute = set_dl_addr,
-    },
-    [OFPAT_SET_NW_SRC] = {
-        .min_size = sizeof(struct ofp_action_nw_addr),
-        .max_size = sizeof(struct ofp_action_nw_addr),
-        .validate = NULL,
-        .execute = set_nw_addr,
-    },
-    [OFPAT_SET_NW_DST] = {
-        .min_size = sizeof(struct ofp_action_nw_addr),
-        .max_size = sizeof(struct ofp_action_nw_addr),
-        .validate = NULL,
-        .execute = set_nw_addr,
-    },
-    [OFPAT_SET_TP_SRC] = {
-        .min_size = sizeof(struct ofp_action_tp_port),
-        .max_size = sizeof(struct ofp_action_tp_port),
-        .validate = NULL,
-        .execute = set_tp_port,
-    },
-    [OFPAT_SET_TP_DST] = {
-        .min_size = sizeof(struct ofp_action_tp_port),
-        .max_size = sizeof(struct ofp_action_tp_port),
-        .validate = NULL,
-        .execute = set_tp_port,
-    },
-    /* OFPAT_VENDOR is not here, since it would blow up the array size. */
-};
-
-/* Validates one built-in OpenFlow action of the given 'type', which the
- * caller has checked to be a valid index into 'of_actions'.
- *
- * Returns OFPBAC_BAD_LEN if 'len' is outside the size range for the type,
- * otherwise the result of the type's validate hook, or ACT_VALIDATION_OK if
- * it has none. */
-static uint16_t
-validate_ofpat(struct datapath *dp, const struct sw_flow_key *key,
-               const struct ofp_action_header *ah, uint16_t type, uint16_t len)
-{
-    const struct openflow_action *act = &of_actions[type];
-
-    if (len < act->min_size || len > act->max_size) {
-        return OFPBAC_BAD_LEN;
-    }
-
-    return act->validate ? act->validate(dp, key, ah) : ACT_VALIDATION_OK;
-}
-
-/* Validates one vendor-defined action.
- *
- * Returns OFPBAC_BAD_LEN if 'len' is too short for a vendor action header,
- * OFPBAC_BAD_VENDOR if the vendor is not Nicira, and otherwise whatever
- * nx_validate_act() returns (ACT_VALIDATION_OK or an OFPET_BAD_ACTION error
- * code). */
-static uint16_t
-validate_vendor(struct datapath *dp, const struct sw_flow_key *key,
-                const struct ofp_action_header *ah, uint16_t len)
-{
-    struct ofp_action_vendor_header *avh;
-
-    if (len < sizeof(struct ofp_action_vendor_header)) {
-        return OFPBAC_BAD_LEN;
-    }
-
-    avh = (struct ofp_action_vendor_header *)ah;
-    if (ntohl(avh->vendor) != NX_VENDOR_ID) {
-        return OFPBAC_BAD_VENDOR;
-    }
-
-    return nx_validate_act(dp, key, avh, len);
-}
-
-/* Validates the 'actions_len' bytes of actions starting at 'actions'.
- *
- * Every action must have a nonzero length that is a multiple of 8 bytes and
- * fits in the remaining data, must be of a built-in or vendor type, and
- * must pass that type's own validation.  No bytes may be left over after
- * the last action.
- *
- * Returns ACT_VALIDATION_OK if the list validates, otherwise a code for the
- * OFPET_BAD_ACTION error type. */
-uint16_t
-dp_validate_actions(struct datapath *dp, const struct sw_flow_key *key,
-                    const struct ofp_action_header *actions,
-                    size_t actions_len)
-{
-    uint8_t *cursor = (uint8_t *)actions;
-    size_t remaining = actions_len;
-
-    while (remaining >= sizeof(struct ofp_action_header)) {
-        struct ofp_action_header *ah = (struct ofp_action_header *)cursor;
-        size_t len = ntohs(ah->len);
-        uint16_t status;
-        uint16_t type;
-
-        /* Make sure there's enough remaining data for the specified length
-         * and that the action length is a multiple of 64 bits. */
-        if (!len || remaining < len || (len % 8) != 0) {
-            return OFPBAC_BAD_LEN;
-        }
-
-        type = ntohs(ah->type);
-        if (type < ARRAY_SIZE(of_actions)) {
-            status = validate_ofpat(dp, key, ah, type, len);
-        } else if (type == OFPAT_VENDOR) {
-            status = validate_vendor(dp, key, ah, len);
-        } else {
-            return OFPBAC_BAD_TYPE;
-        }
-        if (status != ACT_VALIDATION_OK) {
-            return status;
-        }
-
-        cursor += len;
-        remaining -= len;
-    }
-
-    /* Anything left is too short to be an action: trailing garbage. */
-    return remaining ? OFPBAC_BAD_LEN : ACT_VALIDATION_OK;
-}
-
-/* Runs the execute hook of built-in action 'type' on 'buffer' and 'key'.
- * Types without a hook (such as OFPAT_OUTPUT) are a no-op here. */
-static void
-execute_ofpat(struct ofpbuf *buffer, struct sw_flow_key *key,
-              const struct ofp_action_header *ah, uint16_t type)
-{
-    void (*run)(struct ofpbuf *, struct sw_flow_key *,
-                const struct ofp_action_header *) = of_actions[type].execute;
-
-    if (run) {
-        run(buffer, key, ah);
-    }
-}
-
-/* Applies a vendor-defined action to 'buffer'.  Nicira actions go to
- * nx_execute_act(); any other vendor only produces a diagnostic. */
-static void
-execute_vendor(struct ofpbuf *buffer, const struct sw_flow_key *key,
-               const struct ofp_action_header *ah)
-{
-    struct ofp_action_vendor_header *avh
-        = (struct ofp_action_vendor_header *)ah;
-
-    if (ntohl(avh->vendor) == NX_VENDOR_ID) {
-        nx_execute_act(buffer, key, avh);
-        return;
-    }
-
-    /* This should not be possible due to prior validation. */
-    printf("attempt to execute action with unknown vendor: %#x\n",
-           ntohl(avh->vendor));
-}
-
-/* Execute a list of actions against 'buffer'.
- *
- * 'actions' holds 'actions_len' bytes of actions, which must already have
- * been validated.  'key' is the packet's flow key; the modifying actions
- * may update it.  'ignore_no_fwd' is passed through to the output code.
- *
- * 'buffer' is consumed: it is either handed to the final output or
- * deleted. */
-void execute_actions(struct datapath *dp, struct ofpbuf *buffer,
- struct sw_flow_key *key,
- const struct ofp_action_header *actions, size_t actions_len,
- int ignore_no_fwd)
-{
- /* Every output action needs a separate clone of 'buffer', but the common
- * case is just a single output action, so that doing a clone and then
- * freeing the original buffer is wasteful. So the following code is
- * slightly obscure just to avoid that. */
- int prev_port;
- size_t max_len=0; /* Initialize to make compiler happy */
- uint16_t in_port = ntohs(key->flow.in_port);
- uint8_t *p = (uint8_t *)actions;
-
- /* -1 means that no output action is pending. */
- prev_port = -1;
-
- /* The action list was already validated, so we can be a bit looser
- * in our sanity-checking. */
- while (actions_len > 0) {
- struct ofp_action_header *ah = (struct ofp_action_header *)p;
- /* htons() is used where ntohs() is meant; both perform the same
- * conversion on 16-bit values. */
- size_t len = htons(ah->len);
-
- /* An output seen on an earlier iteration is sent now, on a clone,
- * before the current action gets a chance to modify the packet. */
- if (prev_port != -1) {
- do_output(dp, ofpbuf_clone(buffer), in_port, max_len,
- prev_port, ignore_no_fwd);
- prev_port = -1;
- }
-
- if (ah->type == htons(OFPAT_OUTPUT)) {
- /* Only remember the output; it is performed later. */
- struct ofp_action_output *oa = (struct ofp_action_output *)p;
- prev_port = ntohs(oa->port);
- max_len = ntohs(oa->max_len);
- } else {
- uint16_t type = ntohs(ah->type);
-
- if (type < ARRAY_SIZE(of_actions)) {
- execute_ofpat(buffer, key, ah, type);
- } else if (type == OFPAT_VENDOR) {
- execute_vendor(buffer, key, ah);
- }
- }
-
- p += len;
- actions_len -= len;
- }
- /* A still-pending output gets the original buffer, saving a clone;
- * with nothing pending the buffer is simply released. */
- if (prev_port != -1) {
- do_output(dp, buffer, in_port, max_len, prev_port, ignore_no_fwd);
- } else {
- ofpbuf_delete(buffer);
- }
-}
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef DP_ACT_H
-#define DP_ACT_H 1
-
-#include "openflow/openflow.h"
-#include "switch-flow.h"
-#include "datapath.h"
-
-/* Value returned by the action validation functions when nothing is wrong.
- * Any other return value is a code for the OFPET_BAD_ACTION error type. */
-#define ACT_VALIDATION_OK ((uint16_t)-1)
-
-/* Validates a list of actions, given as a pointer and a length in bytes. */
-uint16_t dp_validate_actions(struct datapath *, const struct sw_flow_key *,
- const struct ofp_action_header *, size_t);
-/* Executes an already validated list of actions against a packet buffer. */
-void execute_actions(struct datapath *, struct ofpbuf *,
- struct sw_flow_key *, const struct ofp_action_header *,
- size_t action_len, int ignore_no_fwd);
-
-#endif /* dp_act.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-/* Functions for Nicira-extended actions. */
-#include "openflow/nicira-ext.h"
-#include "nx_act.h"
-
-/* Validates a Nicira vendor action.  No Nicira actions are implemented
- * yet, so every action is rejected with OFPBAC_BAD_VENDOR_TYPE. */
-uint16_t
-nx_validate_act(struct datapath *dp UNUSED,
-                const struct sw_flow_key *key UNUSED,
-                const struct ofp_action_vendor_header *avh UNUSED,
-                uint16_t len UNUSED)
-{
-    return OFPBAC_BAD_VENDOR_TYPE;
-}
-
-/* Executes a Nicira vendor action.  Intentionally empty: there are no
- * Nicira actions to execute yet. */
-void
-nx_execute_act(struct ofpbuf *buffer UNUSED,
-               const struct sw_flow_key *key UNUSED,
-               const struct ofp_action_vendor_header *avh UNUSED)
-{
-}
-
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef NX_ACT_H
-#define NX_ACT_H 1
-
-#include "switch-flow.h"
-#include "datapath.h"
-
-
-/* Validates a Nicira vendor action that is 'len' bytes long.  Returns
- * ACT_VALIDATION_OK or a code for the OFPET_BAD_ACTION error type. */
-uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key,
- const struct ofp_action_vendor_header *avh, uint16_t len);
-
-/* Executes a Nicira vendor action against 'buffer'. */
-void nx_execute_act(struct ofpbuf *buffer,
- const struct sw_flow_key *key,
- const struct ofp_action_vendor_header *avh);
-
-#endif /* nx_act.h */
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <errno.h>
-#include <arpa/inet.h>
-#include "openflow/nicira-ext.h"
-#include "nx_msg.h"
-
-int nx_recv_msg(struct datapath *dp, const struct sender *sender,
- const void *oh)
-{
- const struct nicira_header *nh = oh;
-
- switch (ntohl(nh->subtype)) {
- case NXT_FLOW_END_CONFIG: {
- const struct nx_flow_end_config *nfec = oh;
- dp->send_flow_end = nfec->enable;
- return 0;
- }
-
- default:
- dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST,
- OFPBRC_BAD_SUBTYPE, oh, ntohs(nh->header.length));
- return -EINVAL;
- }
-
- return -EINVAL;
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef NX_MSG_H
-#define NX_MSG_H 1
-
-#include "datapath.h"
-
-struct sender;
-
-int nx_recv_msg(struct datapath *, const struct sender *, const void *);
-
-#endif /* nx_msg.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "switch-flow.h"
-#include <arpa/inet.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include "ofpbuf.h"
-#include "openflow/openflow.h"
-#include "openflow/nicira-ext.h"
-#include "packets.h"
-#include "timeval.h"
-
-/* Internal function used to compare fields in flow. */
-static inline int
-flow_fields_match(const struct flow *a, const struct flow *b, uint16_t w,
- uint32_t src_mask, uint32_t dst_mask)
-{
- return ((w & OFPFW_IN_PORT || a->in_port == b->in_port)
- && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan)
- && (w & OFPFW_DL_SRC || eth_addr_equals(a->dl_src, b->dl_src))
- && (w & OFPFW_DL_DST || eth_addr_equals(a->dl_dst, b->dl_dst))
- && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type)
- && !((a->nw_src ^ b->nw_src) & src_mask)
- && !((a->nw_dst ^ b->nw_dst) & dst_mask)
- && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto)
- && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src)
- && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst));
-}
-
-/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
- * modulo wildcards in 'b', zero otherwise. */
-inline int
-flow_matches_1wild(const struct sw_flow_key *a, const struct sw_flow_key *b)
-{
- return flow_fields_match(&a->flow, &b->flow, b->wildcards,
- b->nw_src_mask, b->nw_dst_mask);
-}
-
-/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
- * modulo wildcards in 'a' or 'b', zero otherwise. */
-inline int
-flow_matches_2wild(const struct sw_flow_key *a, const struct sw_flow_key *b)
-{
- return flow_fields_match(&a->flow, &b->flow, a->wildcards | b->wildcards,
- a->nw_src_mask & b->nw_src_mask,
- a->nw_dst_mask & b->nw_dst_mask);
-}
-
-/* Returns nonzero if 't' (the table entry's key) and 'd' (the key
- * describing the match) match, that is, if their fields are
- * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the
- * wildcards must match in both 't_key' and 'd_key'. Note that the
- * table's wildcards are ignored unless 'strict' is set. */
-int
-flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d,
- int strict)
-{
- if (strict && d->wildcards != t->wildcards) {
- return 0;
- }
- return flow_matches_1wild(t, d);
-}
-
-void
-flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from)
-{
- to->wildcards = ntohl(from->wildcards) & OFPFW_ALL;
- to->flow.reserved = 0;
- to->flow.in_port = from->in_port;
- to->flow.dl_vlan = from->dl_vlan;
- memcpy(to->flow.dl_src, from->dl_src, ETH_ADDR_LEN);
- memcpy(to->flow.dl_dst, from->dl_dst, ETH_ADDR_LEN);
- to->flow.dl_type = from->dl_type;
-
- to->flow.nw_src = to->flow.nw_dst = to->flow.nw_proto = 0;
- to->flow.tp_src = to->flow.tp_dst = 0;
-
-#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST)
-#define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO)
- if (to->wildcards & OFPFW_DL_TYPE) {
- /* Can't sensibly match on network or transport headers if the
- * data link type is unknown. */
- to->wildcards |= OFPFW_NW | OFPFW_TP;
- } else if (from->dl_type == htons(ETH_TYPE_IP)) {
- to->flow.nw_src = from->nw_src;
- to->flow.nw_dst = from->nw_dst;
- to->flow.nw_proto = from->nw_proto;
-
- if (to->wildcards & OFPFW_NW_PROTO) {
- /* Can't sensibly match on transport headers if the network
- * protocol is unknown. */
- to->wildcards |= OFPFW_TP;
- } else if (from->nw_proto == IPPROTO_TCP
- || from->nw_proto == IPPROTO_UDP
- || from->nw_proto == IPPROTO_ICMP) {
- to->flow.tp_src = from->tp_src;
- to->flow.tp_dst = from->tp_dst;
- } else {
- /* Transport layer fields are undefined. Mark them as
- * exact-match to allow such flows to reside in table-hash,
- * instead of falling into table-linear. */
- to->wildcards &= ~OFPFW_TP;
- }
- } else {
- /* Network and transport layer fields are undefined. Mark them
- * as exact-match to allow such flows to reside in table-hash,
- * instead of falling into table-linear. */
- to->wildcards &= ~(OFPFW_NW | OFPFW_TP);
- }
-
- /* We set these late because code above adjusts to->wildcards. */
- to->nw_src_mask = flow_nw_bits_to_mask(to->wildcards, OFPFW_NW_SRC_SHIFT);
- to->nw_dst_mask = flow_nw_bits_to_mask(to->wildcards, OFPFW_NW_DST_SHIFT);
-}
-
-/* Allocates and returns a new flow with room for 'actions_len' actions.
- * Returns the new flow or a null pointer on failure. */
-struct sw_flow *
-flow_alloc(size_t actions_len)
-{
- struct sw_flow_actions *sfa;
- size_t size = sizeof *sfa + actions_len;
- struct sw_flow *flow = malloc(sizeof *flow);
- if (!flow)
- return NULL;
-
- sfa = malloc(size);
- if (!sfa) {
- free(flow);
- return NULL;
- }
- sfa->actions_len = actions_len;
- flow->sf_acts = sfa;
- return flow;
-}
-
-/* Frees 'flow' immediately. */
-void
-flow_free(struct sw_flow *flow)
-{
- if (!flow) {
- return;
- }
- free(flow->sf_acts);
- free(flow);
-}
-
-/* Copies 'actions' into a newly allocated structure for use by 'flow'
- * and frees the structure that defined the previous actions. */
-void flow_replace_acts(struct sw_flow *flow,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_flow_actions *sfa;
- int size = sizeof *sfa + actions_len;
-
- sfa = malloc(size);
- if (unlikely(!sfa))
- return;
-
- sfa->actions_len = actions_len;
- memcpy(sfa->actions, actions, actions_len);
-
- free(flow->sf_acts);
- flow->sf_acts = sfa;
-
- return;
-}
-
-/* Prints a representation of 'key' to the kernel log. */
-void
-print_flow(const struct sw_flow_key *key)
-{
- const struct flow *f = &key->flow;
- printf("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x"
- "->%02x:%02x:%02x:%02x:%02x:%02x "
- "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n",
- key->wildcards, ntohs(f->in_port), ntohs(f->dl_vlan),
- f->dl_src[0], f->dl_src[1], f->dl_src[2],
- f->dl_src[3], f->dl_src[4], f->dl_src[5],
- f->dl_dst[0], f->dl_dst[1], f->dl_dst[2],
- f->dl_dst[3], f->dl_dst[4], f->dl_dst[5],
- ntohs(f->dl_type),
- ((unsigned char *)&f->nw_src)[0],
- ((unsigned char *)&f->nw_src)[1],
- ((unsigned char *)&f->nw_src)[2],
- ((unsigned char *)&f->nw_src)[3],
- ((unsigned char *)&f->nw_dst)[0],
- ((unsigned char *)&f->nw_dst)[1],
- ((unsigned char *)&f->nw_dst)[2],
- ((unsigned char *)&f->nw_dst)[3],
- ntohs(f->tp_src), ntohs(f->tp_dst));
-}
-
-bool flow_timeout(struct sw_flow *flow)
-{
- uint64_t now = time_msec();
- if (flow->idle_timeout != OFP_FLOW_PERMANENT
- && now > flow->used + flow->idle_timeout * 1000) {
- flow->reason = NXFER_IDLE_TIMEOUT;
- return true;
- } else if (flow->hard_timeout != OFP_FLOW_PERMANENT
- && now > flow->created + flow->hard_timeout * 1000) {
- flow->reason = NXFER_HARD_TIMEOUT;
- return true;
- } else {
- return false;
- }
-}
-
-/* Returns nonzero if 'flow' contains an output action to 'out_port' or
- * has the value OFPP_NONE. 'out_port' is in network-byte order. */
-int flow_has_out_port(struct sw_flow *flow, uint16_t out_port)
-{
- struct sw_flow_actions *sf_acts = flow->sf_acts;
- size_t actions_len = sf_acts->actions_len;
- uint8_t *p = (uint8_t *)sf_acts->actions;
-
- if (out_port == htons(OFPP_NONE))
- return 1;
-
- while (actions_len > 0) {
- struct ofp_action_header *ah = (struct ofp_action_header *)p;
- size_t len = ntohs(ah->len);
-
- if (ah->type == htons(OFPAT_OUTPUT)) {
- struct ofp_action_output *oa = (struct ofp_action_output *)p;
- if (oa->port == out_port) {
- return 1;
- }
- }
- p += len;
- actions_len -= len;
- }
-
- return 0;
-}
-
-void flow_used(struct sw_flow *flow, struct ofpbuf *buffer)
-{
- flow->used = time_msec();
-
- if (flow->key.flow.dl_type == htons(ETH_TYPE_IP)) {
- struct ip_header *nh = buffer->l3;
- flow->ip_tos = nh->ip_tos;
-
- if (flow->key.flow.nw_proto == IP_TYPE_TCP) {
- struct tcp_header *th = buffer->l4;
- flow->tcp_flags |= TCP_FLAGS(th->tcp_ctl);
- }
- }
-
- flow->packet_count++;
- flow->byte_count += buffer->size;
-}
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#ifndef SWITCH_FLOW_H
-#define SWITCH_FLOW_H 1
-
-#include <time.h>
-#include "openflow/openflow.h"
-#include "flow.h"
-#include "list.h"
-
-struct ofp_match;
-
-/* Identification data for a flow. */
-struct sw_flow_key {
- struct flow flow; /* Flow data (in network byte order). */
- uint32_t wildcards; /* Wildcard fields (in host byte order). */
- uint32_t nw_src_mask; /* 1-bit in each significant nw_src bit. */
- uint32_t nw_dst_mask; /* 1-bit in each significant nw_dst bit. */
-};
-
-struct sw_flow_actions {
- size_t actions_len;
- struct ofp_action_header actions[0];
-};
-
-struct sw_flow {
- struct sw_flow_key key;
-
- uint16_t priority; /* Only used on entries with wildcards. */
- uint16_t idle_timeout; /* Idle time before discarding (seconds). */
- uint16_t hard_timeout; /* Hard expiration time (seconds) */
- uint64_t used; /* Last used time. */
- uint64_t created; /* When the flow was created. */
- uint64_t packet_count; /* Number of packets seen. */
- uint64_t byte_count; /* Number of bytes seen. */
- uint8_t reason; /* Reason flow expired (one of NXFER_*). */
-
- uint8_t tcp_flags; /* Union of seen TCP flags. */
- uint8_t ip_tos; /* IP TOS value. */
-
- struct sw_flow_actions *sf_acts;
-
- /* Private to table implementations. */
- struct list node;
- struct list iter_node;
- unsigned long int serial;
-};
-
-int flow_matches_1wild(const struct sw_flow_key *, const struct sw_flow_key *);
-int flow_matches_2wild(const struct sw_flow_key *, const struct sw_flow_key *);
-int flow_matches_desc(const struct sw_flow_key *, const struct sw_flow_key *,
- int);
-int flow_has_out_port(struct sw_flow *flow, uint16_t out_port);
-struct sw_flow *flow_alloc(size_t);
-void flow_free(struct sw_flow *);
-void flow_deferred_free(struct sw_flow *);
-void flow_deferred_free_acts(struct sw_flow_actions *);
-void flow_replace_acts(struct sw_flow *, const struct ofp_action_header *,
- size_t);
-void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from);
-
-void print_flow(const struct sw_flow_key *);
-bool flow_timeout(struct sw_flow *flow);
-void flow_used(struct sw_flow *flow, struct ofpbuf *buffer);
-
-#endif /* switch-flow.h */
+++ /dev/null
-/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "table.h"
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include "openflow/nicira-ext.h"
-#include "crc32.h"
-#include "datapath.h"
-#include "flow.h"
-#include "switch-flow.h"
-
-struct sw_table_hash {
- struct sw_table swt;
- struct crc32 crc32;
- unsigned int n_flows;
- unsigned int bucket_mask; /* Number of buckets minus 1. */
- struct sw_flow **buckets;
-};
-
-static struct sw_flow **find_bucket(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int crc = crc32_calculate(&th->crc32, key,
- offsetof(struct sw_flow_key, wildcards));
- return &th->buckets[crc & th->bucket_mask];
-}
-
-static struct sw_flow *table_hash_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_flow *flow = *find_bucket(swt, key);
- return flow && !flow_compare(&flow->key.flow, &key->flow) ? flow : NULL;
-}
-
-static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- struct sw_flow **bucket;
- int retval;
-
- if (flow->key.wildcards != 0)
- return 0;
-
- bucket = find_bucket(swt, &flow->key);
- if (*bucket == NULL) {
- th->n_flows++;
- *bucket = flow;
- retval = 1;
- } else {
- struct sw_flow *old_flow = *bucket;
- if (!flow_compare(&old_flow->key.flow, &flow->key.flow)) {
- *bucket = flow;
- flow_free(old_flow);
- retval = 1;
- } else {
- retval = 0;
- }
- }
- return retval;
-}
-
-static int table_hash_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int count = 0;
-
- if (key->wildcards == 0) {
- struct sw_flow **bucket = find_bucket(swt, key);
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count = 1;
- }
- } else {
- unsigned int i;
-
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count++;
- }
- }
- }
- return count;
-}
-
-/* Caller must update n_flows. */
-static void
-do_delete(struct sw_flow **bucket)
-{
- flow_free(*bucket);
- *bucket = NULL;
-}
-
-/* Returns number of deleted flows. We ignore the priority
- * argument, since all exact-match entries are the same (highest)
- * priority. */
-static int table_hash_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- uint16_t priority UNUSED, int strict)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int count = 0;
-
- if (key->wildcards == 0) {
- struct sw_flow **bucket = find_bucket(swt, key);
- struct sw_flow *flow = *bucket;
- if (flow && !flow_compare(&flow->key.flow, &key->flow)
- && flow_has_out_port(flow, out_port)) {
- dp_send_flow_end(dp, flow, NXFER_DELETE);
- do_delete(bucket);
- count = 1;
- }
- } else {
- unsigned int i;
-
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow && flow_matches_desc(&flow->key, key, strict)
- && flow_has_out_port(flow, out_port)) {
- dp_send_flow_end(dp, flow, NXFER_DELETE);
- do_delete(bucket);
- count++;
- }
- }
- }
- th->n_flows -= count;
- return count;
-}
-
-static void table_hash_timeout(struct sw_table *swt, struct list *deleted)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int i;
-
- for (i = 0; i <= th->bucket_mask; i++) {
- struct sw_flow **bucket = &th->buckets[i];
- struct sw_flow *flow = *bucket;
- if (flow && flow_timeout(flow)) {
- list_push_back(deleted, &flow->node);
- *bucket = NULL;
- th->n_flows--;
- }
- }
-}
-
-static void table_hash_destroy(struct sw_table *swt)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- unsigned int i;
- for (i = 0; i <= th->bucket_mask; i++) {
- if (th->buckets[i]) {
- flow_free(th->buckets[i]);
- }
- }
- free(th->buckets);
- free(th);
-}
-
-static int table_hash_iterate(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *private),
- void *private)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
-
- if (position->private[0] > th->bucket_mask)
- return 0;
-
- if (key->wildcards == 0) {
- struct sw_flow *flow = table_hash_lookup(swt, key);
- position->private[0] = -1;
- if (!flow || !flow_has_out_port(flow, out_port)) {
- return 0;
- }
- return callback(flow, private);
- } else {
- int i;
-
- for (i = position->private[0]; i <= th->bucket_mask; i++) {
- struct sw_flow *flow = th->buckets[i];
- if (flow && flow_matches_1wild(&flow->key, key)
- && flow_has_out_port(flow, out_port)) {
- int error = callback(flow, private);
- if (error) {
- position->private[0] = i + 1;
- return error;
- }
- }
- }
- return 0;
- }
-}
-
-static void table_hash_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_hash *th = (struct sw_table_hash *) swt;
- stats->name = "hash";
- stats->wildcards = 0; /* No wildcards are supported. */
- stats->n_flows = th->n_flows;
- stats->max_flows = th->bucket_mask + 1;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-struct sw_table *table_hash_create(unsigned int polynomial,
- unsigned int n_buckets)
-{
- struct sw_table_hash *th;
- struct sw_table *swt;
-
- th = malloc(sizeof *th);
- if (th == NULL)
- return NULL;
- memset(th, '\0', sizeof *th);
-
- assert(!(n_buckets & (n_buckets - 1)));
- th->buckets = calloc(n_buckets, sizeof *th->buckets);
- if (th->buckets == NULL) {
- printf("failed to allocate %u buckets\n", n_buckets);
- free(th);
- return NULL;
- }
- th->n_flows = 0;
- th->bucket_mask = n_buckets - 1;
-
- swt = &th->swt;
- swt->lookup = table_hash_lookup;
- swt->insert = table_hash_insert;
- swt->modify = table_hash_modify;
- swt->delete = table_hash_delete;
- swt->timeout = table_hash_timeout;
- swt->destroy = table_hash_destroy;
- swt->iterate = table_hash_iterate;
- swt->stats = table_hash_stats;
-
- crc32_init(&th->crc32, polynomial);
-
- return swt;
-}
-
-/* Double-hashing table. */
-
-struct sw_table_hash2 {
- struct sw_table swt;
- struct sw_table *subtable[2];
-};
-
-static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- int i;
-
- for (i = 0; i < 2; i++) {
- struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
- if (flow && !flow_compare(&flow->key.flow, &key->flow))
- return flow;
- }
- return NULL;
-}
-
-static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
-
- if (table_hash_insert(t2->subtable[0], flow))
- return 1;
- return table_hash_insert(t2->subtable[1], flow);
-}
-
-static int table_hash2_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- return (table_hash_modify(t2->subtable[0], key, priority, strict,
- actions, actions_len)
- + table_hash_modify(t2->subtable[1], key, priority, strict,
- actions, actions_len));
-}
-
-static int table_hash2_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- uint16_t priority, int strict)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- return (table_hash_delete(dp, t2->subtable[0], key, out_port,
- priority, strict)
- + table_hash_delete(dp, t2->subtable[1], key, out_port,
- priority, strict));
-}
-
-static void table_hash2_timeout(struct sw_table *swt, struct list *deleted)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- table_hash_timeout(t2->subtable[0], deleted);
- table_hash_timeout(t2->subtable[1], deleted);
-}
-
-static void table_hash2_destroy(struct sw_table *swt)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- table_hash_destroy(t2->subtable[0]);
- table_hash_destroy(t2->subtable[1]);
- free(t2);
-}
-
-static int table_hash2_iterate(struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *),
- void *private)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- int i;
-
- for (i = position->private[1]; i < 2; i++) {
- int error = table_hash_iterate(t2->subtable[i], key, out_port,
- position, callback, private);
- if (error) {
- return error;
- }
- position->private[0] = 0;
- position->private[1]++;
- }
- return 0;
-}
-
-static void table_hash2_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
- struct sw_table_stats substats[2];
- int i;
-
- for (i = 0; i < 2; i++)
- table_hash_stats(t2->subtable[i], &substats[i]);
- stats->name = "hash2";
- stats->wildcards = 0; /* No wildcards are supported. */
- stats->n_flows = substats[0].n_flows + substats[1].n_flows;
- stats->max_flows = substats[0].max_flows + substats[1].max_flows;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
- unsigned int poly1, unsigned int buckets1)
-
-{
- struct sw_table_hash2 *t2;
- struct sw_table *swt;
-
- t2 = malloc(sizeof *t2);
- if (t2 == NULL)
- return NULL;
- memset(t2, '\0', sizeof *t2);
-
- t2->subtable[0] = table_hash_create(poly0, buckets0);
- if (t2->subtable[0] == NULL)
- goto out_free_t2;
-
- t2->subtable[1] = table_hash_create(poly1, buckets1);
- if (t2->subtable[1] == NULL)
- goto out_free_subtable0;
-
- swt = &t2->swt;
- swt->lookup = table_hash2_lookup;
- swt->insert = table_hash2_insert;
- swt->modify = table_hash2_modify;
- swt->delete = table_hash2_delete;
- swt->timeout = table_hash2_timeout;
- swt->destroy = table_hash2_destroy;
- swt->iterate = table_hash2_iterate;
- swt->stats = table_hash2_stats;
-
- return swt;
-
-out_free_subtable0:
- table_hash_destroy(t2->subtable[0]);
-out_free_t2:
- free(t2);
- return NULL;
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include "table.h"
-#include <stdlib.h>
-#include "flow.h"
-#include "list.h"
-#include "openflow/openflow.h"
-#include "openflow/nicira-ext.h"
-#include "switch-flow.h"
-#include "datapath.h"
-
-struct sw_table_linear {
- struct sw_table swt;
-
- unsigned int max_flows;
- unsigned int n_flows;
- struct list flows;
- struct list iter_flows;
- unsigned long int next_serial;
-};
-
-static struct sw_flow *table_linear_lookup(struct sw_table *swt,
- const struct sw_flow_key *key)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) {
- if (flow_matches_1wild(key, &flow->key))
- return flow;
- }
- return NULL;
-}
-
-static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *f;
-
- /* Loop through the existing list of entries. New entries will
- * always be placed behind those with equal priority. Just replace
- * any flows that match exactly.
- */
- LIST_FOR_EACH (f, struct sw_flow, node, &tl->flows) {
- if (f->priority == flow->priority
- && f->key.wildcards == flow->key.wildcards
- && flow_matches_2wild(&f->key, &flow->key)) {
- flow->serial = f->serial;
- list_replace(&flow->node, &f->node);
- list_replace(&flow->iter_node, &f->iter_node);
- flow_free(f);
- return 1;
- }
-
- if (f->priority < flow->priority)
- break;
- }
-
- /* Make sure there's room in the table. */
- if (tl->n_flows >= tl->max_flows) {
- return 0;
- }
- tl->n_flows++;
-
- /* Insert the entry immediately in front of where we're pointing. */
- flow->serial = tl->next_serial++;
- list_insert(&f->node, &flow->node);
- list_push_front(&tl->iter_flows, &flow->iter_node);
-
- return 1;
-}
-
-static int table_linear_modify(struct sw_table *swt,
- const struct sw_flow_key *key, uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- unsigned int count = 0;
-
- LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) {
- if (flow_matches_desc(&flow->key, key, strict)
- && (!strict || (flow->priority == priority))) {
- flow_replace_acts(flow, actions, actions_len);
- count++;
- }
- }
- return count;
-}
-
-static void
-do_delete(struct sw_flow *flow)
-{
- list_remove(&flow->node);
- list_remove(&flow->iter_node);
- flow_free(flow);
-}
-
-static int table_linear_delete(struct datapath *dp, struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- uint16_t priority, int strict)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow, *n;
- unsigned int count = 0;
-
- LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) {
- if (flow_matches_desc(&flow->key, key, strict)
- && flow_has_out_port(flow, out_port)
- && (!strict || (flow->priority == priority))) {
- dp_send_flow_end(dp, flow, NXFER_DELETE);
- do_delete(flow);
- count++;
- }
- }
- tl->n_flows -= count;
- return count;
-}
-
-static void table_linear_timeout(struct sw_table *swt, struct list *deleted)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow, *n;
-
- LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) {
- if (flow_timeout(flow)) {
- list_remove(&flow->node);
- list_remove(&flow->iter_node);
- list_push_back(deleted, &flow->node);
- tl->n_flows--;
- }
- }
-}
-
-static void table_linear_destroy(struct sw_table *swt)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
-
- while (!list_is_empty(&tl->flows)) {
- struct sw_flow *flow = CONTAINER_OF(list_front(&tl->flows),
- struct sw_flow, node);
- list_remove(&flow->node);
- flow_free(flow);
- }
- free(tl);
-}
-
-static int table_linear_iterate(struct sw_table *swt,
- const struct sw_flow_key *key,
- uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *, void *),
- void *private)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- struct sw_flow *flow;
- unsigned long start;
-
- start = ~position->private[0];
- LIST_FOR_EACH (flow, struct sw_flow, iter_node, &tl->iter_flows) {
- if (flow->serial <= start
- && flow_matches_2wild(key, &flow->key)
- && flow_has_out_port(flow, out_port)) {
- int error = callback(flow, private);
- if (error) {
- position->private[0] = ~(flow->serial - 1);
- return error;
- }
- }
- }
- return 0;
-}
-
-static void table_linear_stats(struct sw_table *swt,
- struct sw_table_stats *stats)
-{
- struct sw_table_linear *tl = (struct sw_table_linear *) swt;
- stats->name = "linear";
- stats->wildcards = OFPFW_ALL;
- stats->n_flows = tl->n_flows;
- stats->max_flows = tl->max_flows;
- stats->n_lookup = swt->n_lookup;
- stats->n_matched = swt->n_matched;
-}
-
-
-struct sw_table *table_linear_create(unsigned int max_flows)
-{
- struct sw_table_linear *tl;
- struct sw_table *swt;
-
- tl = calloc(1, sizeof *tl);
- if (tl == NULL)
- return NULL;
-
- swt = &tl->swt;
- swt->lookup = table_linear_lookup;
- swt->insert = table_linear_insert;
- swt->modify = table_linear_modify;
- swt->delete = table_linear_delete;
- swt->timeout = table_linear_timeout;
- swt->destroy = table_linear_destroy;
- swt->iterate = table_linear_iterate;
- swt->stats = table_linear_stats;
-
- tl->max_flows = max_flows;
- tl->n_flows = 0;
- list_init(&tl->flows);
- list_init(&tl->iter_flows);
- tl->next_serial = 0;
-
- return swt;
-}
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-/* Individual switching tables. Generally grouped together in a chain (see
- * chain.h). */
-
-#ifndef TABLE_H
-#define TABLE_H 1
-
-#include <stddef.h>
-#include <stdint.h>
-#include "datapath.h"
-
-struct sw_flow;
-struct sw_flow_key;
-struct ofp_action_header;
-struct list;
-
-/* Table statistics. */
-struct sw_table_stats {
- const char *name; /* Human-readable name. */
- uint32_t wildcards; /* Bitmap of OFPFW_* wildcards that are
- supported by the table. */
- unsigned int n_flows; /* Number of active flows. */
- unsigned int max_flows; /* Flow capacity. */
- unsigned long int n_lookup; /* Number of packets looked up. */
- unsigned long int n_matched; /* Number of packets that have hit. */
-};
-
-/* Position within an iteration of a sw_table.
- *
- * The contents are private to the table implementation, except that a position
- * initialized to all-zero-bits represents the start of a table. */
-struct sw_table_position {
- unsigned long private[4];
-};
-
-/* A single table of flows. */
-struct sw_table {
- /* The number of packets that have been looked up and matched,
- * respecitvely. To make these 100% accurate, they should be atomic.
- * However, we're primarily concerned about speed. */
- unsigned long long n_lookup;
- unsigned long long n_matched;
-
- /* Searches 'table' for a flow matching 'key', which must not have any
- * wildcard fields. Returns the flow if successful, a null pointer
- * otherwise. */
- struct sw_flow *(*lookup)(struct sw_table *table,
- const struct sw_flow_key *key);
-
- /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns
- * 0 if successful or a negative error. Error can be due to an
- * over-capacity table or because the flow is not one of the kind that
- * the table accepts.
- *
- * If successful, 'flow' becomes owned by 'table', otherwise it is
- * retained by the caller. */
- int (*insert)(struct sw_table *table, struct sw_flow *flow);
-
- /* Modifies the actions in 'table' that match 'key'. If 'strict'
- * set, wildcards and priority must match. Returns the number of flows
- * that were modified. */
- int (*modify)(struct sw_table *table, const struct sw_flow_key *key,
- uint16_t priority, int strict,
- const struct ofp_action_header *actions, size_t actions_len);
-
- /* Deletes from 'table' any and all flows that match 'key' from
- * 'table'. If 'out_port' is not OFPP_NONE, then matching entries
- * must have that port as an argument for an output action. If
- * 'strict' is set, wildcards and priority must match. Returns the
- * number of flows that were deleted. */
- int (*delete)(struct datapath *dp, struct sw_table *table,
- const struct sw_flow_key *key,
- uint16_t out_port, uint16_t priority, int strict);
-
- /* Performs timeout processing on all the flow entries in 'table'.
- * Appends all the flow entries removed from 'table' to 'deleted' for the
- * caller to free. */
- void (*timeout)(struct sw_table *table, struct list *deleted);
-
- /* Destroys 'table', which must not have any users. */
- void (*destroy)(struct sw_table *table);
-
- /* Iterates through the flow entries in 'table', passing each one
- * matches 'key' and output port 'out_port' to 'callback'. The
- * callback function should return 0 to continue iteration or a
- * nonzero error code to stop. The iterator function returns either
- * 0 if the table iteration completed or the value returned by the
- * callback function otherwise.
- *
- * The iteration starts at 'position', which may be initialized to
- * all-zero-bits to iterate from the beginning of the table. If the
- * iteration terminates due to an error from the callback function,
- * 'position' is updated to a value that can be passed back to the
- * iterator function to resume iteration later with the following
- * flow. */
- int (*iterate)(struct sw_table *table,
- const struct sw_flow_key *key, uint16_t out_port,
- struct sw_table_position *position,
- int (*callback)(struct sw_flow *flow, void *private),
- void *private);
-
- /* Dumps statistics for 'table' into 'stats'. */
- void (*stats)(struct sw_table *table, struct sw_table_stats *stats);
-};
-
-struct sw_table *table_hash_create(unsigned int polynomial,
- unsigned int n_buckets);
-struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
- unsigned int poly1, unsigned int buckets1);
-struct sw_table *table_linear_create(unsigned int max_flows);
-
-#endif /* table.h */
+++ /dev/null
-.ds PN udatapath
-
-.TH udatapath 8 "May 2008" "OpenFlow" "OpenFlow Manual"
-
-.SH NAME
-udatapath \- userspace implementation of datapath for OpenFlow switch
-
-.SH SYNOPSIS
-.B udatapath
-[\fIoptions\fR]
-\fB-i\fR \fInetdev\fR[\fB,\fInetdev\fR].\|.\|.
-\fImethod\fR [\fImethod\fR].\|.\|.
-
-.SH DESCRIPTION
-The \fBudatapath\fR is a userspace implementation of an OpenFlow
-datapath. It monitors one or more network device interfaces,
-forwarding packets between them according to the entries in the flow
-table that it maintains. When it is used with \fBsecchan\fR(8), to
-connect the datapath to an OpenFlow controller, the combination is an
-OpenFlow switch.
-
-For access to network devices, the udatapath program must normally run as
-root.
-
-The mandatory \fImethod\fR argument specifies how \fBsecchan\fR(8)
-communicates with \fBudatapath\fR, as a passive OpenFlow connection
-method. Ordinarily \fImethod\fR takes the following form:
-
-.TP
-\fBpunix:\fIfile\fR
-Listens for connections on the Unix domain server socket named
-\fIfile\fR.
-
-.PP
-The following connection methods are also supported, but their use
-would be unusual because \fBudatapath\fR and \fBsecchan\fR should run
-on the same machine:
-
-.TP
-\fBpssl:\fR[\fIport\fR]
-Listens for SSL connections \fIport\fR (default: 976). The
-\fB--private-key\fR, \fB--certificate\fR, and \fB--ca-cert\fR options
-are mandatory when this form is used. (\fBofp\-pki\fR(8) does not set
-up a suitable PKI for use with this option.)
-
-.TP
-\fBptcp:\fR[\fIport\fR]
-Listens for TCP connections from remote OpenFlow switches on
-\fIport\fR (default: 975).
-
-.SH OPTIONS
-.TP
-\fB-i\fR, \fB--interfaces=\fR\fInetdev\fR[\fB,\fInetdev\fR].\|.\|.
-Specifies each \fInetdev\fR (e.g., \fBeth0\fR) as a switch port. The
-specified network devices should not have any configured IP addresses.
-This option may be given any number of times to specify additional
-network devices.
-
-.TP
-\fB-L\fR, \fB--local-port=\fInetdev\fR
-Specifies the network device to use as the userspace datapath's
-``local port,'' which is a network device that \fBsecchan\fR(8)
-bridges to the physical switch ports for use in in-band control. When
-this option is not specified, the default is \fBtap:\fR, which causes
-a new TAP virtual network device to be allocated with a default name
-assigned by the kernel. To do the same, but assign a specific name
-\fBname\fR to the TAP network device, specify the option as
-\fB--local-port=tap:\fIname\fR.
-
-Either way, the existence of TAP devices created by \fBudatapath\fR is
-temporary: they are destroyed when \fBudatapath\fR exits. If this is
-undesirable, you may use \fBtunctl\fR(8) to create a persistent TAP
-network device and then pass it to \fBudatapath\fR, like so:
-
-.RS
-.IP 1.
-Create a persistent TAP network device: \fBtunctl -t mytap\fR. (The
-\fBtunctl\fR(8) utility is part of User Mode Linux. It is not
-included with the OpenFlow reference implementation.)
-.IP 2.
-Invoke \fBudatapath\fR(8) using \fBmytap\fR, e.g. \fBudatapath
---local-port=mytap\fR .\|.\|. (Note the lack of \fBtap:\fR prefix on
-the \fB--local-port\fR argument.)
-.IP 3.
-Invoke \fBsecchan\fR(8), etc., and use the switch as desired.
-.IP 4.
-When \fBsecchan\fR and \fBudatapath\fR have terminated and the TAP
-network device is no longer needed, you may destroy it with: \fBtunctl
--d mytap\fR
-.RE
-
-.IP
-It does not ordinarily make sense to specify the name of a physical
-network device on \fB-L\fR or \fB--local-port\fR.
-
-.TP
-\fB--no-local-port\fR
-Do not provide a local port as part of the datapath. When this option
-is used, the switch will not support in-band control.
-
-.TP
-\fB-d\fR, \fB--datapath-id=\fIdpid\fR
-Specifies the OpenFlow datapath ID (a 48-bit number that uniquely
-identifies a controller) as \fIdpid\fR, which consists of exactly 12
-hex digits. Without this option, \fBudatapath\fR picks an ID randomly.
-
-.TP
-\fB-p\fR, \fB--private-key=\fIprivkey.pem\fR
-Specifies a PEM file containing the private key used as the datapath's
-identity for SSL connections to \fBsecchan\fR(8).
-
-.TP
-\fB-c\fR, \fB--certificate=\fIcert.pem\fR
-Specifies a PEM file containing a certificate, signed by the
-datapath's certificate authority (CA), that certifies the datapath's
-private key to identify a trustworthy datapath.
-
-.TP
-\fB-C\fR, \fB--ca-cert=\fIcacert.pem\fR
-Specifies a PEM file containing the CA certificate used to verify that
-the datapath is connected to a trustworthy secure channel.
-
-.so lib/daemon.man
-.so lib/vlog.man
-.so lib/common.man
-
-.SH BUGS
-The userspace datapath's performance lags significantly behind that of
-the kernel-based switch. It should only be used when the kernel-based
-switch cannot be.
-
-On Linux, general-purpose support for VLAN tag rewriting is precluded
-by the Linux kernel AF_PACKET implementation.
-
-.SH "SEE ALSO"
-
-.BR secchan (8),
-.BR dpctl (8),
-.BR controller (8),
-.BR vlogconf (8).
+++ /dev/null
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. However, since we would
- * like to make the Software available for broadest use, with as few
- * restrictions as possible permission is hereby granted, free of
- * charge, to any person obtaining a copy of this Software to deal in
- * the Software under the copyrights without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * The name and trademarks of copyright holder(s) may NOT be used in
- * advertising or publicity pertaining to the Software or any
- * derivatives without specific, written prior permission.
- */
-
-#include <config.h>
-#include <errno.h>
-#include <getopt.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "command-line.h"
-#include "daemon.h"
-#include "datapath.h"
-#include "fault.h"
-#include "openflow/openflow.h"
-#include "poll-loop.h"
-#include "queue.h"
-#include "util.h"
-#include "rconn.h"
-#include "timeval.h"
-#include "vconn.h"
-#include "dirs.h"
-#include "vconn-ssl.h"
-#include "vlog-socket.h"
-
-#define THIS_MODULE VLM_udatapath
-#include "vlog.h"
-
-/* Strings to describe the manufacturer, hardware, and software. This data
- * is queriable through the switch description stats message. */
-char mfr_desc[DESC_STR_LEN] = "Nicira Networks";
-char hw_desc[DESC_STR_LEN] = "Reference User-Space Switch";
-char sw_desc[DESC_STR_LEN] = VERSION BUILDNR;
-char serial_num[SERIAL_NUM_LEN] = "None";
-
-static void parse_options(int argc, char *argv[]);
-static void usage(void) NO_RETURN;
-
-static struct datapath *dp;
-static uint64_t dpid = UINT64_MAX;
-static char *port_list;
-static char *local_port = "tap:";
-
-static void add_ports(struct datapath *dp, char *port_list);
-
-int
-main(int argc, char *argv[])
-{
- int n_listeners;
- int error;
- int i;
-
- set_program_name(argv[0]);
- register_fault_handlers();
- time_init();
- vlog_init();
- parse_options(argc, argv);
- signal(SIGPIPE, SIG_IGN);
-
- if (argc - optind < 1) {
- ofp_fatal(0, "at least one listener argument is required; "
- "use --help for usage");
- }
-
- error = dp_new(&dp, dpid);
-
- n_listeners = 0;
- for (i = optind; i < argc; i++) {
- const char *pvconn_name = argv[i];
- struct pvconn *pvconn;
- int retval;
-
- retval = pvconn_open(pvconn_name, &pvconn);
- if (!retval || retval == EAGAIN) {
- dp_add_pvconn(dp, pvconn);
- n_listeners++;
- } else {
- ofp_error(retval, "opening %s", pvconn_name);
- }
- }
- if (!n_listeners) {
- ofp_fatal(0, "could not listen for any connections");
- }
-
- if (port_list) {
- add_ports(dp, port_list);
- }
- if (local_port) {
- error = dp_add_local_port(dp, local_port);
- if (error) {
- ofp_fatal(error, "failed to add local port %s", local_port);
- }
- }
-
- error = vlog_server_listen(NULL, NULL);
- if (error) {
- ofp_fatal(error, "could not listen for vlog connections");
- }
-
- die_if_already_running();
- daemonize();
-
- for (;;) {
- dp_run(dp);
- dp_wait(dp);
- poll_block();
- }
-
- return 0;
-}
-
-static void
-add_ports(struct datapath *dp, char *port_list)
-{
- char *port, *save_ptr;
-
- /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that
- * can cause segfaults here:
- * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
- * Using ",," instead of the obvious "," works around it. */
- for (port = strtok_r(port_list, ",,", &save_ptr); port;
- port = strtok_r(NULL, ",,", &save_ptr)) {
- int error = dp_add_port(dp, port);
- if (error) {
- ofp_fatal(error, "failed to add port %s", port);
- }
- }
-}
-
-static void
-parse_options(int argc, char *argv[])
-{
- enum {
- OPT_MFR_DESC = UCHAR_MAX + 1,
- OPT_HW_DESC,
- OPT_SW_DESC,
- OPT_SERIAL_NUM,
- OPT_BOOTSTRAP_CA_CERT,
- OPT_NO_LOCAL_PORT
- };
-
- static struct option long_options[] = {
- {"interfaces", required_argument, 0, 'i'},
- {"local-port", required_argument, 0, 'L'},
- {"no-local-port", no_argument, 0, OPT_NO_LOCAL_PORT},
- {"datapath-id", required_argument, 0, 'd'},
- {"verbose", optional_argument, 0, 'v'},
- {"help", no_argument, 0, 'h'},
- {"version", no_argument, 0, 'V'},
- {"mfr-desc", required_argument, 0, OPT_MFR_DESC},
- {"hw-desc", required_argument, 0, OPT_HW_DESC},
- {"sw-desc", required_argument, 0, OPT_SW_DESC},
- {"serial_num", required_argument, 0, OPT_SERIAL_NUM},
- DAEMON_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
- VCONN_SSL_LONG_OPTIONS
- {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
-#endif
- {0, 0, 0, 0},
- };
- char *short_options = long_options_to_short_options(long_options);
-
- for (;;) {
- int indexptr;
- int c;
-
- c = getopt_long(argc, argv, short_options, long_options, &indexptr);
- if (c == -1) {
- break;
- }
-
- switch (c) {
- case 'd':
- if (strlen(optarg) != 12
- || strspn(optarg, "0123456789abcdefABCDEF") != 12) {
- ofp_fatal(0, "argument to -d or --datapath-id must be "
- "exactly 12 hex digits");
- }
- dpid = strtoll(optarg, NULL, 16);
- if (!dpid) {
- ofp_fatal(0, "argument to -d or --datapath-id must "
- "be nonzero");
- }
- break;
-
- case 'h':
- usage();
-
- case 'V':
- printf("%s %s compiled "__DATE__" "__TIME__"\n",
- program_name, VERSION BUILDNR);
- exit(EXIT_SUCCESS);
-
- case 'v':
- vlog_set_verbosity(optarg);
- break;
-
- case 'i':
- if (!port_list) {
- port_list = optarg;
- } else {
- port_list = xasprintf("%s,%s", port_list, optarg);
- }
- break;
-
- case 'L':
- local_port = optarg;
- break;
-
- case OPT_NO_LOCAL_PORT:
- local_port = NULL;
- break;
-
- case OPT_MFR_DESC:
- strncpy(mfr_desc, optarg, sizeof mfr_desc);
- break;
-
- case OPT_HW_DESC:
- strncpy(hw_desc, optarg, sizeof hw_desc);
- break;
-
- case OPT_SW_DESC:
- strncpy(sw_desc, optarg, sizeof sw_desc);
- break;
-
- case OPT_SERIAL_NUM:
- strncpy(serial_num, optarg, sizeof serial_num);
- break;
-
- DAEMON_OPTION_HANDLERS
-
-#ifdef HAVE_OPENSSL
- VCONN_SSL_OPTION_HANDLERS
-
- case OPT_BOOTSTRAP_CA_CERT:
- vconn_ssl_set_ca_cert_file(optarg, true);
- break;
-#endif
-
- case '?':
- exit(EXIT_FAILURE);
-
- default:
- abort();
- }
- }
- free(short_options);
-}
-
-static void
-usage(void)
-{
- printf("%s: userspace OpenFlow datapath\n"
- "usage: %s [OPTIONS] LISTEN...\n"
- "where LISTEN is a passive OpenFlow connection method on which\n"
- "to listen for incoming connections from the secure channel.\n",
- program_name, program_name);
- vconn_usage(false, true, false);
- printf("\nConfiguration options:\n"
- " -i, --interfaces=NETDEV[,NETDEV]...\n"
- " add specified initial switch ports\n"
- " -L, --local-port=NETDEV set network device for local port\n"
- " --no-local-port disable local port\n"
- " -d, --datapath-id=ID Use ID as the OpenFlow switch ID\n"
- " (ID must consist of 12 hex digits)\n"
- "\nOther options:\n"
- " -D, --detach run in background as daemon\n"
- " -P, --pidfile[=FILE] create pidfile (default: %s/udatapath.pid)\n"
- " -f, --force with -P, start even if already running\n"
- " -v, --verbose=MODULE[:FACILITY[:LEVEL]] set logging levels\n"
- " -v, --verbose set maximum verbosity level\n"
- " -h, --help display this help message\n"
- " -V, --version display version information\n",
- ofp_rundir);
- exit(EXIT_SUCCESS);
-}
utilities/dpctl \
utilities/ofp-discover \
utilities/ofp-kill
+noinst_PROGRAMS += utilities/nlmon
bin_SCRIPTS += utilities/ofp-pki
noinst_SCRIPTS += utilities/ofp-pki-cgi utilities/ofp-parse-leaks
utilities_ofp_kill_SOURCES = utilities/ofp-kill.c
utilities_ofp_kill_LDADD = lib/libopenflow.a
+
+utilities_nlmon_SOURCES = utilities/nlmon.c
+utilities_nlmon_LDADD = lib/libopenflow.a
#include <unistd.h>
#include <sys/time.h>
-#ifdef HAVE_NETLINK
-#include "netdev.h"
-#include "netlink.h"
-#include "openflow/openflow-netlink.h"
-#endif
-
#include "command-line.h"
#include "compiler.h"
#include "dpif.h"
-#include "openflow/nicira-ext.h"
+#include "netdev.h"
+#include "netlink.h"
#include "ofp-print.h"
#include "ofpbuf.h"
+#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "random.h"
{
printf("%s: OpenFlow switch management utility\n"
"usage: %s [OPTIONS] COMMAND [ARG...]\n"
-#ifdef HAVE_NETLINK
- "\nFor local datapaths only:\n"
- " adddp nl:DP_ID add a new local datapath DP_ID\n"
- " deldp nl:DP_ID delete local datapath DP_ID\n"
- " addif nl:DP_ID IFACE... add each IFACE as a port on DP_ID\n"
- " delif nl:DP_ID IFACE... delete each IFACE from DP_ID\n"
- " get-idx OF_DEV get datapath index for OF_DEV\n"
-#endif
- "\nFor local datapaths and remote switches:\n"
- " show SWITCH show basic information\n"
+ "\nFor local datapaths:\n"
+ " adddp DP add a new local datapath DP\n"
+ " deldp DP delete local datapath DP\n"
+ " addif DP IFACE... add each IFACE as a port on DP\n"
+ " delif DP IFACE... delete each IFACE from DP\n"
+ " showdp show basic info on all datapaths\n"
+ " showdp DP... show basic info on each DP\n"
+ "\nFor OpenFlow switches:\n"
+ " show SWITCH show OpenFlow information\n"
" status SWITCH [KEY] report statistics (about KEY)\n"
" dump-desc SWITCH print switch description\n"
" dump-tables SWITCH print table stats\n"
" del-flows SWITCH [FLOW] delete matching FLOWs\n"
" monitor SWITCH print packets received from SWITCH\n"
" execute SWITCH CMD [ARG...] execute CMD with ARGS on SWITCH\n"
- "\nFor local datapaths, remote switches, and controllers:\n"
+ "\nFor OpenFlow switches and controllers:\n"
" probe VCONN probe whether VCONN is up\n"
" ping VCONN [N] latency of N-byte echos\n"
" benchmark VCONN N COUNT bandwidth of COUNT N-byte echos\n"
}
}
\f
-#ifdef HAVE_NETLINK
-/* Netlink-only commands. */
-
static int if_up(const char *netdev_name)
{
struct netdev *netdev;
static void
do_get_idx(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
{
- int dp_idx;
-
struct dpif dpif;
- run(dpif_open(-1, &dpif), "opening management socket");
- dp_idx = dpif_get_idx(argv[1]);
- if (dp_idx == -1) {
- dpif_close(&dpif);
- ofp_fatal(0, "unknown OpenFlow device: %s", argv[1]);
- }
- printf("%d\n", dp_idx);
+ run(dpif_open(argv[1], &dpif), "opening datapath");
+ printf("%u\n", dpif.minor);
dpif_close(&dpif);
}
-static int
-get_dp_idx(const char *name)
-{
- if (strncmp(name, "nl:", 3)
- || strlen(name) < 4
- || name[strspn(name + 3, "0123456789") + 3]) {
- ofp_fatal(0, "%s: argument is not of the form \"nl:DP_ID\"", name);
- }
- return atoi(name + 3);
-}
-
static void
do_add_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
{
struct dpif dpif;
- run(dpif_open(-1, &dpif), "opening management socket");
- run(dpif_add_dp(&dpif, get_dp_idx(argv[1]), NULL), "add_dp");
+ run(dpif_create(argv[1], &dpif), "add_dp");
dpif_close(&dpif);
}
do_del_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
{
struct dpif dpif;
- run(dpif_open(-1, &dpif), "opening management socket");
- run(dpif_del_dp(&dpif, get_dp_idx(argv[1]), NULL), "del_dp");
+ run(dpif_open(argv[1], &dpif), "opening datapath");
+ run(dpif_delete(&dpif), "del_dp");
dpif_close(&dpif);
}
-static void add_del_ports(int argc UNUSED, char *argv[],
- int (*function)(struct dpif *, int dp_idx,
- const char *netdev),
- const char *operation, const char *preposition)
+/* qsort() comparison callback: orders two 'struct odp_port' elements by
+ * ascending 'port' number.  Returns -1, 0, or 1. */
+static int
+compare_ports(const void *a_, const void *b_)
+{
+    const struct odp_port *lhs = a_;
+    const struct odp_port *rhs = b_;
+
+    if (lhs->port < rhs->port) {
+        return -1;
+    }
+    return lhs->port > rhs->port;
+}
+
+/* Obtains the port list of 'dpif' through dpif_port_list(), storing the
+ * array in '*ports' and its length in '*n_ports', then sorts the array with
+ * compare_ports().  The dpif_port_list() result is handed to run() together
+ * with the message "listing ports".  Callers in this file release '*ports'
+ * with free(). */
+static void
+query_ports(struct dpif *dpif, struct odp_port **ports, size_t *n_ports)
+{
+    int error = dpif_port_list(dpif, ports, n_ports);
+
+    run(error, "listing ports");
+    qsort(*ports, *n_ports, sizeof **ports, compare_ports);
+}
+
+/* Returns the lowest port number in the range 0...UINT16_MAX that none of
+ * the ports currently listed for 'dpif' uses.  Calls ofp_fatal() when every
+ * number in that range is taken. */
+static uint16_t
+get_free_port(struct dpif *dpif)
+{
+    struct odp_port *ports;
+    size_t n_ports;
+    int candidate;
+
+    query_ports(dpif, &ports, &n_ports);
+    for (candidate = 0; candidate <= UINT16_MAX; candidate++) {
+        bool in_use = false;
+        size_t i;
+
+        /* Stop scanning as soon as some existing port claims 'candidate'. */
+        for (i = 0; i < n_ports && !in_use; i++) {
+            in_use = ports[i].port == candidate;
+        }
+        if (!in_use) {
+            free(ports);
+            return candidate;
+        }
+    }
+    ofp_fatal(0, "no free datapath ports");
+}
+
+static void
+do_add_port(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
{
bool failure = false;
struct dpif dpif;
- int dp_idx;
int i;
- run(dpif_open(-1, &dpif), "opening management socket");
- dp_idx = get_dp_idx(argv[1]);
+ run(dpif_open(argv[1], &dpif), "opening datapath");
for (i = 2; i < argc; i++) {
- int retval = function(&dpif, dp_idx, argv[i]);
- if (retval) {
- ofp_error(retval, "failed to %s %s %s %s",
- operation, argv[i], preposition, argv[1]);
+        char *save_ptr = NULL;
+        char *devname, *port_s;
+        uint16_t port;
+        int error;
+
+        /* Each argument is split at '@': the first token is the network
+         * device name and any remainder is an explicit port number.  The
+         * doubled "@@" delimiter presumably works around the glibc 2.7
+         * strtok_r() optimization bug (sources.redhat.com bug 5614) --
+         * TODO confirm. */
+        devname = strtok_r(argv[i], "@@", &save_ptr);
+        if (!devname) {
+            ofp_error(0, "%s is not a valid network device name", argv[i]);
+            /* A rejected argument must make the command exit nonzero, just
+             * like every other per-interface error in this loop. */
+            failure = true;
+            continue;
+        }
+
+        /* Bring the device up before attaching it; if_up() returns nonzero
+         * on failure. */
+        if (if_up(devname)) {
+            failure = true;
+            continue;
+        }
+
+        /* An empty delimiter set makes strtok_r() return the whole remainder
+         * of the argument, or NULL when nothing follows the device name, in
+         * which case the lowest unused port number is chosen. */
+        port_s = strtok_r(NULL, "", &save_ptr);
+        port = port_s ? atoi(port_s) : get_free_port(&dpif);
+
+        error = dpif_port_add(&dpif, devname, port);
+        if (error) {
+            ofp_error(error, "adding %s as port %"PRIu16" of %s failed",
+                      devname, port, argv[1]);
failure = true;
}
}
}
}
-static int ifup_and_add_port(struct dpif *dpif, int dp_idx, const char *netdev)
+/* Looks up the port of 'dpif' whose device name equals 'name'.  On a match,
+ * stores its port number in '*port' and returns true.  Otherwise reports
+ * "no port named ..." through ofp_error(), leaves '*port' untouched, and
+ * returns false. */
+static bool
+get_port_number(struct dpif *dpif, const char *name, uint16_t *port)
{
- int retval = if_up(netdev);
- return retval ? retval : dpif_add_port(dpif, dp_idx, netdev);
+    struct odp_port *ports;
+    size_t n_ports;
+    bool found = false;
+    size_t i;
+
+    query_ports(dpif, &ports, &n_ports);
+    for (i = 0; i < n_ports && !found; i++) {
+        if (strcmp(name, ports[i].devname) == 0) {
+            *port = ports[i].port;
+            found = true;
+        }
+    }
+    free(ports);
+
+    if (!found) {
+        ofp_error(0, "no port named %s", name);
+    }
+    return found;
}
-static void do_add_port(const struct settings *s UNUSED, int argc UNUSED,
- char *argv[])
+/* "delif" command: opens the datapath named by argv[1] and deletes each port
+ * named by argv[2] onward.  An argument made up solely of decimal digits is
+ * taken as a port number; anything else is resolved as a device name with
+ * get_port_number().  All arguments are attempted; the process exits with
+ * EXIT_FAILURE afterward if any of them failed. */
+static void
+do_del_port(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
{
- add_del_ports(argc, argv, ifup_and_add_port, "add", "to");
+    bool failure = false;
+    struct dpif dpif;
+    int i;
+
+    run(dpif_open(argv[1], &dpif), "opening datapath");
+    for (i = 2; i < argc; i++) {
+        const char *name = argv[i];
+        /* True when no non-digit character precedes the terminator.
+         * NOTE(review): an empty argument also satisfies this test and
+         * atoi() then yields port 0. */
+        bool all_digits = name[strspn(name, "0123456789")] == '\0';
+        uint16_t port;
+        int error;
+
+        if (all_digits) {
+            port = atoi(name);
+        } else if (!get_port_number(&dpif, name, &port)) {
+            failure = true;
+            continue;
+        }
+
+        error = dpif_port_del(&dpif, port);
+        if (error) {
+            ofp_error(error, "deleting port %s from %s failed", name, argv[1]);
+            failure = true;
+        }
+    }
+    dpif_close(&dpif);
+
+    if (failure) {
+        exit(EXIT_FAILURE);
+    }
}
-static void do_del_port(const struct settings *s UNUSED, int argc UNUSED,
- char *argv[])
+/* Prints a "datapath N:" heading using the minor number of 'dpif', followed
+ * by one line per port (port number and device name) in sorted order, and
+ * then closes 'dpif'.  The caller must not close 'dpif' again. */
+static void
+show_dpif(struct dpif *dpif)
{
- add_del_ports(argc, argv, dpif_del_port, "remove", "from");
+    struct odp_port *ports;
+    size_t n_ports;
+    size_t i;
+
+    printf("datapath %u:\n", dpif->minor);
+
+    query_ports(dpif, &ports, &n_ports);
+    for (i = 0; i < n_ports; i++) {
+        const struct odp_port *p = &ports[i];
+
+        printf("\tport %u: %s\n", p->port, p->devname);
+    }
+    free(ports);
+
+    dpif_close(dpif);
}
-#endif /* HAVE_NETLINK */
+
+/* Opens the datapath called 'name' and, on success, prints it with
+ * show_dpif() (which also closes it).  Returns true on success.  When
+ * 'ignore_enodev' is true, an ENODEV error from dpif_open() is silently
+ * treated as success; any other error is reported through ofp_error() and
+ * yields false. */
+static bool
+show_named_dp(const char *name, bool ignore_enodev)
+{
+    struct dpif dpif;
+    int error = dpif_open(name, &dpif);
+
+    if (!error) {
+        show_dpif(&dpif);
+        return true;
+    }
+    if (ignore_enodev && error == ENODEV) {
+        return true;
+    }
+    ofp_error(error, "opening datapath %s failed", name);
+    return false;
+}
+
+/* "showdp" command.  With arguments, shows each datapath named on the
+ * command line.  Without arguments, probes "dp0" through "dp<ODP_MAX - 1>"
+ * and shows every one that can be opened, skipping those that fail with
+ * ENODEV.  Exits with EXIT_FAILURE if any datapath could not be shown. */
+static void
+do_show_dp(const struct settings *s UNUSED, int argc UNUSED, char *argv[])
+{
+    bool failure = false;
+
+    if (argc > 1) {
+        int i;
+
+        for (i = 1; i < argc; i++) {
+            if (!show_named_dp(argv[i], false)) {
+                failure = true;
+            }
+        }
+    } else {
+        unsigned int dp_no;
+
+        for (dp_no = 0; dp_no < ODP_MAX; dp_no++) {
+            char name[128];
+
+            sprintf(name, "dp%u", dp_no);
+            if (!show_named_dp(name, true)) {
+                failure = true;
+            }
+        }
+    }
+
+    if (failure) {
+        exit(EXIT_FAILURE);
+    }
+}
+
\f
/* Generic commands. */
{ "addif", 2, INT_MAX, do_add_port },
{ "delif", 2, INT_MAX, do_del_port },
{ "get-idx", 1, 1, do_get_idx },
+ { "showdp", 0, INT_MAX, do_show_dp },
#endif
{ "show", 1, 1, do_show },
--- /dev/null
+#include <config.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <net/if.h>
+#include <poll.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <stddef.h>
+#include <linux/rtnetlink.h>
+#include "netlink.h"
+#include "ofpbuf.h"
+#include "poll-loop.h"
+#include "timeval.h"
+#include "util.h"
+#include "vlog.h"
+
+static const struct nl_policy rtnlgrp_link_policy[] = { /* Attributes main() parses out of each received link message. */
+ [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, /* Required string; printed by main() as the netdev name. */
+ [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, /* Optional u32; when present, main() prints it and looks it up with if_indextoname(). */
+};
+
+/* Test utility that watches rtnetlink link notifications.
+ *
+ * Creates a NETLINK_ROUTE socket joined to RTNLGRP_LINK, then loops forever
+ * (the loop has no exit): each received message is parsed against
+ * rtnlgrp_link_policy and reported on stdout as
+ * "netdev <name> changed (<message type>):", followed by a "master=" line
+ * when the IFLA_MASTER attribute is present.  Malformed messages and receive
+ * errors are reported through ofp_error()/ofp_fatal(). */
+int
+main(int argc UNUSED, char *argv[])
+{
+    struct nl_sock *rtnl_sock;
+    int retval;
+
+    set_program_name(argv[0]);
+    time_init();
+    vlog_init();
+    vlog_set_levels(VLM_ANY_MODULE, VLF_ANY_FACILITY, VLL_DBG);
+
+    retval = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock);
+    if (retval) {
+        ofp_fatal(retval, "could not create rtnetlink socket");
+    }
+
+    for (;;) {
+        struct ofpbuf *msg;
+
+        retval = nl_sock_recv(rtnl_sock, &msg, false);
+        if (!retval) {
+            struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
+            const struct nlmsghdr *hdr;
+            const char *type_name;
+
+            hdr = ofpbuf_at(msg, 0, NLMSG_HDRLEN);
+            if (!ofpbuf_at(msg, NLMSG_HDRLEN, sizeof(struct ifinfomsg))) {
+                ofp_error(0, "received bad rtnl message (no ifinfomsg)");
+                ofpbuf_delete(msg);
+                /* Retry the receive right away, skipping the poll below. */
+                continue;
+            }
+
+            if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
+                                 rtnlgrp_link_policy,
+                                 attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
+                ofp_error(0, "received bad rtnl message (policy)");
+                ofpbuf_delete(msg);
+                /* Retry the receive right away, skipping the poll below. */
+                continue;
+            }
+
+            /* 'hdr' is only dereferenced here, after the ifinfomsg check
+             * above has succeeded. */
+            switch (hdr->nlmsg_type) {
+            case RTM_NEWLINK:
+                type_name = "RTM_NEWLINK";
+                break;
+            case RTM_DELLINK:
+                type_name = "RTM_DELLINK";
+                break;
+            case RTM_GETLINK:
+                type_name = "RTM_GETLINK";
+                break;
+            case RTM_SETLINK:
+                type_name = "RTM_SETLINK";
+                break;
+            default:
+                type_name = "other";
+                break;
+            }
+            printf("netdev %s changed (%s):\n",
+                   nl_attr_get_string(attrs[IFLA_IFNAME]), type_name);
+
+            if (attrs[IFLA_MASTER]) {
+                uint32_t master_idx = nl_attr_get_u32(attrs[IFLA_MASTER]);
+                char name_buf[IFNAMSIZ];
+                const char *master_name;
+
+                /* if_indextoname() returns NULL when it cannot resolve the
+                 * index; fall back to a placeholder in that case. */
+                master_name = (if_indextoname(master_idx, name_buf)
+                               ? name_buf : "unknown");
+                printf("\tmaster=%"PRIu32" (%s)\n", master_idx, master_name);
+            }
+            ofpbuf_delete(msg);
+        } else if (retval == ENOBUFS) {
+            ofp_error(0, "network monitor socket overflowed");
+        } else if (retval != EAGAIN) {
+            ofp_fatal(retval, "error on network monitor socket");
+        }
+        /* EAGAIN means nothing was pending: fall through and wait. */
+
+        nl_sock_wait(rtnl_sock, POLLIN);
+        poll_block();
+    }
+}
+
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60);
-/* Used for creating kernel datapaths */
-static struct dpif mgmt_dpif;
-
/* Netlink socket to kernel datapath */
struct nl_sock *nl_sock;
"\"brcompat\" kernel module.");
}
- if (dpif_open(-1, &mgmt_dpif) != 0) {
- ofp_fatal(0, "could not open datapath interface");
- }
-
config_name = file_name;
}
struct rconn *rconn; /* Connection to secchan subprocess. */
/* Kernel datapath information. */
+ struct dpif dpif; /* Kernel datapath. */
int dp_idx; /* Kernel datapath index. */
struct port_array ifaces; /* Indexed by kernel datapath port number. */
/* List of all bridges. */
static struct list all_bridges = LIST_INITIALIZER(&all_bridges);
-/* Each value is true if the corresponding datapath has been created,
- * false otherwise.*/
-static bool in_use_dps[DP_MAX];
-
-/* Used for creating and destroying kernel datapaths, etc. */
-static struct dpif mgmt_dpif;
+/* Maximum number of datapaths. */
+enum { DP_MAX = 256 };
static struct bridge *bridge_create(const char *name);
static void bridge_destroy(struct bridge *);
static void bridge_reconfigure_one(struct bridge *);
static void bridge_get_all_ifaces(const struct bridge *, struct svec *ifaces);
static bool bridge_is_backlogged(const struct bridge *);
-static int bridge_fetch_dp_ifaces(struct bridge *, struct svec *iface_names);
+static void bridge_fetch_dp_ifaces(struct bridge *);
static void bridge_flush(struct bridge *);
static void bridge_process_msg(struct bridge *, struct ofpbuf *);
static void brstp_adjust_timers(struct bridge *);
static void brstp_run(struct bridge *);
static void brstp_wait(struct bridge *);
-static void brstp_receive(struct bridge *, const struct flow *,
+static void brstp_receive(struct bridge *, const flow_t *,
const struct ofpbuf *);
static void iface_create(struct port *, const char *name);
bridge_init(void)
{
int retval;
- size_t i;
-
- retval = dpif_open(-1, &mgmt_dpif);
- if (retval) {
- ofp_fatal(retval, "could not create datapath management socket");
- }
+ int i;
for (i = 0; i < DP_MAX; i++) {
- int retval = dpif_del_dp(&mgmt_dpif, i, NULL);
- if (retval && retval != ENOENT) {
+ struct dpif dpif;
+ char devname[16];
+
+ sprintf(devname, "dp%d", i);
+ retval = dpif_open(devname, &dpif);
+ if (!retval) {
+ dpif_delete(&dpif);
+ dpif_close(&dpif);
+ } else if (retval != ENODEV) {
VLOG_ERR("failed to delete datapath nl:%d: %s",
i, strerror(retval));
}
* that port already belongs to a different datapath, so we must do all
* port deletions before any port additions. */
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
- struct svec cur_ifaces, want_ifaces, del_ifaces;
+ struct odp_port *dpif_ports;
+ size_t n_dpif_ports;
+ struct svec want_ifaces;
- bridge_fetch_dp_ifaces(br, &cur_ifaces);
+ dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
bridge_get_all_ifaces(br, &want_ifaces);
- svec_diff(&want_ifaces, &cur_ifaces, NULL, NULL, &del_ifaces);
- for (i = 0; i < del_ifaces.n; i++) {
- const char *if_name = del_ifaces.names[i];
- if (strcmp(if_name, br->name)) {
- int retval = dpif_del_port(&mgmt_dpif, br->dp_idx, if_name);
+ for (i = 0; i < n_dpif_ports; i++) {
+ const struct odp_port *p = &dpif_ports[i];
+ if (!svec_contains(&want_ifaces, p->devname)
+ && strcmp(p->devname, br->name)) {
+ int retval = dpif_port_del(&br->dpif, p->port);
if (retval) {
VLOG_ERR("failed to remove %s interface from nl:%d: %s",
- if_name, br->dp_idx, strerror(retval));
+ p->devname, br->dp_idx, strerror(retval));
}
- } else {
- /* Can't remove local port. */
}
}
- svec_destroy(&cur_ifaces);
svec_destroy(&want_ifaces);
- svec_destroy(&del_ifaces);
+ free(dpif_ports);
}
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ struct odp_port *dpif_ports;
+ size_t n_dpif_ports;
struct svec cur_ifaces, want_ifaces, add_ifaces;
+ int next_port_no;
- bridge_fetch_dp_ifaces(br, &cur_ifaces);
+ dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+ svec_init(&cur_ifaces);
+ for (i = 0; i < n_dpif_ports; i++) {
+ svec_add(&cur_ifaces, dpif_ports[i].devname);
+ }
+ free(dpif_ports);
+ svec_sort_unique(&cur_ifaces);
bridge_get_all_ifaces(br, &want_ifaces);
svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL);
+
+ next_port_no = 0;
for (i = 0; i < add_ifaces.n; i++) {
const char *if_name = add_ifaces.names[i];
- int retval;
if_up(if_name);
- retval = dpif_add_port(&mgmt_dpif, br->dp_idx, if_name);
- if (retval) {
- VLOG_ERR("failed to add %s interface to nl:%d: %s",
- if_name, br->dp_idx, strerror(retval));
+ for (;;) {
+ int error = dpif_port_add(&br->dpif, if_name, next_port_no++);
+ if (error != EEXIST) {
+ if (next_port_no >= 256) {
+ VLOG_ERR("ran out of valid port numbers on nl:%d",
+ br->dp_idx);
+ goto out;
+ }
+ if (error) {
+ VLOG_ERR("failed to add %s interface to nl:%d: %s",
+ if_name, br->dp_idx, strerror(error));
+ }
+ break;
+ }
}
}
+ out:
svec_destroy(&cur_ifaces);
svec_destroy(&want_ifaces);
svec_destroy(&add_ifaces);
}
LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) {
- bridge_fetch_dp_ifaces(br, NULL);
+ bridge_fetch_dp_ifaces(br);
for (i = 0; i < br->n_ports; ) {
struct port *port = br->ports[i];
for (j = 0; j < port->n_ifaces; ) {
}
}
}
-
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
brstp_reconfigure(br);
}
\f
/* Bridge reconfiguration functions. */
-static void sanitize_opp(struct ofp_phy_port *opp);
static void run_secchan(struct bridge *);
static void start_secchan(struct bridge *);
bridge_create(const char *name)
{
struct bridge *br;
- int retval;
+ int error;
assert(!bridge_lookup(name));
br = xcalloc(1, sizeof *br);
br->stats_mgr = stats_mgr_create(br->rconn);
/* Create kernel datapath. */
- retval = dpif_add_dp(&mgmt_dpif, -1, br->name);
- if (retval) {
+ error = dpif_create(br->name, &br->dpif);
+ if (error) {
VLOG_ERR("failed to create datapath %s: %s",
- br->name, strerror(retval));
+ br->name, strerror(error));
free(br);
return NULL;
}
list_push_back(&all_bridges, &br->node);
- br->dp_idx = dpif_get_idx(br->name);
- if (br->dp_idx == -1) {
- VLOG_WARN("bad dp_idx for bridge %s", br->name);
- }
-
+ br->dp_idx = br->dpif.minor;
VLOG_INFO("created bridge %s with dp_idx %d", br->name, br->dp_idx);
return br;
}
list_remove(&br->node);
if (br->dp_idx >= 0) {
- int retval = dpif_del_dp(&mgmt_dpif, br->dp_idx, NULL);
- if (!retval || retval == ENOENT) {
- assert(br->dp_idx < DP_MAX);
- in_use_dps[br->dp_idx] = false;
- } else {
+ int retval = dpif_delete(&br->dpif);
+ if (retval && retval != ENOENT) {
VLOG_ERR("failed to delete datapath nl:%d: %s",
br->dp_idx, strerror(retval));
}
return br->txqlen >= 100;
}
-/* The kernel interface to add ports doesn't report what port numbers they were
- * assigned (XXX), so now we have to connect to the datapath and use a feature
- * request to obtain the port numbers. */
-static int
-bridge_fetch_dp_ifaces(struct bridge *br, struct svec *iface_names)
-{
- char *vconn_name;
- struct vconn *vconn = NULL;
- struct ofpbuf *request;
- struct ofpbuf *reply = NULL;
- struct ofp_switch_features *osf;
- size_t n_ports;
+/* For robustness, in case the administrator moves around datapath ports behind
+ * our back, we re-check all the datapath port numbers here. */
+static void
+bridge_fetch_dp_ifaces(struct bridge *br)
+{
+ struct odp_port *dpif_ports;
+ size_t n_dpif_ports;
size_t i, j;
- int retval;
-
- if (iface_names) {
- svec_init(iface_names);
- }
/* Reset all interface numbers. */
for (i = 0; i < br->n_ports; i++) {
}
port_array_clear(&br->ifaces);
- /* Open connection to datapath. */
- vconn_name = xasprintf("nl:%d", br->dp_idx);
- retval = vconn_open_block(vconn_name, OFP_VERSION, &vconn);
- free(vconn_name);
- if (retval) {
- VLOG_ERR("could not open connection to nl:%d: %s",
- br->dp_idx, strerror(retval));
- goto done;
- }
-
- /* Send request, receive reply. */
- make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &request);
- retval = vconn_transact(vconn, request, &reply);
- if (retval) {
- if (retval == EOF) {
- VLOG_ERR("unexpected connection close talking to nl:%d",
- br->dp_idx);
- } else {
- VLOG_ERR("error requesting features from nl:%d: %s",
- br->dp_idx, strerror(retval));
- }
- goto done;
- }
-
- /* Parse reply. */
- osf = reply->data;
- retval = check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY,
- sizeof *osf, sizeof *osf->ports,
- &n_ports);
- if (retval) {
- goto done;
- }
- for (i = 0; i < n_ports; i++) {
- struct ofp_phy_port *opp = &osf->ports[i];
- int port_no = ntohs(opp->port_no);
- struct iface *iface;
-
- sanitize_opp(opp);
-
- iface = iface_lookup(br, (const char *) opp->name);
+ dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+ for (i = 0; i < n_dpif_ports; i++) {
+ struct odp_port *p = &dpif_ports[i];
+ struct iface *iface = iface_lookup(br, p->devname);
if (iface) {
if (iface->dp_ifidx >= 0) {
VLOG_WARN("datapath nl:%d reported interface %s twice",
- br->dp_idx, opp->name);
- } else if (iface_from_dp_ifidx(br, port_no)) {
- VLOG_WARN("datapath nl:%d reported interface %d twice",
- br->dp_idx, port_no);
+ br->dp_idx, p->devname);
+ } else if (iface_from_dp_ifidx(br, p->port)) {
+ VLOG_WARN("datapath nl:%d reported interface %"PRIu16" twice",
+ br->dp_idx, p->port);
} else {
- port_array_set(&br->ifaces, port_no, iface);
- iface->dp_ifidx = port_no;
+ uint16_t ofp_port = p->port == ODPP_LOCAL ? OFPP_LOCAL : p->port;
+ port_array_set(&br->ifaces, ofp_port, iface);
+ iface->dp_ifidx = p->port;
}
}
- if (iface_names) {
- svec_add(iface_names, (const char *) opp->name);
- }
- }
- retval = 0;
-
- if (iface_names) {
- svec_sort(iface_names);
- if (!svec_is_unique(iface_names)) {
- VLOG_WARN("datapath nl:%d reported interface named %s twice",
- br->dp_idx, svec_get_duplicate(iface_names));
- svec_unique(iface_names);
- }
- }
-
-done:
- vconn_close(vconn);
- ofpbuf_delete(reply);
- return retval;
-}
-
-static void
-sanitize_opp(struct ofp_phy_port *opp)
-{
- size_t i;
-
- for (i = 0; i < sizeof opp->name; i++) {
- char c = opp->name[i];
- if (c && (c < 0x20 || c > 0x7e)) {
- opp->name[i] = '.';
- }
}
- opp->name[sizeof opp->name - 1] = '\0';
+ free(dpif_ports);
}
/* Returns the idle time that the bridge is currently using. We reduce the
}
static bool
-choose_output_iface(const struct port *port, const struct flow *flow,
+choose_output_iface(const struct port *port, const flow_t *flow,
uint16_t *dp_ifidx, tag_type *tags)
{
struct iface *iface;
}
static bool
-set_dst(struct ft_dst *p, const struct flow *flow,
+set_dst(struct ft_dst *p, const flow_t *flow,
const struct port *in_port, const struct port *out_port,
tag_type *tags)
{
}
static size_t
-compose_dsts(const struct bridge *br, const struct flow *flow, uint16_t vlan,
+compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
struct ft_dst dsts[], tag_type *tags)
{
};
static void
-send_packets(struct bridge *br, const struct flow *flow,
+send_packets(struct bridge *br, const flow_t *flow,
const struct received_packet *pkt, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
tag_type tags, bool setup_flow)
}
static bool
-is_bcast_arp_reply(const struct flow *flow, const struct ofpbuf *pkt)
+is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *pkt)
{
return (flow->dl_type == htons(ETH_TYPE_ARP)
&& eth_addr_is_broadcast(flow->dl_dst)
}
static void
-process_flow(struct bridge *br, const struct flow *flow,
+process_flow(struct bridge *br, const flow_t *flow,
struct received_packet *pkt)
{
uint16_t in_ifidx = ntohs(flow->in_port);
/* MAC learning. */
out_port = FLOOD_PORT;
if (br->ml) {
- uint16_t out_port_idx;
+ int out_port_idx;
bool may_learn;
if (!pkt->buf) {
* (because we probably sent the packet on one bonded interface and
* got it back on the other). */
/* XXX invalidation? */
- uint16_t src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
- may_learn = src_idx == OFPP_FLOOD || src_idx == in_port->port_idx;
+ int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+ may_learn = src_idx < 0 || src_idx == in_port->port_idx;
/* Broadcast ARP replies are an exception to this rule: the host
* has moved to another switch. */
/* Determine output port. */
out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
&tags);
- if (out_port_idx < br->n_ports) {
+ if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
out_port = br->ports[out_port_idx];
}
}
struct ofp_packet_in *opi = opi_;
struct received_packet pkt;
struct ofpbuf buf;
- struct flow flow;
+ flow_t flow;
if (check_ofp_message_array(&opi->header, OFPT_PACKET_IN,
offsetof(struct ofp_packet_in, data),
buf.data = opi->data;
pkt.buf = &buf;
pkt.buffer_id = ntohl(opi->buffer_id);
- flow_extract(&buf, ntohs(opi->in_port), &flow);
+ flow_extract(&buf, ntohs(opi->in_port), &flow); /* XXX port number translation */
if (opi->reason == OFPR_NO_MATCH) {
/* Delete any existing flow from the flow table. It must not really be
{
struct ofp_flow_expired *ofe = ofe_;
struct ft_flow *f;
- struct flow flow;
+ flow_t flow;
if (check_ofp_message(&ofe->header, OFPT_FLOW_EXPIRED, sizeof *ofe)) {
return;
- offsetof(struct ofp_flow_stats, actions))
/ sizeof(struct ofp_action_header));
struct ft_flow *f;
- struct flow flow;
+ flow_t flow;
size_t hash;
if (fs->match.wildcards != htonl(0)) {
}
static void
-brstp_receive(struct bridge *br, const struct flow *flow,
- const struct ofpbuf *pkt)
+brstp_receive(struct bridge *br, const flow_t *flow, const struct ofpbuf *pkt)
{
struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
struct ofpbuf payload = *pkt;
}
struct ft_flow *
-ftf_create(const struct flow *flow,
+ftf_create(const flow_t *flow,
const struct ft_dst dsts[], size_t n_dsts,
tag_type tags)
{
}
struct ft_flow *
-ft_lookup(const struct ft *ft, const struct flow *target, size_t hash)
+ft_lookup(const struct ft *ft, const flow_t *target, size_t hash)
{
struct ft_flow *f;
struct ft_flow {
tag_type tags;
struct hmap_node node;
- struct flow flow;
+ flow_t flow;
bool need_drop;
/* Statistics. */
struct ft_dst one_dst;
};
-struct ft_flow *ftf_create(const struct flow *,
+struct ft_flow *ftf_create(const flow_t *,
const struct ft_dst[], size_t n_dsts,
tag_type tags);
void ftf_destroy(struct ft_flow *);
struct ft *ft_create(void);
void ft_destroy(struct ft *);
void ft_swap(struct ft *, struct ft *);
-struct ft_flow *ft_lookup(const struct ft *, const struct flow *, size_t hash);
+struct ft_flow *ft_lookup(const struct ft *, const flow_t *, size_t hash);
void ft_remove(struct ft *, struct ft_flow *);
void ft_insert(struct ft *, struct ft_flow *);