--- /dev/null
+/*
+ * Copyright (c) 2012 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "ofproto-dpif-governor.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "coverage.h"
+#include "poll-loop.h"
+#include "random.h"
+#include "timeval.h"
+#include "util.h"
+#include "valgrind.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(ofproto_dpif_governor);
+
+/* Minimum number of observed packets before setting up a flow.
+ *
+ * This value seems OK empirically. */
+#define FLOW_SETUP_THRESHOLD 5
+BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD > 1);
+BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD < 16);
+
+/* Minimum and maximum size of a governor, in bytes. */
+enum { MIN_SIZE = 16 * 1024 };
+enum { MAX_SIZE = 256 * 1024 };
+BUILD_ASSERT_DECL(IS_POW2(MIN_SIZE));
+BUILD_ASSERT_DECL(IS_POW2(MAX_SIZE));
+
+/* Minimum and maximum time to process the number of packets that make up a
+ * given generation. If a generation completes faster than the minimum time,
+ * we double the table size (but no more than MAX_SIZE). If a generation take
+ * more than the maximum time to complete, we halve the table size (but no
+ * smaller than MIN_SIZE). */
+enum { MIN_ELAPSED = 1000 }; /* In milliseconds. */
+enum { MAX_ELAPSED = 5000 }; /* In milliseconds. */
+
+static void governor_new_generation(struct governor *, unsigned int size);
+
+/* Creates and returns a new governor named 'name' (which is used only for log
+ * messages). */
+struct governor *
+governor_create(const char *name)
+{
+ struct governor *g = xzalloc(sizeof *g);
+ g->name = xstrdup(name);
+ governor_new_generation(g, MIN_SIZE);
+ return g;
+}
+
+/* Destroys 'g'. */
+void
+governor_destroy(struct governor *g)
+{
+ if (g) {
+ VLOG_INFO("%s: disengaging", g->name);
+ free(g->table);
+ free(g);
+ }
+}
+
+/* Performs periodic maintenance work on 'g'. */
+void
+governor_run(struct governor *g)
+{
+ if (time_msec() - g->start > MAX_ELAPSED) {
+ if (g->size > MIN_SIZE) {
+ governor_new_generation(g, g->size / 2);
+ } else {
+ /* Don't start a new generation (we'd never go idle). */
+ }
+ }
+}
+
+/* Arranges for the poll loop to wake up when 'g' needs to do some work. */
+void
+governor_wait(struct governor *g)
+{
+ poll_timer_wait_until(g->start + MAX_ELAPSED);
+}
+
+/* Returns true if 'g' has been doing only a minimal amount of work and thus
+ * the client should consider getting rid of it entirely. */
+bool
+governor_is_idle(struct governor *g)
+{
+ return g->size == MIN_SIZE && time_msec() - g->start > MAX_ELAPSED;
+}
+
+/* Tests whether a flow whose hash is 'hash' and for which 'n' packets have
+ * just arrived should be set up in the datapath or just processed on a
+ * packet-by-packet basis. Returns true to set up a datapath flow, false to
+ * process the packets individually.
+ *
+ * One would expect 'n' to ordinarily be 1, if batching leads multiple packets
+ * to be processed at a time then it could be greater. */
+bool
+governor_should_install_flow(struct governor *g, uint32_t hash, int n)
+{
+ bool install_flow;
+ uint8_t *e;
+ int count;
+
+ assert(n > 0);
+
+ /* Count these packets and begin a new generation if necessary. */
+ g->n_packets += n;
+ if (g->n_packets >= g->size / 4) {
+ unsigned int new_size;
+ long long elapsed;
+
+ elapsed = time_msec() - g->start;
+ new_size = (elapsed < MIN_ELAPSED && g->size < MAX_SIZE ? g->size * 2
+ : elapsed > MAX_ELAPSED && g->size > MIN_SIZE ? g->size / 2
+ : g->size);
+ governor_new_generation(g, new_size);
+ }
+
+ /* Do hash table processing.
+ *
+ * Even-numbered hash values use high-order nibbles.
+ * Odd-numbered hash values use low-order nibbles. */
+ e = &g->table[(hash >> 1) & (g->size - 1)];
+ count = n + (hash & 1 ? *e >> 4 : *e & 0x0f);
+ if (count >= FLOW_SETUP_THRESHOLD) {
+ install_flow = true;
+ count = 0;
+ } else {
+ install_flow = false;
+ }
+ *e = hash & 1 ? (count << 4) | (*e & 0x0f) : (*e & 0xf0) | count;
+
+ return install_flow;
+}
+\f
+/* Starts a new generation in 'g' with a table size of 'size' bytes. 'size'
+ * must be a power of two between MIN_SIZE and MAX_SIZE, inclusive. */
+static void
+governor_new_generation(struct governor *g, unsigned int size)
+{
+ assert(size >= MIN_SIZE && size <= MAX_SIZE);
+ assert(is_pow2(size));
+
+ /* Allocate new table, if necessary. */
+ if (g->size != size) {
+ if (!g->size) {
+ VLOG_INFO("%s: engaging governor with %u kB hash table",
+ g->name, g->size / 1024);
+ } else {
+ VLOG_INFO("%s: processed %u packets in %.2f s, "
+ "%s hash table to %u kB",
+ g->name, g->n_packets,
+ (time_msec() - g->start) / 1000.0,
+ size > g->size ? "enlarging" : "shrinking",
+ size / 1024);
+ }
+
+ free(g->table);
+ g->table = xmalloc(size * sizeof *g->table);
+ g->size = size;
+ } else {
+ VLOG_DBG("%s: processed %u packets in %.2f s with %u kB hash table",
+ g->name, g->n_packets, (time_msec() - g->start) / 1000.0,
+ size / 1024);
+ }
+
+ /* Clear data for next generation. */
+ memset(g->table, 0, size * sizeof *g->table);
+ g->start = time_msec();
+ g->n_packets = 0;
+}
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-print.h"
+#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-sflow.h"
#include "poll-loop.h"
#include "timer.h"
COVERAGE_DEFINE(facet_invalidated);
COVERAGE_DEFINE(facet_revalidate);
COVERAGE_DEFINE(facet_unexpected);
+COVERAGE_DEFINE(facet_suppress);
/* Maximum depth of flow table recursion (due to resubmit actions) in a
* flow translation. */
/* Facets. */
struct hmap facets;
struct hmap subfacets;
+ struct governor *governor;
/* Revalidation. */
struct table_dpif tables[N_TABLES];
hmap_init(&ofproto->facets);
hmap_init(&ofproto->subfacets);
+ ofproto->governor = NULL;
for (i = 0; i < N_TABLES; i++) {
struct table_dpif *table = &ofproto->tables[i];
hmap_destroy(&ofproto->facets);
hmap_destroy(&ofproto->subfacets);
+ governor_destroy(ofproto->governor);
hmap_destroy(&ofproto->vlandev_map);
hmap_destroy(&ofproto->realdev_vid_map);
}
}
+ if (ofproto->governor) {
+ size_t n_subfacets;
+
+ governor_run(ofproto->governor);
+
+ /* If the governor has shrunk to its minimum size and the number of
+ * subfacets has dwindled, then drop the governor entirely.
+ *
+ * For hysteresis, the number of subfacets to drop the governor is
+ * smaller than the number needed to trigger its creation. */
+ n_subfacets = hmap_count(&ofproto->subfacets);
+ if (n_subfacets * 4 < ofproto->up.flow_eviction_threshold
+ && governor_is_idle(ofproto->governor)) {
+ governor_destroy(ofproto->governor);
+ ofproto->governor = NULL;
+ }
+ }
+
return 0;
}
} else {
timer_wait(&ofproto->next_expiration);
}
+ if (ofproto->governor) {
+ governor_wait(ofproto->governor);
+ }
}
static void
return NULL;
}
+/* Partially Initializes 'op' as an "execute" operation for 'miss' and
+ * 'packet'. The caller must initialize op->actions and op->actions_len. If
+ * 'miss' is associated with a subfacet the caller must also initialize the
+ * returned op->subfacet, and if anything needs to be freed after processing
+ * the op, the caller must initialize op->garbage also. */
static void
-handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
- struct flow_miss_op *ops, size_t *n_ops)
+init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet,
+ struct flow_miss_op *op)
+{
+ if (miss->flow.vlan_tci != miss->initial_tci) {
+ /* This packet was received on a VLAN splinter port. We
+ * added a VLAN to the packet to make the packet resemble
+ * the flow, but the actions were composed assuming that
+ * the packet contained no VLAN. So, we must remove the
+ * VLAN header from the packet before trying to execute the
+ * actions. */
+ eth_pop_vlan(packet);
+ }
+
+ op->subfacet = NULL;
+ op->garbage = NULL;
+ op->dpif_op.type = DPIF_OP_EXECUTE;
+ op->dpif_op.u.execute.key = miss->key;
+ op->dpif_op.u.execute.key_len = miss->key_len;
+ op->dpif_op.u.execute.packet = packet;
+}
+
+/* Helper for handle_flow_miss_without_facet() and
+ * handle_flow_miss_with_facet(). */
+static void
+handle_flow_miss_common(struct rule_dpif *rule,
+ struct ofpbuf *packet, const struct flow *flow)
{
- const struct flow *flow = &miss->flow;
- struct subfacet *subfacet;
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
+
+ ofproto->n_matches++;
+
+ if (rule->up.cr.priority == FAIL_OPEN_PRIORITY) {
+ /*
+ * Extra-special case for fail-open mode.
+ *
+ * We are in fail-open mode and the packet matched the fail-open
+ * rule, but we are connected to a controller too. We should send
+ * the packet up to the controller in the hope that it will try to
+ * set up a flow and thereby allow us to exit fail-open.
+ *
+ * See the top-level comment in fail-open.c for more information.
+ */
+ send_packet_in_miss(ofproto, packet, flow);
+ }
+}
+
+/* Figures out whether a flow that missed in 'ofproto', whose details are in
+ * 'miss', is likely to be worth tracking in detail in userspace and (usually)
+ * installing a datapath flow. The answer is usually "yes" (a return value of
+ * true). However, for short flows the cost of bookkeeping is much higher than
+ * the benefits, so when the datapath holds a large number of flows we impose
+ * some heuristics to decide which flows are likely to be worth tracking. */
+static bool
+flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
+ struct flow_miss *miss, uint32_t hash)
+{
+ if (!ofproto->governor) {
+ size_t n_subfacets;
+
+ n_subfacets = hmap_count(&ofproto->subfacets);
+ if (n_subfacets * 2 <= ofproto->up.flow_eviction_threshold) {
+ return true;
+ }
+
+ ofproto->governor = governor_create(ofproto->up.name);
+ }
+
+ return governor_should_install_flow(ofproto->governor, hash,
+ list_size(&miss->packets));
+}
+
+/* Handles 'miss', which matches 'rule', without creating a facet or subfacet
+ * or creating any datapath flow. May add an "execute" operation to 'ops' and
+ * increment '*n_ops'. */
+static void
+handle_flow_miss_without_facet(struct flow_miss *miss,
+ struct rule_dpif *rule,
+ struct flow_miss_op *ops, size_t *n_ops)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
+ struct action_xlate_ctx ctx;
struct ofpbuf *packet;
- struct facet *facet;
- uint32_t hash;
- /* The caller must ensure that miss->hmap_node.hash contains
- * flow_hash(miss->flow, 0). */
- hash = miss->hmap_node.hash;
+ LIST_FOR_EACH (packet, list_node, &miss->packets) {
+ struct flow_miss_op *op = &ops[*n_ops];
+ struct dpif_flow_stats stats;
+ struct ofpbuf odp_actions;
- facet = facet_lookup_valid(ofproto, flow, hash);
- if (!facet) {
- struct rule_dpif *rule;
+ COVERAGE_INC(facet_suppress);
- rule = rule_dpif_lookup(ofproto, flow, 0);
- if (!rule) {
- /* Don't send a packet-in if OFPUTIL_PC_NO_PACKET_IN asserted. */
- struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port);
- if (port) {
- if (port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN) {
- COVERAGE_INC(ofproto_dpif_no_packet_in);
- /* XXX install 'drop' flow entry */
- return;
- }
- } else {
- VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16,
- flow->in_port);
- }
+ ofpbuf_use_stub(&odp_actions, op->stub, sizeof op->stub);
- LIST_FOR_EACH (packet, list_node, &miss->packets) {
- send_packet_in_miss(ofproto, packet, flow);
- }
+ dpif_flow_stats_extract(&miss->flow, packet, &stats);
+ rule_credit_stats(rule, &stats);
- return;
- }
+ action_xlate_ctx_init(&ctx, ofproto, &miss->flow, miss->initial_tci,
+ rule, 0, packet);
+ ctx.resubmit_stats = &stats;
+ xlate_actions(&ctx, rule->up.actions, rule->up.n_actions,
+ &odp_actions);
+
+ if (odp_actions.size) {
+ struct dpif_execute *execute = &op->dpif_op.u.execute;
+
+ init_flow_miss_execute_op(miss, packet, op);
+ execute->actions = odp_actions.data;
+ execute->actions_len = odp_actions.size;
+ op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);
- facet = facet_create(rule, flow, hash);
+ (*n_ops)++;
+ } else {
+ ofpbuf_uninit(&odp_actions);
+ }
}
+}
+
+/* Handles 'miss', which matches 'facet'. May add any required datapath
+ * operations to 'ops', incrementing '*n_ops' for each new op. */
+static void
+handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
+ struct flow_miss_op *ops, size_t *n_ops)
+{
+ struct subfacet *subfacet;
+ struct ofpbuf *packet;
subfacet = subfacet_create(facet,
miss->key_fitness, miss->key, miss->key_len,
LIST_FOR_EACH (packet, list_node, &miss->packets) {
struct flow_miss_op *op = &ops[*n_ops];
- struct dpif_execute *execute = &op->dpif_op.u.execute;
struct dpif_flow_stats stats;
-
struct ofpbuf odp_actions;
- ofproto->n_matches++;
-
- if (facet->rule->up.cr.priority == FAIL_OPEN_PRIORITY) {
- /*
- * Extra-special case for fail-open mode.
- *
- * We are in fail-open mode and the packet matched the fail-open
- * rule, but we are connected to a controller too. We should send
- * the packet up to the controller in the hope that it will try to
- * set up a flow and thereby allow us to exit fail-open.
- *
- * See the top-level comment in fail-open.c for more information.
- */
- send_packet_in_miss(ofproto, packet, flow);
- }
+ handle_flow_miss_common(facet->rule, packet, &miss->flow);
ofpbuf_use_stub(&odp_actions, op->stub, sizeof op->stub);
if (!facet->may_install || !subfacet->actions) {
dpif_flow_stats_extract(&facet->flow, packet, &stats);
subfacet_update_stats(subfacet, &stats);
- if (!subfacet->actions_len) {
- /* No actions to execute, so skip talking to the dpif. */
- ofpbuf_uninit(&odp_actions);
- continue;
- }
+ if (subfacet->actions_len) {
+ struct dpif_execute *execute = &op->dpif_op.u.execute;
- if (flow->vlan_tci != subfacet->initial_tci) {
- /* This packet was received on a VLAN splinter port. We added
- * a VLAN to the packet to make the packet resemble the flow,
- * but the actions were composed assuming that the packet
- * contained no VLAN. So, we must remove the VLAN header from
- * the packet before trying to execute the actions. */
- eth_pop_vlan(packet);
- }
+ init_flow_miss_execute_op(miss, packet, op);
+ op->subfacet = subfacet;
+ if (facet->may_install) {
+ execute->actions = subfacet->actions;
+ execute->actions_len = subfacet->actions_len;
+ ofpbuf_uninit(&odp_actions);
+ } else {
+ execute->actions = odp_actions.data;
+ execute->actions_len = odp_actions.size;
+ op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);
+ }
- /* Set up operation. */
- op->dpif_op.type = DPIF_OP_EXECUTE;
- execute->key = miss->key;
- execute->key_len = miss->key_len;
- if (facet->may_install) {
- execute->actions = subfacet->actions;
- execute->actions_len = subfacet->actions_len;
- ofpbuf_uninit(&odp_actions);
- op->garbage = NULL;
+ (*n_ops)++;
} else {
- execute->actions = odp_actions.data;
- execute->actions_len = odp_actions.size;
- op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);
+ ofpbuf_uninit(&odp_actions);
}
- execute->packet = packet;
-
- (*n_ops)++;
}
if (facet->may_install && subfacet->key_fitness != ODP_FIT_TOO_LITTLE) {
}
}
+/* Handles flow miss 'miss' on 'ofproto'. The flow does not match any flow in
+ * the OpenFlow flow table. */
+static void
+handle_flow_miss_no_rule(struct ofproto_dpif *ofproto, struct flow_miss *miss)
+{
+ uint16_t in_port = miss->flow.in_port;
+ struct ofport_dpif *port = get_ofp_port(ofproto, in_port);
+
+ if (!port) {
+ VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, in_port);
+ }
+
+ if (port && port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN) {
+ /* XXX install 'drop' flow entry */
+ COVERAGE_INC(ofproto_dpif_no_packet_in);
+ } else {
+ const struct ofpbuf *packet;
+
+ LIST_FOR_EACH (packet, list_node, &miss->packets) {
+ send_packet_in_miss(ofproto, packet, &miss->flow);
+ }
+ }
+}
+
+/* Handles flow miss 'miss' on 'ofproto'. May add any required datapath
+ * operations to 'ops', incrementing '*n_ops' for each new op. */
+static void
+handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
+ struct flow_miss_op *ops, size_t *n_ops)
+{
+ struct facet *facet;
+ uint32_t hash;
+
+ /* The caller must ensure that miss->hmap_node.hash contains
+ * flow_hash(miss->flow, 0). */
+ hash = miss->hmap_node.hash;
+
+ facet = facet_lookup_valid(ofproto, &miss->flow, hash);
+ if (!facet) {
+ struct rule_dpif *rule = rule_dpif_lookup(ofproto, &miss->flow, 0);
+ if (!rule) {
+ handle_flow_miss_no_rule(ofproto, miss);
+ return;
+ } else if (!flow_miss_should_make_facet(ofproto, miss, hash)) {
+ handle_flow_miss_without_facet(miss, rule, ops, n_ops);
+ return;
+ }
+
+ facet = facet_create(rule, &miss->flow, hash);
+ }
+ handle_flow_miss_with_facet(miss, facet, ops, n_ops);
+}
+
/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of
* OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns
* an ODP_FIT_* value that indicates how well 'key' fits our expectations for