#include "fail-open.h"
#include "hmapx.h"
#include "lacp.h"
+#include "learn.h"
#include "mac-learning.h"
#include "multipath.h"
#include "netdev.h"
* flow translation. */
#define MAX_RESUBMIT_RECURSION 16
+/* Number of implemented OpenFlow tables.
+ *
+ * This sizes the 'tables' array in struct ofproto_dpif and serves as the
+ * exclusive upper bound when a table ID is checked before indexing it.  The
+ * build-time assertion below keeps the value within 1...255. */
+enum { N_TABLES = 255 };
+BUILD_ASSERT_DECL(N_TABLES >= 1 && N_TABLES <= 255);
+
struct ofport_dpif;
struct ofproto_dpif;
uint64_t packet_count; /* Number of packets received. */
uint64_t byte_count; /* Number of bytes received. */
+ tag_type tag; /* Caches rule_calculate_tag() result. */
+
struct list facets; /* List of "struct facet"s. */
};
* revalidating without a packet to refer to. */
const struct ofpbuf *packet;
+ /* Should OFPP_NORMAL MAC learning and NXAST_LEARN actions execute? We
+ * want to execute them if we are actually processing a packet, or if we
+ * are accounting for packets that the datapath has processed, but not if
+ * we are just revalidating. */
+ bool may_learn;
+
/* If nonnull, called just before executing a resubmit action.
*
* This is normally null so the client has to set it manually after
* to look at them after it returns. */
struct ofpbuf *odp_actions; /* Datapath actions. */
- tag_type tags; /* Tags associated with OFPP_NORMAL actions. */
+ tag_type tags; /* Tags associated with actions. */
bool may_set_up_flow; /* True ordinarily; false if the actions must
* be reassessed for every packet. */
+ bool has_learn; /* Actions include NXAST_LEARN? */
+ bool has_normal; /* Actions output to OFPP_NORMAL? */
uint16_t nf_output_iface; /* Output interface index for NetFlow. */
/* xlate_actions() initializes and uses these members, but the client has no
bool installed; /* Installed in datapath? */
bool may_install; /* True ordinarily; false if actions must
* be reassessed for every packet. */
+ bool has_learn; /* Actions include NXAST_LEARN? */
+ bool has_normal; /* Actions output to OFPP_NORMAL? */
size_t actions_len; /* Number of bytes in actions[]. */
struct nlattr *actions; /* Datapath actions. */
tag_type tags; /* Tags. */
struct flow *, uint64_t packets, uint64_t bytes,
long long int used);
+static uint32_t rule_calculate_tag(const struct flow *,
+ const struct flow_wildcards *,
+ uint32_t basis);
+static void rule_invalidate(const struct rule_dpif *);
+
struct ofport_dpif {
struct ofport up;
struct ofoperation *op;
};
+/* Extra information about a classifier table.
+ * Currently used just for optimized flow revalidation.
+ *
+ * struct ofproto_dpif keeps one of these per OpenFlow table in its 'tables'
+ * array, indexed by table ID. */
+struct table_dpif {
+ /* If either of these is nonnull, then this table has a form that allows
+ * flows to be tagged to avoid revalidating most flows for the most common
+ * kinds of flow table changes.
+ *
+ * table_update_taggable() recomputes both pointers from the table's
+ * classifier and leaves both null when the table cannot be tagged. */
+ struct cls_table *catchall_table; /* Table that wildcards all fields. */
+ struct cls_table *other_table; /* Table with any other wildcard set. */
+ uint32_t basis; /* Keeps each table's tags separate. */
+};
+
struct ofproto_dpif {
struct ofproto up;
struct dpif *dpif;
/* Facets. */
struct hmap facets;
+
+ /* Revalidation. */
+ struct table_dpif tables[N_TABLES];
bool need_revalidate;
struct tag_set revalidate_set;
ofproto->max_ports = dpif_get_max_ports(ofproto->dpif);
ofproto->n_matches = 0;
+ dpif_flow_flush(ofproto->dpif);
+ dpif_recv_purge(ofproto->dpif);
+
error = dpif_recv_set_mask(ofproto->dpif,
((1u << DPIF_UC_MISS) |
(1u << DPIF_UC_ACTION) |
dpif_close(ofproto->dpif);
return error;
}
- dpif_flow_flush(ofproto->dpif);
- dpif_recv_purge(ofproto->dpif);
ofproto->netflow = NULL;
ofproto->sflow = NULL;
timer_set_duration(&ofproto->next_expiration, 1000);
hmap_init(&ofproto->facets);
+
+ for (i = 0; i < N_TABLES; i++) {
+ struct table_dpif *table = &ofproto->tables[i];
+
+ table->catchall_table = NULL;
+ table->other_table = NULL;
+ table->basis = random_uint32();
+ }
ofproto->need_revalidate = false;
tag_set_init(&ofproto->revalidate_set);
ofproto->has_bundle_action = false;
- *n_tablesp = 255;
+ *n_tablesp = N_TABLES;
return 0;
}
error = 0;
} else {
if (!ofport->cfm) {
+ struct ofproto_dpif *ofproto;
+
+ ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ ofproto->need_revalidate = true;
ofport->cfm = cfm_create(netdev_get_name(ofport->up.netdev));
}
/* LACP. */
if (s->lacp) {
if (!bundle->lacp) {
+ ofproto->need_revalidate = true;
bundle->lacp = lacp_create();
}
lacp_configure(bundle->lacp, s->lacp);
}
/* Get trunked VLANs. */
- trunks = s->vlan == -1 ? NULL : s->trunks;
+ trunks = s->vlan == -1 ? s->trunks : NULL;
if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
free(bundle->trunks);
bundle->trunks = vlan_bitmap_clone(trunks);
/* Has 'rule' expired? */
now = time_msec();
if (rule->up.hard_timeout
- && now > rule->up.created + rule->up.hard_timeout * 1000) {
+ && now > rule->up.modified + rule->up.hard_timeout * 1000) {
reason = OFPRR_HARD_TIMEOUT;
} else if (rule->up.idle_timeout && list_is_empty(&rule->facets)
&& now > rule->used + rule->up.idle_timeout * 1000) {
odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions);
facet->tags = ctx.tags;
facet->may_install = ctx.may_set_up_flow;
+ facet->has_learn = ctx.has_learn;
+ facet->has_normal = ctx.has_normal;
facet->nf_flow.output_iface = ctx.nf_output_iface;
if (facet->actions_len != odp_actions->size
facet_account(struct ofproto_dpif *ofproto, struct facet *facet)
{
uint64_t n_bytes;
- struct ofbundle *in_bundle;
const struct nlattr *a;
- tag_type dummy = 0;
unsigned int left;
ovs_be16 vlan_tci;
- int vlan;
if (facet->byte_count <= facet->accounted_bytes) {
return;
n_bytes = facet->byte_count - facet->accounted_bytes;
facet->accounted_bytes = facet->byte_count;
- /* Test that 'tags' is nonzero to ensure that only flows that include an
- * OFPP_NORMAL action are used for learning and bond slave rebalancing.
- * This works because OFPP_NORMAL always sets a nonzero tag value.
- *
- * Feed information from the active flows back into the learning table to
+ /* Feed information from the active flows back into the learning table to
* ensure that table is always in sync with what is actually flowing
* through the datapath. */
- if (!facet->tags
- || !is_admissible(ofproto, &facet->flow, false, &dummy,
- &vlan, &in_bundle)) {
- return;
- }
+ if (facet->has_learn || facet->has_normal) {
+ struct action_xlate_ctx ctx;
- update_learning_table(ofproto, &facet->flow, vlan, in_bundle);
+ action_xlate_ctx_init(&ctx, ofproto, &facet->flow, NULL);
+ ctx.may_learn = true;
+ ofpbuf_delete(xlate_actions(&ctx, facet->rule->up.actions,
+ facet->rule->up.n_actions));
+ }
- if (!ofproto->has_bonded_bundles) {
+ if (!facet->has_normal || !ofproto->has_bonded_bundles) {
return;
}
facet->tags = ctx.tags;
facet->nf_flow.output_iface = ctx.nf_output_iface;
facet->may_install = ctx.may_set_up_flow;
+ facet->has_learn = ctx.has_learn;
+ facet->has_normal = ctx.has_normal;
if (actions_changed) {
free(facet->actions);
facet->actions_len = odp_actions->size;
rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
uint8_t table_id)
{
+ if (table_id >= N_TABLES) {
+ return NULL;
+ }
+
return rule_dpif_cast(rule_from_cls_rule(
classifier_lookup(&ofproto->up.tables[table_id],
flow)));
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
- ofproto->need_revalidate = true;
+ rule_invalidate(rule);
if (clogged) {
struct dpif_completion *c = xmalloc(sizeof *c);
c->op = rule->up.pending;
struct rule_dpif *rule = rule_dpif_cast(rule_);
struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
struct rule_dpif *victim;
+ uint8_t table_id;
int error;
error = validate_actions(rule->up.actions, rule->up.n_actions,
list_init(&rule->facets);
}
+ table_id = rule->up.table_id;
+ rule->tag = (victim ? victim->tag
+ : table_id == 0 ? 0
+ : rule_calculate_tag(&rule->up.cr.flow, &rule->up.cr.wc,
+ ofproto->tables[table_id].basis));
+
complete_operation(rule);
return 0;
}
struct action_xlate_ctx *ctx);
static void xlate_normal(struct action_xlate_ctx *);
+/* Appends to 'ctx->odp_actions' whatever datapath actions are needed to move
+ * the VLAN TCI recorded in 'ctx->base_flow' to 'vlan_tci', and then records
+ * 'vlan_tci' in 'ctx->base_flow'.  Does nothing if the two already agree.
+ *
+ * A 'vlan_tci' without the VLAN_CFI bit produces a single POP_VLAN.
+ * Otherwise a POP_VLAN is emitted first if the base TCI is nonzero, followed
+ * by a PUSH_VLAN of 'vlan_tci' with the VLAN_CFI bit masked off. */
+static void
+commit_vlan_tci(struct action_xlate_ctx *ctx, ovs_be16 vlan_tci)
+{
+    struct flow *base_flow = &ctx->base_flow;
+    struct ofpbuf *actions = ctx->odp_actions;
+
+    if (base_flow->vlan_tci == vlan_tci) {
+        /* Nothing to commit. */
+        return;
+    }
+
+    if (vlan_tci & htons(VLAN_CFI)) {
+        /* Replace any existing tag with the new one. */
+        if (base_flow->vlan_tci != htons(0)) {
+            nl_msg_put_flag(actions, OVS_ACTION_ATTR_POP_VLAN);
+        }
+        nl_msg_put_be16(actions, OVS_ACTION_ATTR_PUSH_VLAN,
+                        vlan_tci & ~htons(VLAN_CFI));
+    } else {
+        /* The new TCI carries no tag at all. */
+        nl_msg_put_flag(actions, OVS_ACTION_ATTR_POP_VLAN);
+    }
+
+    base_flow->vlan_tci = vlan_tci;
+}
+
static void
commit_odp_actions(struct action_xlate_ctx *ctx)
{
base->nw_tos = flow->nw_tos;
}
- if (base->vlan_tci != flow->vlan_tci) {
- if (!(flow->vlan_tci & htons(VLAN_CFI))) {
- nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN);
- } else {
- if (base->vlan_tci != OFP_VLAN_NONE) {
- nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN);
- }
- nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN,
- flow->vlan_tci & ~htons(VLAN_CFI));
- }
- base->vlan_tci = flow->vlan_tci;
- }
+ commit_vlan_tci(ctx, flow->vlan_tci);
if (base->tp_src != flow->tp_src) {
nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_SET_TP_SRC, flow->tp_src);
uint16_t in_port, uint8_t table_id)
{
if (ctx->recurse < MAX_RESUBMIT_RECURSION) {
+ struct ofproto_dpif *ofproto = ctx->ofproto;
struct rule_dpif *rule;
uint16_t old_in_port;
uint8_t old_table_id;
old_table_id = ctx->table_id;
ctx->table_id = table_id;
- /* Look up a flow with 'in_port' as the input port. Then restore the
- * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
- * have surprising behavior). */
+ /* Look up a flow with 'in_port' as the input port. */
old_in_port = ctx->flow.in_port;
ctx->flow.in_port = in_port;
- rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow, table_id);
+ rule = rule_dpif_lookup(ofproto, &ctx->flow, table_id);
+
+ /* Tag the flow. */
+ if (table_id > 0 && table_id < N_TABLES) {
+ struct table_dpif *table = &ofproto->tables[table_id];
+ if (table->other_table) {
+ ctx->tags |= (rule
+ ? rule->tag
+ : rule_calculate_tag(&ctx->flow,
+ &table->other_table->wc,
+ table->basis));
+ }
+ }
+
+ /* Restore the original input port. Otherwise OFPP_NORMAL and
+ * OFPP_IN_PORT will have surprising behavior. */
ctx->flow.in_port = old_in_port;
if (ctx->resubmit_hook) {
}
}
+/* Carries out NXAST_LEARN action 'learn' for the flow in 'ctx'.
+ *
+ * learn_execute() fills in a flow_mod from 'learn' and 'ctx->flow', which is
+ * then applied to the ofproto's flow table with ofproto_flow_mod().  A failure
+ * there is only logged, subject to rate limiting; it is not reported to the
+ * caller.  The flow_mod's actions are freed before returning. */
+static void
+xlate_learn_action(struct action_xlate_ctx *ctx,
+                   const struct nx_action_learn *learn)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+    struct ofputil_flow_mod learned;
+    int retval;
+
+    learn_execute(learn, &ctx->flow, &learned);
+    retval = ofproto_flow_mod(&ctx->ofproto->up, &learned);
+    if (retval) {
+        if (!VLOG_DROP_WARN(&rl)) {
+            char *reason = ofputil_error_to_string(retval);
+
+            VLOG_WARN("learning action failed to modify flow table (%s)",
+                      reason);
+            free(reason);
+        }
+    }
+
+    free(learned.actions);
+}
+
static void
do_xlate_actions(const union ofp_action *in, size_t n_in,
struct action_xlate_ctx *ctx)
naor = (const struct nx_action_output_reg *) ia;
xlate_output_reg_action(ctx, naor);
break;
+
+ case OFPUTIL_NXAST_LEARN:
+ ctx->has_learn = true;
+ if (ctx->may_learn) {
+ xlate_learn_action(ctx, (const struct nx_action_learn *) ia);
+ }
+ break;
}
}
}
ctx->ofproto = ofproto;
ctx->flow = *flow;
ctx->packet = packet;
+ ctx->may_learn = packet != NULL;
ctx->resubmit_hook = NULL;
}
ctx->odp_actions = ofpbuf_new(512);
ctx->tags = 0;
ctx->may_set_up_flow = true;
+ ctx->has_learn = false;
+ ctx->has_normal = false;
ctx->nf_output_iface = NF_OUT_DROP;
ctx->recurse = 0;
ctx->priority = 0;
ctx->base_priority = 0;
ctx->base_flow = ctx->flow;
+ ctx->base_flow.tun_id = 0;
ctx->table_id = 0;
if (process_special(ctx->ofproto, &ctx->flow, ctx->packet)) {
dst_set_init(&set);
compose_dsts(ctx, vlan, in_bundle, out_bundle, &set);
compose_mirror_dsts(ctx, vlan, in_bundle, &set);
+ if (!set.n) {
+ dst_set_free(&set);
+ return;
+ }
/* Output all the packets we can without having to change the VLAN. */
+ commit_odp_actions(ctx);
initial_vlan = vlan_tci_to_vid(ctx->flow.vlan_tci);
if (initial_vlan == 0) {
initial_vlan = OFP_VLAN_NONE;
continue;
}
if (dst->vlan != cur_vlan) {
- if (dst->vlan == OFP_VLAN_NONE) {
- nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_VLAN);
- } else {
- ovs_be16 tci;
+ ovs_be16 tci;
- if (cur_vlan != OFP_VLAN_NONE) {
- nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_VLAN);
- }
- tci = htons(dst->vlan & VLAN_VID_MASK);
- tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK);
- nl_msg_put_be16(ctx->odp_actions,
- OVS_ACTION_ATTR_PUSH_VLAN, tci);
+ tci = htons(dst->vlan == OFP_VLAN_NONE ? 0 : dst->vlan);
+ tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK);
+ if (tci) {
+ tci |= htons(VLAN_CFI);
}
+ commit_vlan_tci(ctx, tci);
+
cur_vlan = dst->vlan;
}
nl_msg_put_u32(ctx->odp_actions,
"port %"PRIu16,
ofproto->up.name, flow->in_port);
}
+ *vlanp = -1;
return false;
}
*vlanp = vlan = flow_get_vlan(ofproto, flow, in_bundle, have_packet);
struct mac_entry *mac;
int vlan;
+ ctx->has_normal = true;
+
/* Check whether we should drop packets in this flow. */
if (!is_admissible(ctx->ofproto, &ctx->flow, ctx->packet != NULL,
&ctx->tags, &vlan, &in_bundle)) {
goto done;
}
- /* Learn source MAC (but don't try to learn from revalidation). */
- if (ctx->packet) {
+ /* Learn source MAC. */
+ if (ctx->may_learn) {
update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
}
}
}
\f
+/* Optimized flow revalidation.
+ *
+ * It's a difficult problem, in general, to tell which facets need to have
+ * their actions recalculated whenever the OpenFlow flow table changes. We
+ * don't try to solve that general problem: for most kinds of OpenFlow flow
+ * table changes, we recalculate the actions for every facet. This is
+ * relatively expensive, but it's good enough if the OpenFlow flow table
+ * doesn't change very often.
+ *
+ * However, we can expect one particular kind of OpenFlow flow table change to
+ * happen frequently: changes caused by MAC learning. To avoid wasting a lot
+ * of CPU on revalidating every facet whenever MAC learning modifies the flow
+ * table, we add a special case that applies to flow tables in which every rule
+ * has the same form (that is, the same wildcards), except that the table is
+ * also allowed to have a single "catch-all" flow that matches all packets. We
+ * optimize this case by tagging all of the facets that resubmit into the table
+ * and invalidating the same tag whenever a flow changes in that table. The
+ * end result is that we revalidate just the facets that need it (and sometimes
+ * a few more, but not all of the facets or even all of the facets that
+ * resubmit to the table modified by MAC learning). */
+
+/* Calculates the tag to use for 'flow' and wildcards 'wc' when it is inserted
+ * into an OpenFlow table with the given 'basis'.
+ *
+ * Returns 0 if 'wc' is a catch-all set of wildcards.  Otherwise the tag is
+ * created deterministically from a hash, seeded with 'basis', of a copy of
+ * 'flow' to which flow_zero_wildcards() has been applied with 'wc', so the
+ * same 'flow', 'wc', and 'basis' always produce the same tag. */
+static uint32_t
+rule_calculate_tag(const struct flow *flow, const struct flow_wildcards *wc,
+                   uint32_t basis)
+{
+    if (flow_wildcards_is_catchall(wc)) {
+        return 0;
+    } else {
+        struct flow tag_flow = *flow;
+        flow_zero_wildcards(&tag_flow, wc);
+        return tag_create_deterministic(flow_hash(&tag_flow, basis));
+    }
+}
+
+/* Recomputes whether OpenFlow table 'table_id' in 'ofproto' can be tagged,
+ * following a change to that table.
+ *
+ * A table can be tagged only if its classifier holds one or two cls_tables,
+ * of which at most one is not a catch-all.  If the resulting catch-all or
+ * non-catch-all cls_table differs from what was recorded before, the new
+ * pointers are stored and a full revalidation is requested.
+ *
+ * This function must be called after *each* change to a flow table.  If you
+ * skip calling it on some changes then the pointer comparisons at the end can
+ * be invalid if you get unlucky.  For example, if a flow removal causes a
+ * cls_table to be destroyed and then a flow insertion causes a cls_table with
+ * different wildcards to be created with the same address, then this function
+ * will incorrectly skip revalidation. */
+static void
+table_update_taggable(struct ofproto_dpif *ofproto, uint8_t table_id)
+{
+    const struct classifier *cls = &ofproto->up.tables[table_id];
+    struct table_dpif *table = &ofproto->tables[table_id];
+    struct cls_table *new_catchall = NULL;
+    struct cls_table *new_other = NULL;
+    size_t n_cls_tables = hmap_count(&cls->tables);
+
+    /* An empty classifier could be tagged too, but it would make the logic a
+     * little harder and it's a corner case that doesn't seem worth it yet.
+     * A classifier with more than two cls_tables can't be tagged at all. */
+    if (n_cls_tables == 1 || n_cls_tables == 2) {
+        struct cls_table *t;
+
+        HMAP_FOR_EACH (t, hmap_node, &cls->tables) {
+            if (cls_table_is_catchall(t)) {
+                new_catchall = t;
+            } else if (new_other) {
+                /* A second non-catch-all cls_table: indicate that we can't
+                 * tag this by setting both tables to NULL.  (We know that
+                 * 'new_catchall' is already NULL.) */
+                new_other = NULL;
+            } else {
+                new_other = t;
+            }
+        }
+    }
+
+    if (table->catchall_table == new_catchall
+        && table->other_table == new_other) {
+        return;
+    }
+
+    table->catchall_table = new_catchall;
+    table->other_table = new_other;
+    ofproto->need_revalidate = true;
+}
+
+/* Marks facets for revalidation after 'rule' has changed in some way: it is
+ * being inserted, being deleted, or having its actions modified.  This
+ * ensures that packets will be forwarded correctly according to the new state
+ * of the flow table.
+ *
+ * If the rule's table is taggable and 'rule' has a nonzero tag, only that tag
+ * is added to the revalidation set; otherwise every facet is scheduled for
+ * revalidation.
+ *
+ * This function must be called after *each* change to a flow table.  See
+ * the comment on table_update_taggable() for more information. */
+static void
+rule_invalidate(const struct rule_dpif *rule)
+{
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
+    const struct table_dpif *table;
+
+    table_update_taggable(ofproto, rule->up.table_id);
+    if (ofproto->need_revalidate) {
+        /* Everything is going to be revalidated anyway. */
+        return;
+    }
+
+    table = &ofproto->tables[rule->up.table_id];
+    if (!table->other_table || !rule->tag) {
+        ofproto->need_revalidate = true;
+    } else {
+        tag_set_add(&ofproto->revalidate_set, rule->tag);
+    }
+}
+\f
static bool
get_drop_frags(struct ofproto *ofproto_)
{
arg1 = strtok_r(NULL, " ", &save_ptr);
arg2 = strtok_r(NULL, " ", &save_ptr);
arg3 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */
- if (dpname && arg1 && !arg2 && !arg3) {
- /* ofproto/trace dpname flow */
+ if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) {
+ /* ofproto/trace dpname flow [-generate] */
int error;
/* Convert string to datapath key. */
unixctl_command_reply(conn, 501, "Invalid flow");
goto exit;
}
+
+ /* Generate a packet, if requested. */
+ if (arg2) {
+ packet = ofpbuf_new(0);
+ flow_compose(packet, &flow);
+ }
} else if (dpname && arg1 && arg2 && arg3) {
/* ofproto/trace dpname tun_id in_port packet */
uint16_t in_port;