2 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "ofproto/private.h"
25 #include "byte-order.h"
30 #include "dynamic-string.h"
31 #include "fail-open.h"
34 #include "mac-learning.h"
35 #include "multipath.h"
42 #include "ofp-print.h"
43 #include "ofproto-sflow.h"
44 #include "poll-loop.h"
47 #include "vlan-bitmap.h"
50 VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
52 COVERAGE_DEFINE(ofproto_dpif_ctlr_action);
53 COVERAGE_DEFINE(ofproto_dpif_expired);
54 COVERAGE_DEFINE(ofproto_dpif_no_packet_in);
55 COVERAGE_DEFINE(ofproto_dpif_xlate);
56 COVERAGE_DEFINE(facet_changed_rule);
57 COVERAGE_DEFINE(facet_invalidated);
58 COVERAGE_DEFINE(facet_revalidate);
59 COVERAGE_DEFINE(facet_unexpected);
61 /* Maximum depth of flow table recursion (due to NXAST_RESUBMIT actions) in a
62 * flow translation. */
63 #define MAX_RESUBMIT_RECURSION 16
71 long long int used; /* Time last used; time created if not used. */
75 * - Do include packets and bytes from facets that have been deleted or
76 * whose own statistics have been folded into the rule.
78 * - Do include packets and bytes sent "by hand" that were accounted to
79 * the rule without any facet being involved (this is a rare corner
80 * case in rule_execute()).
82 * - Do not include packet or bytes that can be obtained from any facet's
83 * packet_count or byte_count member or that can be obtained from the
84 * datapath by, e.g., dpif_flow_get() for any facet.
86 uint64_t packet_count; /* Number of packets received. */
87 uint64_t byte_count; /* Number of bytes received. */
89 struct list facets; /* List of "struct facet"s. */
92 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
94 return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
97 static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *ofproto,
98 const struct flow *flow);
100 #define MAX_MIRRORS 32
101 typedef uint32_t mirror_mask_t;
102 #define MIRROR_MASK_C(X) UINT32_C(X)
103 BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS);
105 struct ofproto_dpif *ofproto; /* Owning ofproto. */
106 size_t idx; /* In ofproto's "mirrors" array. */
107 void *aux; /* Key supplied by ofproto's client. */
108 char *name; /* Identifier for log messages. */
110 /* Selection criteria. */
111 struct hmapx srcs; /* Contains "struct ofbundle *"s. */
112 struct hmapx dsts; /* Contains "struct ofbundle *"s. */
113 unsigned long *vlans; /* Bitmap of chosen VLANs, NULL selects all. */
115 /* Output (mutually exclusive). */
116 struct ofbundle *out; /* Output port or NULL. */
117 int out_vlan; /* Output VLAN or -1. */
120 static void mirror_destroy(struct ofmirror *);
122 /* A group of one or more OpenFlow ports. */
123 #define OFBUNDLE_FLOOD ((struct ofbundle *) 1)
125 struct ofproto_dpif *ofproto; /* Owning ofproto. */
126 struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
127 void *aux; /* Key supplied by ofproto's client. */
128 char *name; /* Identifier for log messages. */
131 struct list ports; /* Contains "struct ofport"s. */
132 int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */
133 unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1.
134 * NULL if all VLANs are trunked. */
135 struct lacp *lacp; /* LACP if LACP is enabled, otherwise NULL. */
136 struct bond *bond; /* Nonnull iff more than one port. */
139 bool floodable; /* True if no port has OFPPC_NO_FLOOD set. */
141 /* Port mirroring info. */
142 mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */
143 mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */
144 mirror_mask_t mirror_out; /* Mirrors that output to this bundle. */
147 static void bundle_remove(struct ofport *);
148 static void bundle_destroy(struct ofbundle *);
149 static void bundle_del_port(struct ofport_dpif *);
150 static void bundle_run(struct ofbundle *);
151 static void bundle_wait(struct ofbundle *);
153 struct action_xlate_ctx {
154 /* action_xlate_ctx_init() initializes these members. */
157 struct ofproto_dpif *ofproto;
159 /* Flow to which the OpenFlow actions apply. xlate_actions() will modify
160 * this flow when actions change header fields. */
163 /* The packet corresponding to 'flow', or a null pointer if we are
164 * revalidating without a packet to refer to. */
165 const struct ofpbuf *packet;
167 /* If nonnull, called just before executing a resubmit action.
169 * This is normally null so the client has to set it manually after
170 * calling action_xlate_ctx_init(). */
171 void (*resubmit_hook)(struct action_xlate_ctx *, struct rule_dpif *);
173 /* If true, the speciality of 'flow' should be checked before executing
174 * its actions. If special_cb returns false on 'flow' rendered
175 * uninstallable and no actions will be executed. */
178 /* xlate_actions() initializes and uses these members. The client might want
179 * to look at them after it returns. */
181 struct ofpbuf *odp_actions; /* Datapath actions. */
182 tag_type tags; /* Tags associated with OFPP_NORMAL actions. */
183 bool may_set_up_flow; /* True ordinarily; false if the actions must
184 * be reassessed for every packet. */
185 uint16_t nf_output_iface; /* Output interface index for NetFlow. */
187 /* xlate_actions() initializes and uses these members, but the client has no
188 * reason to look at them. */
190 int recurse; /* Recursion level, via xlate_table_action. */
191 int last_pop_priority; /* Offset in 'odp_actions' just past most
192 * recent ODP_ACTION_ATTR_SET_PRIORITY. */
195 static void action_xlate_ctx_init(struct action_xlate_ctx *,
196 struct ofproto_dpif *, const struct flow *,
197 const struct ofpbuf *);
198 static struct ofpbuf *xlate_actions(struct action_xlate_ctx *,
199 const union ofp_action *in, size_t n_in);
201 /* An exact-match instantiation of an OpenFlow flow. */
203 long long int used; /* Time last used; time created if not used. */
207 * - Do include packets and bytes sent "by hand", e.g. with
210 * - Do include packets and bytes that were obtained from the datapath
211 * when a flow was deleted (e.g. dpif_flow_del()) or when its
212 * statistics were reset (e.g. dpif_flow_put() with
213 * DPIF_FP_ZERO_STATS).
215 * - Do not include any packets or bytes that can currently be obtained
216 * from the datapath by, e.g., dpif_flow_get().
218 uint64_t packet_count; /* Number of packets received. */
219 uint64_t byte_count; /* Number of bytes received. */
221 uint64_t dp_packet_count; /* Last known packet count in the datapath. */
222 uint64_t dp_byte_count; /* Last known byte count in the datapath. */
224 uint64_t rs_packet_count; /* Packets pushed to resubmit children. */
225 uint64_t rs_byte_count; /* Bytes pushed to resubmit children. */
226 long long int rs_used; /* Used time pushed to resubmit children. */
228 /* Number of bytes passed to account_cb. This may include bytes that can
229 * currently obtained from the datapath (thus, it can be greater than
231 uint64_t accounted_bytes;
233 struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */
234 struct list list_node; /* In owning rule's 'facets' list. */
235 struct rule_dpif *rule; /* Owning rule. */
236 struct flow flow; /* Exact-match flow. */
237 bool installed; /* Installed in datapath? */
238 bool may_install; /* True ordinarily; false if actions must
239 * be reassessed for every packet. */
240 size_t actions_len; /* Number of bytes in actions[]. */
241 struct nlattr *actions; /* Datapath actions. */
242 tag_type tags; /* Tags. */
243 struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */
246 static struct facet *facet_create(struct rule_dpif *, const struct flow *,
247 const struct ofpbuf *packet);
248 static void facet_remove(struct ofproto_dpif *, struct facet *);
249 static void facet_free(struct facet *);
251 static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
252 static struct facet *facet_lookup_valid(struct ofproto_dpif *,
253 const struct flow *);
254 static bool facet_revalidate(struct ofproto_dpif *, struct facet *);
256 static void facet_execute(struct ofproto_dpif *, struct facet *,
257 struct ofpbuf *packet);
259 static int facet_put__(struct ofproto_dpif *, struct facet *,
260 const struct nlattr *actions, size_t actions_len,
261 struct dpif_flow_stats *);
262 static void facet_install(struct ofproto_dpif *, struct facet *,
264 static void facet_uninstall(struct ofproto_dpif *, struct facet *);
265 static void facet_flush_stats(struct ofproto_dpif *, struct facet *);
267 static void facet_make_actions(struct ofproto_dpif *, struct facet *,
268 const struct ofpbuf *packet);
269 static void facet_update_time(struct ofproto_dpif *, struct facet *,
271 static void facet_update_stats(struct ofproto_dpif *, struct facet *,
272 const struct dpif_flow_stats *);
273 static void facet_push_stats(struct facet *);
274 static void facet_account(struct ofproto_dpif *, struct facet *,
275 uint64_t extra_bytes);
277 static bool facet_is_controller_flow(struct facet *);
279 static void flow_push_stats(const struct rule_dpif *,
280 struct flow *, uint64_t packets, uint64_t bytes,
287 struct ofbundle *bundle; /* Bundle that contains this port, if any. */
288 struct list bundle_node; /* In struct ofbundle's "ports" list. */
289 struct cfm *cfm; /* Connectivity Fault Management, if any. */
290 tag_type tag; /* Tag associated with this port. */
293 static struct ofport_dpif *
294 ofport_dpif_cast(const struct ofport *ofport)
296 assert(ofport->ofproto->ofproto_class == &ofproto_dpif_class);
297 return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
300 static void port_run(struct ofport_dpif *);
301 static void port_wait(struct ofport_dpif *);
302 static int set_cfm(struct ofport *, const struct cfm *,
303 const uint16_t *remote_mps, size_t n_remote_mps);
305 struct ofproto_dpif {
311 struct netflow *netflow;
312 struct ofproto_sflow *sflow;
313 struct hmap bundles; /* Contains "struct ofbundle"s. */
314 struct mac_learning *ml;
315 struct ofmirror *mirrors[MAX_MIRRORS];
316 bool has_bonded_bundles;
319 struct timer next_expiration;
323 bool need_revalidate;
324 struct tag_set revalidate_set;
327 static void ofproto_dpif_unixctl_init(void);
329 static struct ofproto_dpif *
330 ofproto_dpif_cast(const struct ofproto *ofproto)
332 assert(ofproto->ofproto_class == &ofproto_dpif_class);
333 return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
336 static struct ofport_dpif *get_ofp_port(struct ofproto_dpif *,
338 static struct ofport_dpif *get_odp_port(struct ofproto_dpif *,
341 /* Packet processing. */
342 static void update_learning_table(struct ofproto_dpif *,
343 const struct flow *, int vlan,
345 static bool is_admissible(struct ofproto_dpif *, const struct flow *,
346 bool have_packet, tag_type *, int *vlanp,
347 struct ofbundle **in_bundlep);
348 static void handle_upcall(struct ofproto_dpif *, struct dpif_upcall *);
350 /* Flow expiration. */
351 static int expire(struct ofproto_dpif *);
354 static int send_packet(struct ofproto_dpif *,
355 uint32_t odp_port, uint16_t vlan_tci,
356 const struct ofpbuf *packet);
358 /* Global variables. */
359 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
361 /* Factory functions. */
364 enumerate_types(struct sset *types)
366 dp_enumerate_types(types);
370 enumerate_names(const char *type, struct sset *names)
372 return dp_enumerate_names(type, names);
376 del(const char *type, const char *name)
381 error = dpif_open(name, type, &dpif);
383 error = dpif_delete(dpif);
389 /* Basic life-cycle. */
391 static struct ofproto *
394 struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
399 dealloc(struct ofproto *ofproto_)
401 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
406 construct(struct ofproto *ofproto_)
408 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
409 const char *name = ofproto->up.name;
413 error = dpif_create_and_open(name, ofproto->up.type, &ofproto->dpif);
415 VLOG_ERR("failed to open datapath %s: %s", name, strerror(error));
419 ofproto->max_ports = dpif_get_max_ports(ofproto->dpif);
421 error = dpif_recv_set_mask(ofproto->dpif,
422 ((1u << DPIF_UC_MISS) |
423 (1u << DPIF_UC_ACTION) |
424 (1u << DPIF_UC_SAMPLE)));
426 VLOG_ERR("failed to listen on datapath %s: %s", name, strerror(error));
427 dpif_close(ofproto->dpif);
430 dpif_flow_flush(ofproto->dpif);
431 dpif_recv_purge(ofproto->dpif);
433 ofproto->netflow = NULL;
434 ofproto->sflow = NULL;
435 hmap_init(&ofproto->bundles);
436 ofproto->ml = mac_learning_create();
437 for (i = 0; i < MAX_MIRRORS; i++) {
438 ofproto->mirrors[i] = NULL;
440 ofproto->has_bonded_bundles = false;
442 timer_set_duration(&ofproto->next_expiration, 1000);
444 hmap_init(&ofproto->facets);
445 ofproto->need_revalidate = false;
446 tag_set_init(&ofproto->revalidate_set);
448 ofproto_dpif_unixctl_init();
454 destruct(struct ofproto *ofproto_)
456 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
459 for (i = 0; i < MAX_MIRRORS; i++) {
460 mirror_destroy(ofproto->mirrors[i]);
463 netflow_destroy(ofproto->netflow);
464 ofproto_sflow_destroy(ofproto->sflow);
465 hmap_destroy(&ofproto->bundles);
466 mac_learning_destroy(ofproto->ml);
468 hmap_destroy(&ofproto->facets);
470 dpif_close(ofproto->dpif);
474 run(struct ofproto *ofproto_)
476 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
477 struct ofport_dpif *ofport;
478 struct ofbundle *bundle;
481 dpif_run(ofproto->dpif);
483 for (i = 0; i < 50; i++) {
484 struct dpif_upcall packet;
487 error = dpif_recv(ofproto->dpif, &packet);
489 if (error == ENODEV) {
490 /* Datapath destroyed. */
496 handle_upcall(ofproto, &packet);
499 if (timer_expired(&ofproto->next_expiration)) {
500 int delay = expire(ofproto);
501 timer_set_duration(&ofproto->next_expiration, delay);
504 if (ofproto->netflow) {
505 netflow_run(ofproto->netflow);
507 if (ofproto->sflow) {
508 ofproto_sflow_run(ofproto->sflow);
511 HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
514 HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
518 /* Now revalidate if there's anything to do. */
519 if (ofproto->need_revalidate
520 || !tag_set_is_empty(&ofproto->revalidate_set)) {
521 struct tag_set revalidate_set = ofproto->revalidate_set;
522 bool revalidate_all = ofproto->need_revalidate;
523 struct facet *facet, *next;
525 /* Clear the revalidation flags. */
526 tag_set_init(&ofproto->revalidate_set);
527 ofproto->need_revalidate = false;
529 HMAP_FOR_EACH_SAFE (facet, next, hmap_node, &ofproto->facets) {
531 || tag_set_intersects(&revalidate_set, facet->tags)) {
532 facet_revalidate(ofproto, facet);
541 wait(struct ofproto *ofproto_)
543 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
544 struct ofport_dpif *ofport;
545 struct ofbundle *bundle;
547 dpif_wait(ofproto->dpif);
548 dpif_recv_wait(ofproto->dpif);
549 if (ofproto->sflow) {
550 ofproto_sflow_wait(ofproto->sflow);
552 if (!tag_set_is_empty(&ofproto->revalidate_set)) {
553 poll_immediate_wake();
555 HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
558 HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
561 if (ofproto->need_revalidate) {
562 /* Shouldn't happen, but if it does just go around again. */
563 VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
564 poll_immediate_wake();
566 timer_wait(&ofproto->next_expiration);
571 flush(struct ofproto *ofproto_)
573 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
574 struct facet *facet, *next_facet;
576 HMAP_FOR_EACH_SAFE (facet, next_facet, hmap_node, &ofproto->facets) {
577 /* Mark the facet as not installed so that facet_remove() doesn't
578 * bother trying to uninstall it. There is no point in uninstalling it
579 * individually since we are about to blow away all the facets with
580 * dpif_flow_flush(). */
581 facet->installed = false;
582 facet->dp_packet_count = 0;
583 facet->dp_byte_count = 0;
584 facet_remove(ofproto, facet);
586 dpif_flow_flush(ofproto->dpif);
590 set_netflow(struct ofproto *ofproto_,
591 const struct netflow_options *netflow_options)
593 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
595 if (netflow_options) {
596 if (!ofproto->netflow) {
597 ofproto->netflow = netflow_create();
599 return netflow_set_options(ofproto->netflow, netflow_options);
601 netflow_destroy(ofproto->netflow);
602 ofproto->netflow = NULL;
607 static struct ofport *
610 struct ofport_dpif *port = xmalloc(sizeof *port);
615 port_dealloc(struct ofport *port_)
617 struct ofport_dpif *port = ofport_dpif_cast(port_);
622 port_construct(struct ofport *port_)
624 struct ofport_dpif *port = ofport_dpif_cast(port_);
625 struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
627 port->odp_port = ofp_port_to_odp_port(port->up.ofp_port);
630 port->tag = tag_create_random();
632 if (ofproto->sflow) {
633 ofproto_sflow_add_port(ofproto->sflow, port->odp_port,
634 netdev_get_name(port->up.netdev));
641 port_destruct(struct ofport *port_)
643 struct ofport_dpif *port = ofport_dpif_cast(port_);
644 struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
646 bundle_remove(port_);
647 set_cfm(port_, NULL, NULL, 0);
648 if (ofproto->sflow) {
649 ofproto_sflow_del_port(ofproto->sflow, port->odp_port);
654 port_modified(struct ofport *port_)
656 struct ofport_dpif *port = ofport_dpif_cast(port_);
658 if (port->bundle && port->bundle->bond) {
659 bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
664 port_reconfigured(struct ofport *port_, ovs_be32 old_config)
666 struct ofport_dpif *port = ofport_dpif_cast(port_);
667 struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
668 ovs_be32 changed = old_config ^ port->up.opp.config;
670 if (changed & htonl(OFPPC_NO_RECV | OFPPC_NO_RECV_STP |
671 OFPPC_NO_FWD | OFPPC_NO_FLOOD)) {
672 ofproto->need_revalidate = true;
677 set_sflow(struct ofproto *ofproto_,
678 const struct ofproto_sflow_options *sflow_options)
680 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
681 struct ofproto_sflow *os = ofproto->sflow;
684 struct ofport_dpif *ofport;
686 os = ofproto->sflow = ofproto_sflow_create(ofproto->dpif);
687 HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
688 ofproto_sflow_add_port(os, ofport->odp_port,
689 netdev_get_name(ofport->up.netdev));
692 ofproto_sflow_set_options(os, sflow_options);
694 ofproto_sflow_destroy(os);
695 ofproto->sflow = NULL;
701 set_cfm(struct ofport *ofport_, const struct cfm *cfm,
702 const uint16_t *remote_mps, size_t n_remote_mps)
704 struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
711 ofport->cfm = cfm_create();
714 ofport->cfm->mpid = cfm->mpid;
715 ofport->cfm->interval = cfm->interval;
716 memcpy(ofport->cfm->maid, cfm->maid, CCM_MAID_LEN);
718 cfm_update_remote_mps(ofport->cfm, remote_mps, n_remote_mps);
720 if (cfm_configure(ofport->cfm)) {
726 cfm_destroy(ofport->cfm);
732 get_cfm(struct ofport *ofport_, const struct cfm **cfmp)
734 struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
741 /* Expires all MAC learning entries associated with 'port' and forces ofproto
742 * to revalidate every flow. */
744 bundle_flush_macs(struct ofbundle *bundle)
746 struct ofproto_dpif *ofproto = bundle->ofproto;
747 struct mac_learning *ml = ofproto->ml;
748 struct mac_entry *mac, *next_mac;
750 ofproto->need_revalidate = true;
751 LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
752 if (mac->port.p == bundle) {
753 mac_learning_expire(ml, mac);
758 static struct ofbundle *
759 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
761 struct ofbundle *bundle;
763 HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
765 if (bundle->aux == aux) {
772 /* Looks up each of the 'n_auxes' pointers in 'auxes' as bundles and adds the
773 * ones that are found to 'bundles'. */
775 bundle_lookup_multiple(struct ofproto_dpif *ofproto,
776 void **auxes, size_t n_auxes,
777 struct hmapx *bundles)
782 for (i = 0; i < n_auxes; i++) {
783 struct ofbundle *bundle = bundle_lookup(ofproto, auxes[i]);
785 hmapx_add(bundles, bundle);
791 bundle_del_port(struct ofport_dpif *port)
793 struct ofbundle *bundle = port->bundle;
795 list_remove(&port->bundle_node);
799 lacp_slave_unregister(bundle->lacp, port);
802 bond_slave_unregister(bundle->bond, port);
805 bundle->floodable = true;
806 LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
807 if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) {
808 bundle->floodable = false;
814 bundle_add_port(struct ofbundle *bundle, uint32_t ofp_port,
815 struct lacp_slave_settings *lacp)
817 struct ofport_dpif *port;
819 port = get_ofp_port(bundle->ofproto, ofp_port);
824 if (port->bundle != bundle) {
826 bundle_del_port(port);
829 port->bundle = bundle;
830 list_push_back(&bundle->ports, &port->bundle_node);
831 if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) {
832 bundle->floodable = false;
836 lacp_slave_register(bundle->lacp, port, lacp);
843 bundle_destroy(struct ofbundle *bundle)
845 struct ofproto_dpif *ofproto;
846 struct ofport_dpif *port, *next_port;
853 ofproto = bundle->ofproto;
854 for (i = 0; i < MAX_MIRRORS; i++) {
855 struct ofmirror *m = ofproto->mirrors[i];
857 if (m->out == bundle) {
859 } else if (hmapx_find_and_delete(&m->srcs, bundle)
860 || hmapx_find_and_delete(&m->dsts, bundle)) {
861 ofproto->need_revalidate = true;
866 LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
867 bundle_del_port(port);
870 bundle_flush_macs(bundle);
871 hmap_remove(&ofproto->bundles, &bundle->hmap_node);
873 free(bundle->trunks);
874 lacp_destroy(bundle->lacp);
875 bond_destroy(bundle->bond);
880 bundle_set(struct ofproto *ofproto_, void *aux,
881 const struct ofproto_bundle_settings *s)
883 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
884 bool need_flush = false;
885 const unsigned long *trunks;
886 struct ofport_dpif *port;
887 struct ofbundle *bundle;
892 bundle_destroy(bundle_lookup(ofproto, aux));
896 assert(s->n_slaves == 1 || s->bond != NULL);
897 assert((s->lacp != NULL) == (s->lacp_slaves != NULL));
899 bundle = bundle_lookup(ofproto, aux);
901 bundle = xmalloc(sizeof *bundle);
903 bundle->ofproto = ofproto;
904 hmap_insert(&ofproto->bundles, &bundle->hmap_node,
905 hash_pointer(aux, 0));
909 list_init(&bundle->ports);
911 bundle->trunks = NULL;
915 bundle->floodable = true;
917 bundle->src_mirrors = 0;
918 bundle->dst_mirrors = 0;
919 bundle->mirror_out = 0;
922 if (!bundle->name || strcmp(s->name, bundle->name)) {
924 bundle->name = xstrdup(s->name);
930 bundle->lacp = lacp_create();
932 lacp_configure(bundle->lacp, s->lacp);
934 lacp_destroy(bundle->lacp);
938 /* Update set of ports. */
940 for (i = 0; i < s->n_slaves; i++) {
941 if (!bundle_add_port(bundle, s->slaves[i],
942 s->lacp ? &s->lacp_slaves[i] : NULL)) {
946 if (!ok || list_size(&bundle->ports) != s->n_slaves) {
947 struct ofport_dpif *next_port;
949 LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
950 for (i = 0; i < s->n_slaves; i++) {
951 if (s->slaves[i] == odp_port_to_ofp_port(port->odp_port)) {
956 bundle_del_port(port);
960 assert(list_size(&bundle->ports) <= s->n_slaves);
962 if (list_is_empty(&bundle->ports)) {
963 bundle_destroy(bundle);
968 if (s->vlan != bundle->vlan) {
969 bundle->vlan = s->vlan;
973 /* Get trunked VLANs. */
974 trunks = s->vlan == -1 ? NULL : s->trunks;
975 if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
976 free(bundle->trunks);
977 bundle->trunks = vlan_bitmap_clone(trunks);
982 if (!list_is_short(&bundle->ports)) {
983 bundle->ofproto->has_bonded_bundles = true;
985 if (bond_reconfigure(bundle->bond, s->bond)) {
986 ofproto->need_revalidate = true;
989 bundle->bond = bond_create(s->bond);
992 LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
993 uint16_t stable_id = (bundle->lacp
994 ? lacp_slave_get_port_id(bundle->lacp, port)
996 bond_slave_register(bundle->bond, port, stable_id,
1000 bond_destroy(bundle->bond);
1001 bundle->bond = NULL;
1004 /* If we changed something that would affect MAC learning, un-learn
1005 * everything on this port and force flow revalidation. */
1007 bundle_flush_macs(bundle);
1014 bundle_remove(struct ofport *port_)
1016 struct ofport_dpif *port = ofport_dpif_cast(port_);
1017 struct ofbundle *bundle = port->bundle;
1020 bundle_del_port(port);
1021 if (list_is_empty(&bundle->ports)) {
1022 bundle_destroy(bundle);
1023 } else if (list_is_short(&bundle->ports)) {
1024 bond_destroy(bundle->bond);
1025 bundle->bond = NULL;
1031 send_pdu_cb(void *port_, const struct lacp_pdu *pdu)
1033 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
1034 struct ofport_dpif *port = port_;
1035 uint8_t ea[ETH_ADDR_LEN];
1038 error = netdev_get_etheraddr(port->up.netdev, ea);
1040 struct lacp_pdu *packet_pdu;
1041 struct ofpbuf packet;
1043 ofpbuf_init(&packet, 0);
1044 packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
1045 sizeof *packet_pdu);
1047 error = netdev_send(port->up.netdev, &packet);
1049 VLOG_WARN_RL(&rl, "port %s: sending LACP PDU on iface %s failed "
1050 "(%s)", port->bundle->name,
1051 netdev_get_name(port->up.netdev), strerror(error));
1053 ofpbuf_uninit(&packet);
1055 VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
1056 "%s (%s)", port->bundle->name,
1057 netdev_get_name(port->up.netdev), strerror(error));
1062 bundle_send_learning_packets(struct ofbundle *bundle)
1064 struct ofproto_dpif *ofproto = bundle->ofproto;
1065 int error, n_packets, n_errors;
1066 struct mac_entry *e;
1068 error = n_packets = n_errors = 0;
1069 LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
1070 if (e->port.p != bundle) {
1071 int ret = bond_send_learning_packet(bundle->bond, e->mac, e->vlan);
1081 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1082 VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
1083 "packets, last error was: %s",
1084 bundle->name, n_errors, n_packets, strerror(error));
1086 VLOG_DBG("bond %s: sent %d gratuitous learning packets",
1087 bundle->name, n_packets);
1092 bundle_run(struct ofbundle *bundle)
1095 lacp_run(bundle->lacp, send_pdu_cb);
1098 struct ofport_dpif *port;
1100 LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
1101 bool may_enable = lacp_slave_may_enable(bundle->lacp, port);
1102 bond_slave_set_lacp_may_enable(bundle->bond, port, may_enable);
1105 bond_run(bundle->bond, &bundle->ofproto->revalidate_set,
1106 lacp_negotiated(bundle->lacp));
1107 if (bond_should_send_learning_packets(bundle->bond)) {
1108 bundle_send_learning_packets(bundle);
1114 bundle_wait(struct ofbundle *bundle)
1117 lacp_wait(bundle->lacp);
1120 bond_wait(bundle->bond);
1127 mirror_scan(struct ofproto_dpif *ofproto)
1131 for (idx = 0; idx < MAX_MIRRORS; idx++) {
1132 if (!ofproto->mirrors[idx]) {
1139 static struct ofmirror *
1140 mirror_lookup(struct ofproto_dpif *ofproto, void *aux)
1144 for (i = 0; i < MAX_MIRRORS; i++) {
1145 struct ofmirror *mirror = ofproto->mirrors[i];
1146 if (mirror && mirror->aux == aux) {
1155 mirror_set(struct ofproto *ofproto_, void *aux,
1156 const struct ofproto_mirror_settings *s)
1158 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1159 mirror_mask_t mirror_bit;
1160 struct ofbundle *bundle;
1161 struct ofmirror *mirror;
1162 struct ofbundle *out;
1163 struct hmapx srcs; /* Contains "struct ofbundle *"s. */
1164 struct hmapx dsts; /* Contains "struct ofbundle *"s. */
1167 mirror = mirror_lookup(ofproto, aux);
1169 mirror_destroy(mirror);
1175 idx = mirror_scan(ofproto);
1177 VLOG_WARN("bridge %s: maximum of %d port mirrors reached, "
1179 ofproto->up.name, MAX_MIRRORS, s->name);
1183 mirror = ofproto->mirrors[idx] = xzalloc(sizeof *mirror);
1184 mirror->ofproto = ofproto;
1186 mirror->out_vlan = -1;
1187 mirror->name = NULL;
1190 if (!mirror->name || strcmp(s->name, mirror->name)) {
1192 mirror->name = xstrdup(s->name);
1195 /* Get the new configuration. */
1196 if (s->out_bundle) {
1197 out = bundle_lookup(ofproto, s->out_bundle);
1199 mirror_destroy(mirror);
1205 out_vlan = s->out_vlan;
1207 bundle_lookup_multiple(ofproto, s->srcs, s->n_srcs, &srcs);
1208 bundle_lookup_multiple(ofproto, s->dsts, s->n_dsts, &dsts);
1210 /* If the configuration has not changed, do nothing. */
1211 if (hmapx_equals(&srcs, &mirror->srcs)
1212 && hmapx_equals(&dsts, &mirror->dsts)
1213 && vlan_bitmap_equal(mirror->vlans, s->src_vlans)
1214 && mirror->out == out
1215 && mirror->out_vlan == out_vlan)
1217 hmapx_destroy(&srcs);
1218 hmapx_destroy(&dsts);
1222 hmapx_swap(&srcs, &mirror->srcs);
1223 hmapx_destroy(&srcs);
1225 hmapx_swap(&dsts, &mirror->dsts);
1226 hmapx_destroy(&dsts);
1228 free(mirror->vlans);
1229 mirror->vlans = vlan_bitmap_clone(s->src_vlans);
1232 mirror->out_vlan = out_vlan;
1234 /* Update bundles. */
1235 mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
1236 HMAP_FOR_EACH (bundle, hmap_node, &mirror->ofproto->bundles) {
1237 if (hmapx_contains(&mirror->srcs, bundle)) {
1238 bundle->src_mirrors |= mirror_bit;
1240 bundle->src_mirrors &= ~mirror_bit;
1243 if (hmapx_contains(&mirror->dsts, bundle)) {
1244 bundle->dst_mirrors |= mirror_bit;
1246 bundle->dst_mirrors &= ~mirror_bit;
1249 if (mirror->out == bundle) {
1250 bundle->mirror_out |= mirror_bit;
1252 bundle->mirror_out &= ~mirror_bit;
1256 ofproto->need_revalidate = true;
1257 mac_learning_flush(ofproto->ml);
1263 mirror_destroy(struct ofmirror *mirror)
1265 struct ofproto_dpif *ofproto;
1266 mirror_mask_t mirror_bit;
1267 struct ofbundle *bundle;
1273 ofproto = mirror->ofproto;
1274 ofproto->need_revalidate = true;
1275 mac_learning_flush(ofproto->ml);
1277 mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
1278 HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1279 bundle->src_mirrors &= ~mirror_bit;
1280 bundle->dst_mirrors &= ~mirror_bit;
1281 bundle->mirror_out &= ~mirror_bit;
1284 hmapx_destroy(&mirror->srcs);
1285 hmapx_destroy(&mirror->dsts);
1286 free(mirror->vlans);
1288 ofproto->mirrors[mirror->idx] = NULL;
1294 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
1296 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1297 if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
1298 ofproto->need_revalidate = true;
1299 mac_learning_flush(ofproto->ml);
1305 is_mirror_output_bundle(struct ofproto *ofproto_, void *aux)
1307 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1308 struct ofbundle *bundle = bundle_lookup(ofproto, aux);
1309 return bundle && bundle->mirror_out != 0;
1314 static struct ofport_dpif *
1315 get_ofp_port(struct ofproto_dpif *ofproto, uint16_t ofp_port)
1317 return ofport_dpif_cast(ofproto_get_port(&ofproto->up, ofp_port));
1320 static struct ofport_dpif *
1321 get_odp_port(struct ofproto_dpif *ofproto, uint32_t odp_port)
1323 return get_ofp_port(ofproto, odp_port_to_ofp_port(odp_port));
1327 ofproto_port_from_dpif_port(struct ofproto_port *ofproto_port,
1328 struct dpif_port *dpif_port)
1330 ofproto_port->name = dpif_port->name;
1331 ofproto_port->type = dpif_port->type;
1332 ofproto_port->ofp_port = odp_port_to_ofp_port(dpif_port->port_no);
1336 port_run(struct ofport_dpif *ofport)
1339 cfm_run(ofport->cfm);
1341 if (cfm_should_send_ccm(ofport->cfm)) {
1342 struct ofpbuf packet;
1345 ofpbuf_init(&packet, 0);
1346 ccm = eth_compose(&packet, eth_addr_ccm, ofport->up.opp.hw_addr,
1347 ETH_TYPE_CFM, sizeof *ccm);
1348 cfm_compose_ccm(ofport->cfm, ccm);
1349 send_packet(ofproto_dpif_cast(ofport->up.ofproto),
1350 ofport->odp_port, 0, &packet);
1351 ofpbuf_uninit(&packet);
1357 port_wait(struct ofport_dpif *ofport)
1360 cfm_wait(ofport->cfm);
1365 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
1366 struct ofproto_port *ofproto_port)
1368 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1369 struct dpif_port dpif_port;
1372 error = dpif_port_query_by_name(ofproto->dpif, devname, &dpif_port);
1374 ofproto_port_from_dpif_port(ofproto_port, &dpif_port);
1380 port_add(struct ofproto *ofproto_, struct netdev *netdev, uint16_t *ofp_portp)
1382 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1386 error = dpif_port_add(ofproto->dpif, netdev, &odp_port);
1388 *ofp_portp = odp_port_to_ofp_port(odp_port);
1394 port_del(struct ofproto *ofproto_, uint16_t ofp_port)
1396 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1399 error = dpif_port_del(ofproto->dpif, ofp_port_to_odp_port(ofp_port));
1401 struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
1403 /* The caller is going to close ofport->up.netdev. If this is a
1404 * bonded port, then the bond is using that netdev, so remove it
1405 * from the bond. The client will need to reconfigure everything
1406 * after deleting ports, so then the slave will get re-added. */
1407 bundle_remove(&ofport->up);
1413 struct port_dump_state {
1414 struct dpif_port_dump dump;
1419 port_dump_start(const struct ofproto *ofproto_, void **statep)
1421 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1422 struct port_dump_state *state;
1424 *statep = state = xmalloc(sizeof *state);
1425 dpif_port_dump_start(&state->dump, ofproto->dpif);
1426 state->done = false;
1431 port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_,
1432 struct ofproto_port *port)
1434 struct port_dump_state *state = state_;
1435 struct dpif_port dpif_port;
1437 if (dpif_port_dump_next(&state->dump, &dpif_port)) {
1438 ofproto_port_from_dpif_port(port, &dpif_port);
1441 int error = dpif_port_dump_done(&state->dump);
1443 return error ? error : EOF;
1448 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
1450 struct port_dump_state *state = state_;
1453 dpif_port_dump_done(&state->dump);
1460 port_poll(const struct ofproto *ofproto_, char **devnamep)
1462 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1463 return dpif_port_poll(ofproto->dpif, devnamep);
1467 port_poll_wait(const struct ofproto *ofproto_)
1469 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1470 dpif_port_poll_wait(ofproto->dpif);
1474 port_is_lacp_current(const struct ofport *ofport_)
1476 const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1477 return (ofport->bundle && ofport->bundle->lacp
1478 ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
1482 /* Upcall handling. */
1484 /* Given 'upcall', of type DPIF_UC_ACTION or DPIF_UC_MISS, sends an
1485 * OFPT_PACKET_IN message to each OpenFlow controller as necessary according to
1486 * their individual configurations.
1488 * If 'clone' is true, the caller retains ownership of 'upcall->packet'.
1489 * Otherwise, ownership is transferred to this function. */
1491 send_packet_in(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall,
1492 const struct flow *flow, bool clone)
1494 struct ofputil_packet_in pin;
/* Translate the datapath upcall into an OpenFlow packet-in structure. */
1496 pin.packet = upcall->packet;
1497 pin.in_port = flow->in_port;
/* A kernel flow miss maps to OFPR_NO_MATCH; an explicit controller action
 * maps to OFPR_ACTION. */
1498 pin.reason = upcall->type == DPIF_UC_MISS ? OFPR_NO_MATCH : OFPR_ACTION;
1499 pin.buffer_id = 0; /* not yet known */
/* For action upcalls, 'userdata' carries the controller-requested max_len. */
1500 pin.send_len = upcall->userdata;
/* NOTE(review): the connection manager fans this out per-controller; passing
 * a non-NULL final argument transfers packet ownership to it — confirm
 * against the connmgr_send_packet_in() contract. */
1501 connmgr_send_packet_in(ofproto->up.connmgr, upcall, flow,
1502 clone ? NULL : upcall->packet);
1506 process_special(struct ofproto_dpif *ofproto, const struct flow *flow,
1507 const struct ofpbuf *packet)
1509 if (cfm_should_process_flow(flow)) {
1510 struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port);
1511 if (ofport && ofport->cfm) {
1512 cfm_process_heartbeat(ofport->cfm, packet);
1515 } else if (flow->dl_type == htons(ETH_TYPE_LACP)) {
1516 struct ofport_dpif *port = get_ofp_port(ofproto, flow->in_port);
1517 if (port && port->bundle && port->bundle->lacp) {
1518 const struct lacp_pdu *pdu = parse_lacp_packet(packet);
1520 lacp_process_pdu(port->bundle->lacp, port, pdu);
/* Handles a DPIF_UC_MISS upcall: extracts the flow, lets the special-protocol
 * and in-band hooks look at the packet, then finds or creates a facet for the
 * flow and executes (and possibly installs) its actions.  Takes ownership of
 * 'upcall->packet' on every path. */
1529 handle_miss_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall)
1531 struct facet *facet;
1534 /* Obtain in_port and tun_id, at least. */
1535 odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow);
1537 /* Set header pointers in 'flow'. */
1538 flow_extract(upcall->packet, flow.tun_id, flow.in_port, &flow);
1540 /* Handle 802.1ag and LACP. */
1541 if (process_special(ofproto, &flow, upcall->packet)) {
1542 ofpbuf_delete(upcall->packet);
1546 /* Check with in-band control to see if this packet should be sent
1547 * to the local port regardless of the flow table. */
1548 if (connmgr_msg_in_hook(ofproto->up.connmgr, &flow, upcall->packet)) {
1549 send_packet(ofproto, OFPP_LOCAL, 0, upcall->packet);
1552 facet = facet_lookup_valid(ofproto, &flow);
/* No valid facet: consult the OpenFlow classifier for a matching rule. */
1554 struct rule_dpif *rule = rule_dpif_lookup(ofproto, &flow);
1556 /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */
1557 struct ofport_dpif *port = get_ofp_port(ofproto, flow.in_port);
1559 if (port->up.opp.config & htonl(OFPPC_NO_PACKET_IN)) {
1560 COVERAGE_INC(ofproto_dpif_no_packet_in);
1561 /* XXX install 'drop' flow entry */
1562 ofpbuf_delete(upcall->packet);
1566 VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16,
/* Table miss with no matching rule: forward to the controllers. */
1570 send_packet_in(ofproto, upcall, &flow, false);
1574 facet = facet_create(rule, &flow, upcall->packet);
1575 } else if (!facet->may_install) {
1576 /* The facet is not installable, that is, we need to process every
1577 * packet, so process the current packet's actions into 'facet'. */
1578 facet_make_actions(ofproto, facet, upcall->packet);
1581 if (facet->rule->up.cr.priority == FAIL_OPEN_PRIORITY) {
1583 * Extra-special case for fail-open mode.
1585 * We are in fail-open mode and the packet matched the fail-open rule,
1586 * but we are connected to a controller too. We should send the packet
1587 * up to the controller in the hope that it will try to set up a flow
1588 * and thereby allow us to exit fail-open.
1590 * See the top-level comment in fail-open.c for more information.
/* 'clone' is true here: the packet is still needed below. */
1592 send_packet_in(ofproto, upcall, &flow, true);
/* Execute the facet's actions on the packet, then (re)install it in the
 * datapath so future packets of this flow are handled in the kernel. */
1595 facet_execute(ofproto, facet, upcall->packet);
1596 facet_install(ofproto, facet, false);
1600 handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall)
1604 switch (upcall->type) {
1605 case DPIF_UC_ACTION:
1606 COVERAGE_INC(ofproto_dpif_ctlr_action);
1607 odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow);
1608 send_packet_in(ofproto, upcall, &flow, false);
1611 case DPIF_UC_SAMPLE:
1612 if (ofproto->sflow) {
1613 odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow);
1614 ofproto_sflow_received(ofproto->sflow, upcall, &flow);
1616 ofpbuf_delete(upcall->packet);
1620 handle_miss_upcall(ofproto, upcall);
1623 case DPIF_N_UC_TYPES:
1625 VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, upcall->type);
1630 /* Flow expiration. */
1632 static int facet_max_idle(const struct ofproto_dpif *);
1633 static void update_stats(struct ofproto_dpif *);
1634 static void rule_expire(struct rule_dpif *);
1635 static void expire_facets(struct ofproto_dpif *, int dp_max_idle);
1637 /* This function is called periodically by run(). Its job is to collect
1638 * updates for the flows that have been installed into the datapath, most
1639 * importantly when they last were used, and then use that information to
1640 * expire flows that have not been used recently.
1642 * Returns the number of milliseconds after which it should be called again. */
1644 expire(struct ofproto_dpif *ofproto)
1646 struct rule_dpif *rule, *next_rule;
1647 struct cls_cursor cursor;
1650 /* Update stats for each flow in the datapath. */
1651 update_stats(ofproto);
1653 /* Expire facets that have been idle too long. */
1654 dp_max_idle = facet_max_idle(ofproto);
1655 expire_facets(ofproto, dp_max_idle);
1657 /* Expire OpenFlow flows whose idle_timeout or hard_timeout has passed. */
1658 cls_cursor_init(&cursor, &ofproto->up.cls, NULL);
1659 CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
1663 /* All outstanding data in existing flows has been accounted, so it's a
1664 * good time to do bond rebalancing. */
1665 if (ofproto->has_bonded_bundles) {
1666 struct ofbundle *bundle;
1668 HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1670 bond_rebalance(bundle->bond, &ofproto->revalidate_set);
1675 return MIN(dp_max_idle, 1000);
1678 /* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
1680 * This function also pushes statistics updates to rules which each facet
1681 * resubmits into. Generally these statistics will be accurate. However, if a
1682 * facet changes the rule it resubmits into at some time in between
1683 * update_stats() runs, it is possible that statistics accrued to the
1684 * old rule will be incorrectly attributed to the new rule. This could be
1685 * avoided by calling update_stats() whenever rules are created or
1686 * deleted. However, the performance impact of making so many calls to the
1687 * datapath do not justify the benefit of having perfectly accurate statistics.
1690 update_stats(struct ofproto_dpif *p)
1692 const struct dpif_flow_stats *stats;
1693 struct dpif_flow_dump dump;
1694 const struct nlattr *key;
/* Walk every flow currently installed in the datapath. */
1697 dpif_flow_dump_start(&dump, p->dpif);
1698 while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) {
1699 struct facet *facet;
/* A key we cannot parse is logged (rate-limited) and skipped. */
1702 if (odp_flow_key_to_flow(key, key_len, &flow)) {
1706 odp_flow_key_format(key, key_len, &s);
1707 VLOG_WARN_RL(&rl, "failed to convert ODP flow key to flow: %s",
1713 facet = facet_find(p, &flow);
1715 if (facet && facet->installed) {
/* The datapath counters are cumulative; fold only the delta since the
 * last dump into the facet.  A counter that went backwards indicates a
 * datapath anomaly and is logged instead of being applied. */
1717 if (stats->n_packets >= facet->dp_packet_count) {
1718 uint64_t extra = stats->n_packets - facet->dp_packet_count;
1719 facet->packet_count += extra;
1721 VLOG_WARN_RL(&rl, "unexpected packet count from the datapath");
1724 if (stats->n_bytes >= facet->dp_byte_count) {
1725 facet->byte_count += stats->n_bytes - facet->dp_byte_count;
1727 VLOG_WARN_RL(&rl, "unexpected byte count from datapath");
/* Remember the raw datapath counters for the next delta computation. */
1730 facet->dp_packet_count = stats->n_packets;
1731 facet->dp_byte_count = stats->n_bytes;
1733 facet_update_time(p, facet, stats->used);
1734 facet_account(p, facet, stats->n_bytes);
1735 facet_push_stats(facet);
1737 /* There's a flow in the datapath that we know nothing about.
/* Delete it, since userspace is the authority on which flows exist. */
1739 COVERAGE_INC(facet_unexpected);
1740 dpif_flow_del(p->dpif, key, key_len, NULL);
1743 dpif_flow_dump_done(&dump);
1746 /* Calculates and returns the number of milliseconds of idle time after which
1747 * facets should expire from the datapath and we should fold their statistics
1748 * into their parent rules in userspace. */
1750 facet_max_idle(const struct ofproto_dpif *ofproto)
1753 * Idle time histogram.
1755 * Most of the time a switch has a relatively small number of facets. When
1756 * this is the case we might as well keep statistics for all of them in
1757 * userspace and to cache them in the kernel datapath for performance as
1760 * As the number of facets increases, the memory required to maintain
1761 * statistics about them in userspace and in the kernel becomes
1762 * significant. However, with a large number of facets it is likely that
1763 * only a few of them are "heavy hitters" that consume a large amount of
1764 * bandwidth. At this point, only heavy hitters are worth caching in the
1765 * kernel and maintaining in userspaces; other facets we can discard.
1767 * The technique used to compute the idle time is to build a histogram with
1768 * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each facet
1769 * that is installed in the kernel gets dropped in the appropriate bucket.
1770 * After the histogram has been built, we compute the cutoff so that only
1771 * the most-recently-used 1% of facets (but at least 1000 flows) are kept
1772 * cached. At least the most-recently-used bucket of facets is kept, so
1773 * actually an arbitrary number of facets can be kept in any given
1774 * expiration run (though the next run will delete most of those unless
1775 * they receive additional data).
1777 * This requires a second pass through the facets, in addition to the pass
1778 * made by update_stats(), because the former function never looks
1779 * at uninstallable facets.
1781 enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
1782 enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
1783 int buckets[N_BUCKETS] = { 0 };
1784 struct facet *facet;
/* With at most 1000 facets, keep everything: return the maximum idle time. */
1789 total = hmap_count(&ofproto->facets);
1790 if (total <= 1000) {
1791 return N_BUCKETS * BUCKET_WIDTH;
1794 /* Build histogram. */
1796 HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
1797 long long int idle = now - facet->used;
/* Clamp the bucket index to [0, N_BUCKETS - 1]. */
1798 int bucket = (idle <= 0 ? 0
1799 : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
1800 : (unsigned int) idle / BUCKET_WIDTH);
1804 /* Find the first bucket whose flows should be expired. */
1805 for (bucket = 0; bucket < N_BUCKETS; bucket++) {
1806 if (buckets[bucket]) {
/* Accumulate most-recently-used buckets until the keep threshold
 * (at least 1000 facets, or 1% of the total) is reached. */
1809 subtotal += buckets[bucket++];
1810 } while (bucket < N_BUCKETS && subtotal < MAX(1000, total / 100));
/* Optionally log the keep/drop histogram for debugging. */
1815 if (VLOG_IS_DBG_ENABLED()) {
1819 ds_put_cstr(&s, "keep");
1820 for (i = 0; i < N_BUCKETS; i++) {
1822 ds_put_cstr(&s, ", drop");
1825 ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
1828 VLOG_INFO("%s: %s (msec:count)", ofproto->up.name, ds_cstr(&s));
/* The cutoff is the left edge of the first bucket to be expired. */
1832 return bucket * BUCKET_WIDTH;
/* If the NetFlow active timeout has expired for 'facet', emits a NetFlow
 * record for it.  For an installed facet, first re-puts the datapath flow
 * with zeroed stats and folds the returned counters into 'facet' so the
 * exported record is up to date.  Controller-only flows are never exported. */
1836 facet_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet)
1838 if (ofproto->netflow && !facet_is_controller_flow(facet) &&
1839 netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) {
1840 struct ofexpired expired;
1842 if (facet->installed) {
1843 struct dpif_flow_stats stats;
/* Re-install with the same actions purely to zero the datapath
 * counters and retrieve their final values in 'stats'. */
1845 facet_put__(ofproto, facet, facet->actions, facet->actions_len,
1847 facet_update_stats(ofproto, facet, &stats);
1850 expired.flow = facet->flow;
1851 expired.packet_count = facet->packet_count;
1852 expired.byte_count = facet->byte_count;
1853 expired.used = facet->used;
1854 netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
1859 expire_facets(struct ofproto_dpif *ofproto, int dp_max_idle)
1861 long long int cutoff = time_msec() - dp_max_idle;
1862 struct facet *facet, *next_facet;
1864 HMAP_FOR_EACH_SAFE (facet, next_facet, hmap_node, &ofproto->facets) {
1865 facet_active_timeout(ofproto, facet);
1866 if (facet->used < cutoff) {
1867 facet_remove(ofproto, facet);
1872 /* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
1873 * then delete it entirely. */
1875 rule_expire(struct rule_dpif *rule)
1877 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
1878 struct facet *facet, *next_facet;
1882 /* Has 'rule' expired? */
1884 if (rule->up.hard_timeout
1885 && now > rule->up.created + rule->up.hard_timeout * 1000) {
1886 reason = OFPRR_HARD_TIMEOUT;
1887 } else if (rule->up.idle_timeout && list_is_empty(&rule->facets)
1888 && now > rule->used + rule->up.idle_timeout * 1000) {
1889 reason = OFPRR_IDLE_TIMEOUT;
1894 COVERAGE_INC(ofproto_dpif_expired);
1896 /* Update stats. (This is a no-op if the rule expired due to an idle
1897 * timeout, because that only happens when the rule has no facets left.) */
1898 LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) {
1899 facet_remove(ofproto, facet);
1902 /* Get rid of the rule. */
1903 ofproto_rule_expire(&rule->up, reason);
1908 /* Creates and returns a new facet owned by 'rule', given a 'flow' and an
1909 * example 'packet' within that flow.
1911 * The caller must already have determined that no facet with an identical
1912 * 'flow' exists in 'ofproto' and that 'flow' is the best match for 'rule' in
1913 * the ofproto's classifier table. */
1914 static struct facet *
1915 facet_create(struct rule_dpif *rule, const struct flow *flow,
1916 const struct ofpbuf *packet)
1918 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
1919 struct facet *facet;
1921 facet = xzalloc(sizeof *facet);
1922 facet->used = time_msec();
1923 hmap_insert(&ofproto->facets, &facet->hmap_node, flow_hash(flow, 0));
1924 list_push_back(&rule->facets, &facet->list_node);
1926 facet->flow = *flow;
1927 netflow_flow_init(&facet->nf_flow);
1928 netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
1930 facet_make_actions(ofproto, facet, packet);
1936 facet_free(struct facet *facet)
1938 free(facet->actions);
1942 /* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on
1943 * 'packet', which arrived on 'in_port'.
1945 * Takes ownership of 'packet'. */
1947 execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
1948 const struct nlattr *odp_actions, size_t actions_len,
1949 struct ofpbuf *packet)
/* Detect the special case of a single CONTROLLER action: its attribute is
 * exactly one u64 (the send length), so the whole action list has that
 * aligned length. */
1951 if (actions_len == NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t))
1952 && odp_actions->nla_type == ODP_ACTION_ATTR_CONTROLLER) {
1953 /* As an optimization, avoid a round-trip from userspace to kernel to
1954 * userspace. This also avoids possibly filling up kernel packet
1955 * buffers along the way. */
1956 struct dpif_upcall upcall;
/* Synthesize the upcall the kernel would have produced. */
1958 upcall.type = DPIF_UC_ACTION;
1959 upcall.packet = packet;
1962 upcall.userdata = nl_attr_get_u64(odp_actions);
1963 upcall.sample_pool = 0;
1964 upcall.actions = NULL;
1965 upcall.actions_len = 0;
/* send_packet_in() with clone==false consumes 'packet'. */
1967 send_packet_in(ofproto, &upcall, flow, false);
/* Otherwise hand the packet and actions to the datapath, then free the
 * packet; dpif_execute() does not take ownership of it. */
1973 error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet);
1974 ofpbuf_delete(packet);
1979 /* Executes the actions indicated by 'facet' on 'packet' and credits 'facet''s
1980 * statistics appropriately. 'packet' must have at least sizeof(struct
1981 * ofp_packet_in) bytes of headroom.
1983 * For correct results, 'packet' must actually be in 'facet''s flow; that is,
1984 * applying flow_extract() to 'packet' would yield the same flow as
1987 * 'facet' must have accurately composed ODP actions; that is, it must not be
1988 * in need of revalidation.
1990 * Takes ownership of 'packet'. */
1992 facet_execute(struct ofproto_dpif *ofproto, struct facet *facet,
1993 struct ofpbuf *packet)
1995 struct dpif_flow_stats stats;
1997 assert(ofpbuf_headroom(packet) >= sizeof(struct ofp_packet_in));
1999 flow_extract_stats(&facet->flow, packet, &stats);
2000 stats.used = time_msec();
2001 if (execute_odp_actions(ofproto, &facet->flow,
2002 facet->actions, facet->actions_len, packet)) {
2003 facet_update_stats(ofproto, facet, &stats);
2007 /* Remove 'facet' from 'ofproto' and free up the associated memory:
2009 * - If 'facet' was installed in the datapath, uninstalls it and updates its
2010 * rule's statistics, via facet_uninstall().
2012 * - Removes 'facet' from its rule and from ofproto->facets.
2015 facet_remove(struct ofproto_dpif *ofproto, struct facet *facet)
2017 facet_uninstall(ofproto, facet);
2018 facet_flush_stats(ofproto, facet);
2019 hmap_remove(&ofproto->facets, &facet->hmap_node);
2020 list_remove(&facet->list_node);
2024 /* Composes the ODP actions for 'facet' based on its rule's actions. */
2026 facet_make_actions(struct ofproto_dpif *p, struct facet *facet,
2027 const struct ofpbuf *packet)
2029 const struct rule_dpif *rule = facet->rule;
2030 struct ofpbuf *odp_actions;
2031 struct action_xlate_ctx ctx;
2033 action_xlate_ctx_init(&ctx, p, &facet->flow, packet);
2034 odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions);
2035 facet->tags = ctx.tags;
2036 facet->may_install = ctx.may_set_up_flow;
2037 facet->nf_flow.output_iface = ctx.nf_output_iface;
2039 if (facet->actions_len != odp_actions->size
2040 || memcmp(facet->actions, odp_actions->data, odp_actions->size)) {
2041 free(facet->actions);
2042 facet->actions_len = odp_actions->size;
2043 facet->actions = xmemdup(odp_actions->data, odp_actions->size);
2046 ofpbuf_delete(odp_actions);
2050 facet_put__(struct ofproto_dpif *ofproto, struct facet *facet,
2051 const struct nlattr *actions, size_t actions_len,
2052 struct dpif_flow_stats *stats)
2054 struct odputil_keybuf keybuf;
2055 enum dpif_flow_put_flags flags;
2058 flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
2060 flags |= DPIF_FP_ZERO_STATS;
2061 facet->dp_packet_count = 0;
2062 facet->dp_byte_count = 0;
2065 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2066 odp_flow_key_from_flow(&key, &facet->flow);
2068 return dpif_flow_put(ofproto->dpif, flags, key.data, key.size,
2069 actions, actions_len, stats);
2072 /* If 'facet' is installable, inserts or re-inserts it into 'p''s datapath. If
2073 * 'zero_stats' is true, clears any existing statistics from the datapath for
2076 facet_install(struct ofproto_dpif *p, struct facet *facet, bool zero_stats)
2078 struct dpif_flow_stats stats;
2080 if (facet->may_install
2081 && !facet_put__(p, facet, facet->actions, facet->actions_len,
2082 zero_stats ? &stats : NULL)) {
2083 facet->installed = true;
/* Feeds the bytes newly accrued to 'facet' (its byte_count plus
 * 'extra_bytes', minus what was accounted previously) into MAC learning and
 * per-bond accounting, for flows processed by the OFPP_NORMAL pipeline. */
1688 facet_account(struct ofproto_dpif *ofproto,
2089 struct facet *facet, uint64_t extra_bytes)
1691 uint64_t total_bytes, n_bytes;
2092 struct ofbundle *in_bundle;
2093 const struct nlattr *a;
/* Compute only the delta since the last call; bail out if nothing new. */
2098 total_bytes = facet->byte_count + extra_bytes;
2099 if (total_bytes <= facet->accounted_bytes) {
2102 n_bytes = total_bytes - facet->accounted_bytes;
2103 facet->accounted_bytes = total_bytes;
2105 /* Test that 'tags' is nonzero to ensure that only flows that include an
2106 * OFPP_NORMAL action are used for learning and bond slave rebalancing.
2107 * This works because OFPP_NORMAL always sets a nonzero tag value.
2109 * Feed information from the active flows back into the learning table to
2110 * ensure that table is always in sync with what is actually flowing
2111 * through the datapath. */
2113 || !is_admissible(ofproto, &facet->flow, false, &dummy,
2114 &vlan, &in_bundle)) {
2118 update_learning_table(ofproto, &facet->flow, vlan, in_bundle);
/* Bond accounting is only needed when at least one bundle is bonded. */
2120 if (!ofproto->has_bonded_bundles) {
/* Credit the bytes to the bond of every output port in the actions. */
2123 NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->actions, facet->actions_len) {
2124 if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT) {
2125 struct ofport_dpif *port;
2127 port = get_odp_port(ofproto, nl_attr_get_u32(a));
2128 if (port && port->bundle && port->bundle->bond) {
2129 bond_account(port->bundle->bond, &facet->flow, vlan, n_bytes);
2135 /* If 'rule' is installed in the datapath, uninstalls it. */
2137 facet_uninstall(struct ofproto_dpif *p, struct facet *facet)
2139 if (facet->installed) {
2140 struct odputil_keybuf keybuf;
2141 struct dpif_flow_stats stats;
2144 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2145 odp_flow_key_from_flow(&key, &facet->flow);
2147 if (!dpif_flow_del(p->dpif, key.data, key.size, &stats)) {
2148 facet_update_stats(p, facet, &stats);
2150 facet->installed = false;
2151 facet->dp_packet_count = 0;
2152 facet->dp_byte_count = 0;
2154 assert(facet->dp_packet_count == 0);
2155 assert(facet->dp_byte_count == 0);
2159 /* Returns true if the only action for 'facet' is to send to the controller.
2160 * (We don't report NetFlow expiration messages for such facets because they
2161 * are just part of the control logic for the network, not real traffic). */
2163 facet_is_controller_flow(struct facet *facet)
2166 && facet->rule->up.n_actions == 1
2167 && action_outputs_to_port(&facet->rule->up.actions[0],
2168 htons(OFPP_CONTROLLER)));
2171 /* Folds all of 'facet''s statistics into its rule. Also updates the
2172 * accounting ofhook and emits a NetFlow expiration if appropriate. All of
2173 * 'facet''s statistics in the datapath should have been zeroed and folded into
2174 * its packet and byte counts before this function is called. */
2176 facet_flush_stats(struct ofproto_dpif *ofproto, struct facet *facet)
2178 assert(!facet->dp_byte_count);
2179 assert(!facet->dp_packet_count);
2181 facet_push_stats(facet);
2182 facet_account(ofproto, facet, 0);
2184 if (ofproto->netflow && !facet_is_controller_flow(facet)) {
2185 struct ofexpired expired;
2186 expired.flow = facet->flow;
2187 expired.packet_count = facet->packet_count;
2188 expired.byte_count = facet->byte_count;
2189 expired.used = facet->used;
2190 netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
2193 facet->rule->packet_count += facet->packet_count;
2194 facet->rule->byte_count += facet->byte_count;
2196 /* Reset counters to prevent double counting if 'facet' ever gets
2198 facet->packet_count = 0;
2199 facet->byte_count = 0;
2200 facet->rs_packet_count = 0;
2201 facet->rs_byte_count = 0;
2202 facet->accounted_bytes = 0;
2204 netflow_flow_clear(&facet->nf_flow);
2207 /* Searches 'ofproto''s table of facets for one exactly equal to 'flow'.
2208 * Returns it if found, otherwise a null pointer.
2210 * The returned facet might need revalidation; use facet_lookup_valid()
2211 * instead if that is important. */
2212 static struct facet *
2213 facet_find(struct ofproto_dpif *ofproto, const struct flow *flow)
2215 struct facet *facet;
2217 HMAP_FOR_EACH_WITH_HASH (facet, hmap_node, flow_hash(flow, 0),
2219 if (flow_equal(flow, &facet->flow)) {
2227 /* Searches 'ofproto''s table of facets for one exactly equal to 'flow'.
2228 * Returns it if found, otherwise a null pointer.
2230 * The returned facet is guaranteed to be valid. */
2231 static struct facet *
2232 facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
2234 struct facet *facet = facet_find(ofproto, flow);
2236 /* The facet we found might not be valid, since we could be in need of
2237 * revalidation. If it is not valid, don't return it. */
2239 && ofproto->need_revalidate
2240 && !facet_revalidate(ofproto, facet)) {
2241 COVERAGE_INC(facet_invalidated);
2248 /* Re-searches 'ofproto''s classifier for a rule matching 'facet':
2250 * - If the rule found is different from 'facet''s current rule, moves
2251 * 'facet' to the new rule and recompiles its actions.
2253 * - If the rule found is the same as 'facet''s current rule, leaves 'facet'
2254 * where it is and recompiles its actions anyway.
2256 * - If there is none, destroys 'facet'.
2258 * Returns true if 'facet' still exists, false if it has been destroyed. */
2260 facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet)
2262 struct action_xlate_ctx ctx;
2263 struct ofpbuf *odp_actions;
2264 struct rule_dpif *new_rule;
2265 bool actions_changed;
2267 COVERAGE_INC(facet_revalidate);
2269 /* Determine the new rule. */
2270 new_rule = rule_dpif_lookup(ofproto, &facet->flow);
2272 /* No new rule, so delete the facet. */
2273 facet_remove(ofproto, facet);
2277 /* Calculate new ODP actions.
2279 * We do not modify any 'facet' state yet, because we might need to, e.g.,
2280 * emit a NetFlow expiration and, if so, we need to have the old state
2281 * around to properly compose it. */
2282 action_xlate_ctx_init(&ctx, ofproto, &facet->flow, NULL);
2283 odp_actions = xlate_actions(&ctx,
2284 new_rule->up.actions, new_rule->up.n_actions);
2285 actions_changed = (facet->actions_len != odp_actions->size
2286 || memcmp(facet->actions, odp_actions->data,
2287 facet->actions_len));
2289 /* If the ODP actions changed or the installability changed, then we need
2290 * to talk to the datapath. */
2291 if (actions_changed || ctx.may_set_up_flow != facet->installed) {
2292 if (ctx.may_set_up_flow) {
2293 struct dpif_flow_stats stats;
2295 facet_put__(ofproto, facet,
2296 odp_actions->data, odp_actions->size, &stats);
2297 facet_update_stats(ofproto, facet, &stats);
2299 facet_uninstall(ofproto, facet);
2302 /* The datapath flow is gone or has zeroed stats, so push stats out of
2303 * 'facet' into 'rule'. */
2304 facet_flush_stats(ofproto, facet);
2307 /* Update 'facet' now that we've taken care of all the old state. */
2308 facet->tags = ctx.tags;
2309 facet->nf_flow.output_iface = ctx.nf_output_iface;
2310 facet->may_install = ctx.may_set_up_flow;
2311 if (actions_changed) {
2312 free(facet->actions);
2313 facet->actions_len = odp_actions->size;
2314 facet->actions = xmemdup(odp_actions->data, odp_actions->size);
2316 if (facet->rule != new_rule) {
2317 COVERAGE_INC(facet_changed_rule);
2318 list_remove(&facet->list_node);
2319 list_push_back(&new_rule->facets, &facet->list_node);
2320 facet->rule = new_rule;
2321 facet->used = new_rule->up.created;
2322 facet->rs_used = facet->used;
2325 ofpbuf_delete(odp_actions);
2330 /* Updates 'facet''s used time. Caller is responsible for calling
2331 * facet_push_stats() to update the flows which 'facet' resubmits into. */
2333 facet_update_time(struct ofproto_dpif *ofproto, struct facet *facet,
2336 if (used > facet->used) {
2338 if (used > facet->rule->used) {
2339 facet->rule->used = used;
2341 netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, used);
2345 /* Folds the statistics from 'stats' into the counters in 'facet'.
2347 * Because of the meaning of a facet's counters, it only makes sense to do this
2348 * if 'stats' are not tracked in the datapath, that is, if 'stats' represents a
2349 * packet that was sent by hand or if it represents statistics that have been
2350 * cleared out of the datapath. */
2352 facet_update_stats(struct ofproto_dpif *ofproto, struct facet *facet,
2353 const struct dpif_flow_stats *stats)
2355 if (stats->n_packets || stats->used > facet->used) {
2356 facet_update_time(ofproto, facet, stats->used);
2357 facet->packet_count += stats->n_packets;
2358 facet->byte_count += stats->n_bytes;
2359 facet_push_stats(facet);
2360 netflow_flow_update_flags(&facet->nf_flow, stats->tcp_flags);
2365 facet_push_stats(struct facet *facet)
2367 uint64_t rs_packets, rs_bytes;
2369 assert(facet->packet_count >= facet->rs_packet_count);
2370 assert(facet->byte_count >= facet->rs_byte_count);
2371 assert(facet->used >= facet->rs_used);
2373 rs_packets = facet->packet_count - facet->rs_packet_count;
2374 rs_bytes = facet->byte_count - facet->rs_byte_count;
2376 if (rs_packets || rs_bytes || facet->used > facet->rs_used) {
2377 facet->rs_packet_count = facet->packet_count;
2378 facet->rs_byte_count = facet->byte_count;
2379 facet->rs_used = facet->used;
2381 flow_push_stats(facet->rule, &facet->flow,
2382 rs_packets, rs_bytes, facet->used);
2386 struct ofproto_push {
2387 struct action_xlate_ctx ctx;
2394 push_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule)
2396 struct ofproto_push *push = CONTAINER_OF(ctx, struct ofproto_push, ctx);
2399 rule->packet_count += push->packets;
2400 rule->byte_count += push->bytes;
2401 rule->used = MAX(push->used, rule->used);
2405 /* Pushes flow statistics to the rules which 'flow' resubmits into given
2406 * 'rule''s actions. */
2408 flow_push_stats(const struct rule_dpif *rule,
2409 struct flow *flow, uint64_t packets, uint64_t bytes,
2412 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
2413 struct ofproto_push push;
2415 push.packets = packets;
2419 action_xlate_ctx_init(&push.ctx, ofproto, flow, NULL);
2420 push.ctx.resubmit_hook = push_resubmit;
2421 ofpbuf_delete(xlate_actions(&push.ctx,
2422 rule->up.actions, rule->up.n_actions));
2427 static struct rule_dpif *
2428 rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow)
2430 return rule_dpif_cast(ofproto_rule_lookup(&ofproto->up, flow));
2433 static struct rule *
2436 struct rule_dpif *rule = xmalloc(sizeof *rule);
/* ofproto 'rule_dealloc' hook: frees the rule_dpif allocated by
 * rule_alloc(). */
static void
rule_dealloc(struct rule *rule_)
{
    free(rule_dpif_cast(rule_));
}
2448 rule_construct(struct rule *rule_)
2450 struct rule_dpif *rule = rule_dpif_cast(rule_);
2451 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
2452 struct cls_rule *displaced_rule;
2454 rule->used = rule->up.created;
2455 rule->packet_count = 0;
2456 rule->byte_count = 0;
2457 list_init(&rule->facets);
2459 displaced_rule = classifier_insert(&ofproto->up.cls, &rule->up.cr);
2460 if (displaced_rule) {
2461 ofproto_rule_destroy(rule_from_cls_rule(displaced_rule));
2463 ofproto->need_revalidate = true;
/* ofproto hook: tears down 'rule_'.  Every facet attached to the rule is
 * revalidated (detaching it from this rule) and flow-table revalidation is
 * requested. */
2469 rule_destruct(struct rule *rule_)
2471 struct rule_dpif *rule = rule_dpif_cast(rule_);
2472 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
2473 struct facet *facet, *next_facet;
2475 ofproto->need_revalidate = true;
/* _SAFE iteration because facet_revalidate() may unlink 'facet' from
 * rule->facets. */
2476 LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) {
2477 facet_revalidate(ofproto, facet);
2482 rule_remove(struct rule *rule_)
2484 struct rule_dpif *rule = rule_dpif_cast(rule_);
2485 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
2487 ofproto->need_revalidate = true;
2488 classifier_remove(&ofproto->up.cls, &rule->up.cr);
2492 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
2494 struct rule_dpif *rule = rule_dpif_cast(rule_);
2495 struct facet *facet;
2497 /* Start from historical data for 'rule' itself that are no longer tracked
2498 * in facets. This counts, for example, facets that have expired. */
2499 *packets = rule->packet_count;
2500 *bytes = rule->byte_count;
2502 /* Add any statistics that are tracked by facets. This includes
2503 * statistical data recently updated by ofproto_update_stats() as well as
2504 * stats for packets that were executed "by hand" via dpif_execute(). */
2505 LIST_FOR_EACH (facet, list_node, &rule->facets) {
2506 *packets += facet->packet_count;
2507 *bytes += facet->byte_count;
2512 rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet)
2514 struct rule_dpif *rule = rule_dpif_cast(rule_);
2515 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
2516 struct action_xlate_ctx ctx;
2517 struct ofpbuf *odp_actions;
2518 struct facet *facet;
2521 /* First look for a related facet. If we find one, account it to that. */
2522 facet = facet_lookup_valid(ofproto, flow);
2523 if (facet && facet->rule == rule) {
2524 facet_execute(ofproto, facet, packet);
2528 /* Otherwise, if 'rule' is in fact the correct rule for 'packet', then
2529 * create a new facet for it and use that. */
2530 if (rule_dpif_lookup(ofproto, flow) == rule) {
2531 facet = facet_create(rule, flow, packet);
2532 facet_execute(ofproto, facet, packet);
2533 facet_install(ofproto, facet, true);
2537 /* We can't account anything to a facet. If we were to try, then that
2538 * facet would have a non-matching rule, busting our invariants. */
2539 action_xlate_ctx_init(&ctx, ofproto, flow, packet);
2540 odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions);
2541 size = packet->size;
2542 if (execute_odp_actions(ofproto, flow, odp_actions->data,
2543 odp_actions->size, packet)) {
2544 rule->used = time_msec();
2545 rule->packet_count++;
2546 rule->byte_count += size;
2547 flow_push_stats(rule, flow, 1, size, rule->used);
2549 ofpbuf_delete(odp_actions);
/* ofproto hook: validates replacement 'actions' for 'rule_' and, on success,
 * requests revalidation so facets pick up the new actions.
 * NOTE(review): extraction dropped the 'error' declaration and return;
 * upstream this returns the validate_actions() status. */
2553 rule_modify_actions(struct rule *rule_,
2554 const union ofp_action *actions, size_t n_actions)
2556 struct rule_dpif *rule = rule_dpif_cast(rule_);
2557 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
/* Reject actions that reference ports outside the datapath's range. */
2560 error = validate_actions(actions, n_actions, &rule->up.cr.flow,
2561 ofproto->max_ports);
2563 ofproto->need_revalidate = true;
2568 /* Sends 'packet' out of port 'odp_port' within 'ofproto'. If 'vlan_tci' is
2569 * zero the packet will not have any 802.1Q header; if it is nonzero, then the
2570 * packet will be sent with the VLAN TCI specified by 'vlan_tci & ~VLAN_CFI'.
2572 * Returns 0 if successful, otherwise a positive errno value. */
2574 send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, uint16_t vlan_tci,
2575 const struct ofpbuf *packet)
2577 struct ofpbuf odp_actions;
/* Build a tiny ODP action list: optional set-TCI, then output. */
2580 ofpbuf_init(&odp_actions, 32);
2581 if (vlan_tci != 0) {
2582 nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_SET_DL_TCI,
2583 ntohs(vlan_tci & ~VLAN_CFI));
2585 nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port);
2586 error = dpif_execute(ofproto->dpif, odp_actions.data, odp_actions.size,
2588 ofpbuf_uninit(&odp_actions);
/* Best-effort: failure is only logged (rate-limited), then returned. */
2591 VLOG_WARN_RL(&rl, "%s: failed to send packet on port %"PRIu32" (%s)",
2592 ofproto->up.name, odp_port, strerror(error));
2597 /* OpenFlow to ODP action translation. */
2599 static void do_xlate_actions(const union ofp_action *in, size_t n_in,
2600 struct action_xlate_ctx *ctx);
2601 static bool xlate_normal(struct action_xlate_ctx *);
/* Appends an ODP output action for OpenFlow port 'ofp_port' to the actions
 * under construction in 'ctx', unless forwarding is disabled on the port,
 * and records the port as the NetFlow output interface. */
2604 add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port)
2606 const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
2607 uint16_t odp_port = ofp_port_to_odp_port(ofp_port);
2610 if (ofport->up.opp.config & htonl(OFPPC_NO_FWD)) {
2611 /* Forwarding disabled on port. */
2616 * We don't have an ofport record for this port, but it doesn't hurt to
2617 * allow forwarding to it anyhow. Maybe such a port will appear later
2618 * and we're pre-populating the flow table.
2622 nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port);
2623 ctx->nf_output_iface = ofp_port;
/* Implements NXAST_RESUBMIT: looks up the flow again with 'in_port'
 * substituted as the input port and translates the matched rule's actions
 * in place, bounded by MAX_RESUBMIT_RECURSION to prevent loops. */
2627 xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port)
2629 if (ctx->recurse < MAX_RESUBMIT_RECURSION) {
2630 struct rule_dpif *rule;
2631 uint16_t old_in_port;
2633 /* Look up a flow with 'in_port' as the input port. Then restore the
2634 * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
2635 * have surprising behavior). */
2636 old_in_port = ctx->flow.in_port;
2637 ctx->flow.in_port = in_port;
2638 rule = rule_dpif_lookup(ctx->ofproto, &ctx->flow);
2639 ctx->flow.in_port = old_in_port;
/* Give the caller (stats pusher or tracer) a look at each rule hit. */
2641 if (ctx->resubmit_hook) {
2642 ctx->resubmit_hook(ctx, rule);
/* NOTE(review): extraction dropped the recursion-depth increment and
 * decrement around this recursive translation. */
2647 do_xlate_actions(rule->up.actions, rule->up.n_actions, ctx);
/* Too deep: log (rate-limited) rather than recursing forever. */
2651 static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1);
2653 VLOG_ERR_RL(&recurse_rl, "NXAST_RESUBMIT recursed over %d times",
2654 MAX_RESUBMIT_RECURSION);
2659 flood_packets(struct ofproto_dpif *ofproto,
2660 uint16_t ofp_in_port, ovs_be32 mask,
2661 uint16_t *nf_output_iface, struct ofpbuf *odp_actions)
2663 struct ofport_dpif *ofport;
2665 HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
2666 uint16_t ofp_port = ofport->up.ofp_port;
2667 if (ofp_port != ofp_in_port && !(ofport->up.opp.config & mask)) {
2668 nl_msg_put_u32(odp_actions, ODP_ACTION_ATTR_OUTPUT,
2672 *nf_output_iface = NF_OUT_FLOOD;
/* Core of OFPAT_OUTPUT translation: dispatches on 'port', handling the
 * reserved OpenFlow ports (IN_PORT, TABLE, FLOOD, ALL, CONTROLLER, LOCAL)
 * as well as ordinary port numbers, then fixes up the NetFlow output
 * interface (single port, FLOOD, or MULTI).
 * NOTE(review): extraction dropped the switch statement's "switch (port) {",
 * the case labels for IN_PORT/TABLE/NORMAL, and the break statements; only
 * the case bodies remain visible. */
2676 xlate_output_action__(struct action_xlate_ctx *ctx,
2677 uint16_t port, uint16_t max_len)
2679 uint16_t prev_nf_output_iface = ctx->nf_output_iface;
2681 ctx->nf_output_iface = NF_OUT_DROP;
2685 add_output_action(ctx, ctx->flow.in_port);
2688 xlate_table_action(ctx, ctx->flow.in_port);
2694 flood_packets(ctx->ofproto, ctx->flow.in_port, htonl(OFPPC_NO_FLOOD),
2695 &ctx->nf_output_iface, ctx->odp_actions);
2698 flood_packets(ctx->ofproto, ctx->flow.in_port, htonl(0),
2699 &ctx->nf_output_iface, ctx->odp_actions);
2701 case OFPP_CONTROLLER:
2702 nl_msg_put_u64(ctx->odp_actions, ODP_ACTION_ATTR_CONTROLLER, max_len);
2705 add_output_action(ctx, OFPP_LOCAL);
/* Ordinary port: never send a packet back out its input port. */
2708 if (port != ctx->flow.in_port) {
2709 add_output_action(ctx, port);
/* Merge this action's NetFlow output with any previous one: FLOOD is
 * sticky, a single port is kept, two different ports become MULTI. */
2714 if (prev_nf_output_iface == NF_OUT_FLOOD) {
2715 ctx->nf_output_iface = NF_OUT_FLOOD;
2716 } else if (ctx->nf_output_iface == NF_OUT_DROP) {
2717 ctx->nf_output_iface = prev_nf_output_iface;
2718 } else if (prev_nf_output_iface != NF_OUT_DROP &&
2719 ctx->nf_output_iface != NF_OUT_FLOOD) {
2720 ctx->nf_output_iface = NF_OUT_MULTI;
2725 xlate_output_action(struct action_xlate_ctx *ctx,
2726 const struct ofp_action_output *oao)
2728 xlate_output_action__(ctx, ntohs(oao->port), ntohs(oao->max_len));
2731 /* If the final ODP action in 'ctx' is "pop priority", drop it, as an
2732 * optimization, because we're going to add another action that sets the
2733 * priority immediately after, or because there are no actions following the
2736 remove_pop_action(struct action_xlate_ctx *ctx)
2738 if (ctx->odp_actions->size == ctx->last_pop_priority) {
2739 ctx->odp_actions->size -= NLA_ALIGN(NLA_HDRLEN);
2740 ctx->last_pop_priority = -1;
2745 add_pop_action(struct action_xlate_ctx *ctx)
2747 if (ctx->odp_actions->size != ctx->last_pop_priority) {
2748 nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_POP_PRIORITY);
2749 ctx->last_pop_priority = ctx->odp_actions->size;
/* Translates OFPAT_ENQUEUE: maps the OpenFlow queue to a datapath priority
 * and brackets the output with set-priority/pop-priority ODP actions,
 * falling back to a plain output if the queue cannot be mapped.
 * NOTE(review): extraction dropped the 'priority' declaration and the
 * error-check/return lines around dpif_queue_to_priority(). */
2754 xlate_enqueue_action(struct action_xlate_ctx *ctx,
2755 const struct ofp_action_enqueue *oae)
2757 uint16_t ofp_port, odp_port;
2761 error = dpif_queue_to_priority(ctx->ofproto->dpif, ntohl(oae->queue_id),
2764 /* Fall back to ordinary output action. */
2765 xlate_output_action__(ctx, ntohs(oae->port), 0);
2769 /* Figure out ODP output port. */
2770 ofp_port = ntohs(oae->port);
2771 if (ofp_port == OFPP_IN_PORT) {
2772 ofp_port = ctx->flow.in_port;
2774 odp_port = ofp_port_to_odp_port(ofp_port);
2776 /* Add ODP actions. */
2777 remove_pop_action(ctx);
2778 nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, priority);
2779 add_output_action(ctx, odp_port);
2780 add_pop_action(ctx);
2782 /* Update NetFlow output port. */
2783 if (ctx->nf_output_iface == NF_OUT_DROP) {
2784 ctx->nf_output_iface = odp_port;
2785 } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
2786 ctx->nf_output_iface = NF_OUT_MULTI;
/* Translates NXAST_SET_QUEUE: maps the queue id to a datapath priority and
 * emits a set-priority ODP action; silently ignores unmappable queues.
 * NOTE(review): extraction dropped the 'priority'/'error' declarations and
 * the error-check/return lines. */
2791 xlate_set_queue_action(struct action_xlate_ctx *ctx,
2792 const struct nx_action_set_queue *nasq)
2797 error = dpif_queue_to_priority(ctx->ofproto->dpif, ntohl(nasq->queue_id),
2800 /* Couldn't translate queue to a priority, so ignore. A warning
2801 * has already been logged. */
/* Cancel any immediately preceding pop-priority before setting a new one. */
2805 remove_pop_action(ctx)
2806 nl_msg_put_u32(ctx->odp_actions, ODP_ACTION_ATTR_SET_PRIORITY, priority);
2810 xlate_set_dl_tci(struct action_xlate_ctx *ctx)
2812 ovs_be16 tci = ctx->flow.vlan_tci;
2813 if (!(tci & htons(VLAN_CFI))) {
2814 nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_STRIP_VLAN);
2816 nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_TCI,
2817 tci & ~htons(VLAN_CFI));
2821 struct xlate_reg_state {
2827 save_reg_state(const struct action_xlate_ctx *ctx,
2828 struct xlate_reg_state *state)
2830 state->vlan_tci = ctx->flow.vlan_tci;
2831 state->tun_id = ctx->flow.tun_id;
2835 update_reg_state(struct action_xlate_ctx *ctx,
2836 const struct xlate_reg_state *state)
2838 if (ctx->flow.vlan_tci != state->vlan_tci) {
2839 xlate_set_dl_tci(ctx);
2841 if (ctx->flow.tun_id != state->tun_id) {
2842 nl_msg_put_be64(ctx->odp_actions,
2843 ODP_ACTION_ATTR_SET_TUNNEL, ctx->flow.tun_id);
/* Translates NXAST_AUTOPATH: resolves the configured port to a live bond
 * slave (or OFPP_NONE if the port/bundle is unusable) and stores the result
 * into the flow's destination register via autopath_execute().
 * NOTE(review): extraction dropped the "if (slave)"/else lines around the
 * bond_choose_output_slave() result. */
2848 xlate_autopath(struct action_xlate_ctx *ctx,
2849 const struct nx_action_autopath *naa)
2851 uint16_t ofp_port = ntohl(naa->id);
2852 struct ofport_dpif *port = get_ofp_port(ctx->ofproto, ofp_port);
2854 if (!port || !port->bundle) {
2855 ofp_port = OFPP_NONE;
2856 } else if (port->bundle->bond) {
2857 /* Autopath does not support VLAN hashing. */
2858 struct ofport_dpif *slave = bond_choose_output_slave(
2859 port->bundle->bond, &ctx->flow, OFP_VLAN_NONE, &ctx->tags);
2861 ofp_port = slave->up.ofp_port;
2864 autopath_execute(naa, &ctx->flow, ofp_port);
/* Translates a Nicira vendor extension action ('nah'), dispatching on its
 * subtype.  Subtypes that modify flow fields also update ctx->flow so later
 * actions and the flow key stay consistent.
 * NOTE(review): extraction dropped the "switch (subtype) {" line, the
 * 'tun_id' declaration, the break statements between cases, and several
 * case labels (e.g. NXAST_NOTE before "Nothing to do"). */
2868 xlate_nicira_action(struct action_xlate_ctx *ctx,
2869 const struct nx_action_header *nah)
2871 const struct nx_action_resubmit *nar;
2872 const struct nx_action_set_tunnel *nast;
2873 const struct nx_action_set_queue *nasq;
2874 const struct nx_action_multipath *nam;
2875 const struct nx_action_autopath *naa;
2876 enum nx_action_subtype subtype = ntohs(nah->subtype);
2877 struct xlate_reg_state state;
2880 assert(nah->vendor == htonl(NX_VENDOR_ID));
2882 case NXAST_RESUBMIT:
2883 nar = (const struct nx_action_resubmit *) nah;
2884 xlate_table_action(ctx, ntohs(nar->in_port));
2887 case NXAST_SET_TUNNEL:
/* 32-bit tunnel id: widen to the 64-bit on-the-wire representation. */
2888 nast = (const struct nx_action_set_tunnel *) nah;
2889 tun_id = htonll(ntohl(nast->tun_id));
2890 nl_msg_put_be64(ctx->odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, tun_id);
2891 ctx->flow.tun_id = tun_id;
2894 case NXAST_DROP_SPOOFED_ARP:
2895 if (ctx->flow.dl_type == htons(ETH_TYPE_ARP)) {
2896 nl_msg_put_flag(ctx->odp_actions,
2897 ODP_ACTION_ATTR_DROP_SPOOFED_ARP);
2901 case NXAST_SET_QUEUE:
2902 nasq = (const struct nx_action_set_queue *) nah;
2903 xlate_set_queue_action(ctx, nasq);
2906 case NXAST_POP_QUEUE:
2907 add_pop_action(ctx);
/* Register moves/loads may touch vlan_tci or tun_id; bracket them with
 * save/update so matching ODP actions get emitted. */
2910 case NXAST_REG_MOVE:
2911 save_reg_state(ctx, &state);
2912 nxm_execute_reg_move((const struct nx_action_reg_move *) nah,
2914 update_reg_state(ctx, &state);
2917 case NXAST_REG_LOAD:
2918 save_reg_state(ctx, &state);
2919 nxm_execute_reg_load((const struct nx_action_reg_load *) nah,
2921 update_reg_state(ctx, &state);
2925 /* Nothing to do. */
2928 case NXAST_SET_TUNNEL64:
2929 tun_id = ((const struct nx_action_set_tunnel64 *) nah)->tun_id;
2930 nl_msg_put_be64(ctx->odp_actions, ODP_ACTION_ATTR_SET_TUNNEL, tun_id);
2931 ctx->flow.tun_id = tun_id;
2934 case NXAST_MULTIPATH:
2935 nam = (const struct nx_action_multipath *) nah;
2936 multipath_execute(nam, &ctx->flow);
2939 case NXAST_AUTOPATH:
2940 naa = (const struct nx_action_autopath *) nah;
2941 xlate_autopath(ctx, naa);
2944 /* If you add a new action here that modifies flow data, don't forget to
2945 * update the flow key in ctx->flow at the same time. */
2947 case NXAST_SNAT__OBSOLETE:
2949 VLOG_DBG_RL(&rl, "unknown Nicira action type %d", (int) subtype);
/* Translates the OpenFlow actions 'in' (length 'n_in') into ODP actions in
 * 'ctx', after first checking STP/receive restrictions on the input port.
 * Each field-modifying action both emits an ODP action and updates
 * ctx->flow so subsequent actions see the modified flow.
 * NOTE(review): extraction dropped the "switch (type) {", several case
 * labels (e.g. OFPAT_OUTPUT), break statements, and an early return on the
 * drop path. */
2955 do_xlate_actions(const union ofp_action *in, size_t n_in,
2956 struct action_xlate_ctx *ctx)
2958 const struct ofport_dpif *port;
2959 struct actions_iterator iter;
2960 const union ofp_action *ia;
/* Respect OFPPC_NO_RECV / OFPPC_NO_RECV_STP on the input port: drop STP
 * frames when NO_RECV_STP is set, all other frames when NO_RECV is set. */
2962 port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
2964 && port->up.opp.config & htonl(OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
2965 port->up.opp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp)
2966 ? htonl(OFPPC_NO_RECV_STP)
2967 : htonl(OFPPC_NO_RECV))) {
2968 /* Drop this flow. */
2972 for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) {
2973 enum ofp_action_type type = ntohs(ia->type);
2974 const struct ofp_action_dl_addr *oada;
2978 xlate_output_action(ctx, &ia->output);
2981 case OFPAT_SET_VLAN_VID:
2982 ctx->flow.vlan_tci &= ~htons(VLAN_VID_MASK);
2983 ctx->flow.vlan_tci |= ia->vlan_vid.vlan_vid | htons(VLAN_CFI);
2984 xlate_set_dl_tci(ctx);
2987 case OFPAT_SET_VLAN_PCP:
2988 ctx->flow.vlan_tci &= ~htons(VLAN_PCP_MASK);
2989 ctx->flow.vlan_tci |= htons(
2990 (ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) | VLAN_CFI);
2991 xlate_set_dl_tci(ctx);
2994 case OFPAT_STRIP_VLAN:
2995 ctx->flow.vlan_tci = htons(0);
2996 xlate_set_dl_tci(ctx);
2999 case OFPAT_SET_DL_SRC:
3000 oada = ((struct ofp_action_dl_addr *) ia);
3001 nl_msg_put_unspec(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_SRC,
3002 oada->dl_addr, ETH_ADDR_LEN);
3003 memcpy(ctx->flow.dl_src, oada->dl_addr, ETH_ADDR_LEN);
3006 case OFPAT_SET_DL_DST:
3007 oada = ((struct ofp_action_dl_addr *) ia);
3008 nl_msg_put_unspec(ctx->odp_actions, ODP_ACTION_ATTR_SET_DL_DST,
3009 oada->dl_addr, ETH_ADDR_LEN);
3010 memcpy(ctx->flow.dl_dst, oada->dl_addr, ETH_ADDR_LEN);
3013 case OFPAT_SET_NW_SRC:
3014 nl_msg_put_be32(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_SRC,
3015 ia->nw_addr.nw_addr);
3016 ctx->flow.nw_src = ia->nw_addr.nw_addr;
3019 case OFPAT_SET_NW_DST:
3020 nl_msg_put_be32(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_DST,
3021 ia->nw_addr.nw_addr);
3022 ctx->flow.nw_dst = ia->nw_addr.nw_addr;
3025 case OFPAT_SET_NW_TOS:
3026 nl_msg_put_u8(ctx->odp_actions, ODP_ACTION_ATTR_SET_NW_TOS,
3028 ctx->flow.nw_tos = ia->nw_tos.nw_tos;
3031 case OFPAT_SET_TP_SRC:
3032 nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_TP_SRC,
3033 ia->tp_port.tp_port);
3034 ctx->flow.tp_src = ia->tp_port.tp_port;
3037 case OFPAT_SET_TP_DST:
3038 nl_msg_put_be16(ctx->odp_actions, ODP_ACTION_ATTR_SET_TP_DST,
3039 ia->tp_port.tp_port);
3040 ctx->flow.tp_dst = ia->tp_port.tp_port;
3044 xlate_nicira_action(ctx, (const struct nx_action_header *) ia);
3048 xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia);
3052 VLOG_DBG_RL(&rl, "unknown action type %d", (int) type);
3059 action_xlate_ctx_init(struct action_xlate_ctx *ctx,
3060 struct ofproto_dpif *ofproto, const struct flow *flow,
3061 const struct ofpbuf *packet)
3063 ctx->ofproto = ofproto;
3065 ctx->packet = packet;
3066 ctx->resubmit_hook = NULL;
3067 ctx->check_special = true;
/* Translates the OpenFlow actions 'in' (length 'n_in') into a freshly
 * allocated ofpbuf of ODP actions, which the caller must eventually
 * ofpbuf_delete().  Also decides whether a datapath flow may be set up
 * (ctx->may_set_up_flow), consulting special-packet processing and in-band
 * control.
 * NOTE(review): extraction dropped several initializations (e.g.
 * ctx->recurse, ctx->tags) and the else branch pairing of the
 * process_special() check. */
3070 static struct ofpbuf *
3071 xlate_actions(struct action_xlate_ctx *ctx,
3072 const union ofp_action *in, size_t n_in)
3074 COVERAGE_INC(ofproto_dpif_xlate);
3076 ctx->odp_actions = ofpbuf_new(512);
3078 ctx->may_set_up_flow = true;
3079 ctx->nf_output_iface = NF_OUT_DROP;
3081 ctx->last_pop_priority = -1;
/* Special flows (e.g. CFM/LACP handled by ofproto itself) must not be
 * installed in the datapath; otherwise translate normally. */
3083 if (ctx->check_special
3084 && process_special(ctx->ofproto, &ctx->flow, ctx->packet)) {
3085 ctx->may_set_up_flow = false;
3087 do_xlate_actions(in, n_in, ctx);
/* A trailing pop-priority is a no-op; drop it. */
3090 remove_pop_action(ctx);
3092 /* Check with in-band control to see if we're allowed to set up this
3094 if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow,
3095 ctx->odp_actions->data,
3096 ctx->odp_actions->size)) {
3097 ctx->may_set_up_flow = false;
3100 return ctx->odp_actions;
3103 /* OFPP_NORMAL implementation. */
3106 struct ofport_dpif *port;
3111 struct dst builtin[32];
3113 size_t n, allocated;
3116 static void dst_set_init(struct dst_set *);
3117 static void dst_set_add(struct dst_set *, const struct dst *);
3118 static void dst_set_free(struct dst_set *);
3120 static struct ofport_dpif *ofbundle_get_a_port(const struct ofbundle *);
3123 set_dst(struct action_xlate_ctx *ctx, struct dst *dst,
3124 const struct ofbundle *in_bundle, const struct ofbundle *out_bundle)
3126 dst->vlan = (out_bundle->vlan >= 0 ? OFP_VLAN_NONE
3127 : in_bundle->vlan >= 0 ? in_bundle->vlan
3128 : ctx->flow.vlan_tci == 0 ? OFP_VLAN_NONE
3129 : vlan_tci_to_vid(ctx->flow.vlan_tci));
3131 dst->port = (!out_bundle->bond
3132 ? ofbundle_get_a_port(out_bundle)
3133 : bond_choose_output_slave(out_bundle->bond, &ctx->flow,
3134 dst->vlan, &ctx->tags));
3136 return dst->port != NULL;
3140 mirror_mask_ffs(mirror_mask_t mask)
3142 BUILD_ASSERT_DECL(sizeof(unsigned int) >= sizeof(mask));
3147 dst_set_init(struct dst_set *set)
3149 set->dsts = set->builtin;
3151 set->allocated = ARRAY_SIZE(set->builtin);
3155 dst_set_add(struct dst_set *set, const struct dst *dst)
3157 if (set->n >= set->allocated) {
3158 size_t new_allocated;
3159 struct dst *new_dsts;
3161 new_allocated = set->allocated * 2;
3162 new_dsts = xmalloc(new_allocated * sizeof *new_dsts);
3163 memcpy(new_dsts, set->dsts, set->n * sizeof *new_dsts);
3167 set->dsts = new_dsts;
3168 set->allocated = new_allocated;
3170 set->dsts[set->n++] = *dst;
3174 dst_set_free(struct dst_set *set)
3176 if (set->dsts != set->builtin) {
3182 dst_is_duplicate(const struct dst_set *set, const struct dst *test)
3185 for (i = 0; i < set->n; i++) {
3186 if (set->dsts[i].vlan == test->vlan
3187 && set->dsts[i].port == test->port) {
3195 ofbundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan)
3197 return bundle->vlan < 0 && vlan_bitmap_contains(bundle->trunks, vlan);
3201 ofbundle_includes_vlan(const struct ofbundle *bundle, uint16_t vlan)
3203 return vlan == bundle->vlan || ofbundle_trunks_vlan(bundle, vlan);
3206 /* Returns an arbitrary interface within 'bundle'. */
3207 static struct ofport_dpif *
3208 ofbundle_get_a_port(const struct ofbundle *bundle)
3210 return CONTAINER_OF(list_front(&bundle->ports),
3211 struct ofport_dpif, bundle_node);
/* Adds the non-mirror destinations for 'vlan' traffic from 'in_bundle' to
 * 'set': either every floodable bundle carrying the VLAN (OFBUNDLE_FLOOD) or
 * the single 'out_bundle'.  Also sets the NetFlow output interface.
 * NOTE(review): extraction dropped the 'struct dst dst;' declaration and the
 * trailing else-if that sets NF_OUT_MULTI. */
3215 compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan,
3216 const struct ofbundle *in_bundle,
3217 const struct ofbundle *out_bundle, struct dst_set *set)
3221 if (out_bundle == OFBUNDLE_FLOOD) {
3222 struct ofbundle *bundle;
3224 HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) {
/* Skip the input bundle, bundles not carrying this VLAN, non-floodable
 * bundles, and mirror-output bundles. */
3225 if (bundle != in_bundle
3226 && ofbundle_includes_vlan(bundle, vlan)
3227 && bundle->floodable
3228 && !bundle->mirror_out
3229 && set_dst(ctx, &dst, in_bundle, bundle)) {
3230 dst_set_add(set, &dst);
3233 ctx->nf_output_iface = NF_OUT_FLOOD;
3234 } else if (out_bundle && set_dst(ctx, &dst, in_bundle, out_bundle)) {
3235 dst_set_add(set, &dst);
3236 ctx->nf_output_iface = dst.port->odp_port;
3241 vlan_is_mirrored(const struct ofmirror *m, int vlan)
3243 return vlan_bitmap_contains(m->vlans, vlan);
/* Adds mirror destinations to 'set' for traffic on 'vlan' entering via
 * 'in_bundle': collects every mirror selecting the input bundle or any
 * already-chosen output bundle, then for each matching mirror adds either
 * its dedicated output port or, for output-VLAN mirrors, every bundle
 * carrying the mirror's output VLAN.
 * NOTE(review): extraction dropped the 'i'/'flow_vlan'/'dst' declarations,
 * the "while (mirrors)" loop header, and some closing-brace/continue
 * lines. */
3247 compose_mirror_dsts(struct action_xlate_ctx *ctx,
3248 uint16_t vlan, const struct ofbundle *in_bundle,
3249 struct dst_set *set)
3251 struct ofproto_dpif *ofproto = ctx->ofproto;
3252 mirror_mask_t mirrors;
/* A mirror applies if it selects the input bundle as source or any chosen
 * destination bundle as destination. */
3256 mirrors = in_bundle->src_mirrors;
3257 for (i = 0; i < set->n; i++) {
3258 mirrors |= set->dsts[i].port->bundle->dst_mirrors;
/* Remember the packet's original VLAN for the input-port comparison
 * below; 0 (untagged) is canonicalized to OFP_VLAN_NONE. */
3265 flow_vlan = vlan_tci_to_vid(ctx->flow.vlan_tci);
3266 if (flow_vlan == 0) {
3267 flow_vlan = OFP_VLAN_NONE;
/* Process mirrors lowest-bit first, clearing each as it is handled. */
3271 struct ofmirror *m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1];
3272 if (vlan_is_mirrored(m, vlan)) {
3276 if (set_dst(ctx, &dst, in_bundle, m->out)
3277 && !dst_is_duplicate(set, &dst)) {
3278 dst_set_add(set, &dst);
3281 struct ofbundle *bundle;
3283 HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
3284 if (ofbundle_includes_vlan(bundle, m->out_vlan)
3285 && set_dst(ctx, &dst, in_bundle, bundle))
/* Trunk bundles keep the mirror's output VLAN as a tag. */
3287 if (bundle->vlan < 0) {
3288 dst.vlan = m->out_vlan;
3290 if (dst_is_duplicate(set, &dst)) {
3294 /* Use the vlan tag on the original flow instead of
3295 * the one passed in the vlan parameter. This ensures
3296 * that we compare the vlan from before any implicit
3297 * tagging tags place. This is necessary because
3298 * dst->vlan is the final vlan, after removing implicit
3300 if (bundle == in_bundle && dst.vlan == flow_vlan) {
3301 /* Don't send out input port on same VLAN. */
3304 dst_set_add(set, &dst);
/* Clear the lowest set bit: this mirror is done. */
3309 mirrors &= mirrors - 1;
/* Emits ODP output actions for sending the flow from 'in_bundle' to
 * 'out_bundle' (plus any mirrors), grouping destinations by VLAN so that
 * set-TCI/strip-VLAN actions are emitted only when the VLAN changes.
 * NOTE(review): extraction dropped the 'struct dst_set set;'/'ovs_be16 tci;'
 * declarations, dst_set_init()/dst_set_free() calls, and some
 * 'continue'/brace lines. */
3314 compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan,
3315 const struct ofbundle *in_bundle,
3316 const struct ofbundle *out_bundle)
3318 uint16_t initial_vlan, cur_vlan;
3319 const struct dst *dst;
3323 compose_dsts(ctx, vlan, in_bundle, out_bundle, &set);
3324 compose_mirror_dsts(ctx, vlan, in_bundle, &set);
3326 /* Output all the packets we can without having to change the VLAN. */
3327 initial_vlan = vlan_tci_to_vid(ctx->flow.vlan_tci);
3328 if (initial_vlan == 0) {
3329 initial_vlan = OFP_VLAN_NONE;
3331 for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) {
3332 if (dst->vlan != initial_vlan) {
3335 nl_msg_put_u32(ctx->odp_actions,
3336 ODP_ACTION_ATTR_OUTPUT, dst->port->odp_port);
3339 /* Then output the rest. */
3340 cur_vlan = initial_vlan;
3341 for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) {
3342 if (dst->vlan == initial_vlan) {
/* Emit a VLAN change only when this destination's VLAN differs from
 * the last one emitted. */
3345 if (dst->vlan != cur_vlan) {
3346 if (dst->vlan == OFP_VLAN_NONE) {
3347 nl_msg_put_flag(ctx->odp_actions, ODP_ACTION_ATTR_STRIP_VLAN);
/* Preserve the packet's PCP bits while replacing the VLAN id. */
3350 tci = htons(dst->vlan & VLAN_VID_MASK);
3351 tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK);
3352 nl_msg_put_be16(ctx->odp_actions,
3353 ODP_ACTION_ATTR_SET_DL_TCI, tci);
3355 cur_vlan = dst->vlan;
3357 nl_msg_put_u32(ctx->odp_actions,
3358 ODP_ACTION_ATTR_OUTPUT, dst->port->odp_port);
3364 /* Returns the effective vlan of a packet, taking into account both the
3365 * 802.1Q header and implicitly tagged ports. A value of 0 indicates that
3366 * the packet is untagged and -1 indicates it has an invalid header and
3367 * should be dropped. */
3369 flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow,
3370 struct ofbundle *in_bundle, bool have_packet)
3372 int vlan = vlan_tci_to_vid(flow->vlan_tci);
/* Access port: a tagged packet is invalid; untagged traffic belongs to
 * the port's implicit VLAN.  NOTE(review): extraction dropped the
 * "if (vlan) { ... return -1; }" structure and the trailing returns. */
3373 if (in_bundle->vlan >= 0) {
3376 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3377 VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
3378 "packet received on port %s configured with "
3379 "implicit VLAN %"PRIu16,
3380 ofproto->up.name, vlan,
3381 in_bundle->name, in_bundle->vlan);
3385 vlan = in_bundle->vlan;
/* Trunk (or other) port: the VLAN must be among those it carries. */
3387 if (!ofbundle_includes_vlan(in_bundle, vlan)) {
3389 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3390 VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
3391 "packet received on port %s not configured for "
3393 ofproto->up.name, vlan, in_bundle->name, vlan);
3402 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
3403 * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
3404 * indicate this; newer upstream kernels use gratuitous ARP requests. */
3406 is_gratuitous_arp(const struct flow *flow)
3408 return (flow->dl_type == htons(ETH_TYPE_ARP)
3409 && eth_addr_is_broadcast(flow->dl_dst)
3410 && (flow->nw_proto == ARP_OP_REPLY
3411 || (flow->nw_proto == ARP_OP_REQUEST
3412 && flow->nw_src == flow->nw_dst)));
/* Updates the MAC learning table with the source MAC of 'flow' on 'vlan'
 * via 'in_bundle', with special handling for gratuitous ARPs to avoid
 * learning from packets reflected over bond slaves.  Learned changes tag
 * the revalidate set so affected flows are recomputed.
 * NOTE(review): extraction dropped some returns/braces (e.g. the early
 * return when a grat-ARP-locked entry suppresses learning). */
3416 update_learning_table(struct ofproto_dpif *ofproto,
3417 const struct flow *flow, int vlan,
3418 struct ofbundle *in_bundle)
3420 struct mac_entry *mac;
3422 if (!mac_learning_may_learn(ofproto->ml, flow->dl_src, vlan)) {
3426 mac = mac_learning_insert(ofproto->ml, flow->dl_src, vlan);
3427 if (is_gratuitous_arp(flow)) {
3428 /* We don't want to learn from gratuitous ARP packets that are
3429 * reflected back over bond slaves so we lock the learning table. */
3430 if (!in_bundle->bond) {
3431 mac_entry_set_grat_arp_lock(mac);
3432 } else if (mac_entry_is_grat_arp_locked(mac)) {
3437 if (mac_entry_is_new(mac) || mac->port.p != in_bundle) {
3438 /* The log messages here could actually be useful in debugging,
3439 * so keep the rate limit relatively high. */
3440 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
3441 VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
3442 "on port %s in VLAN %d",
3443 ofproto->up.name, ETH_ADDR_ARGS(flow->dl_src),
3444 in_bundle->name, vlan);
/* Record the new location and mark dependent flows for revalidation. */
3446 mac->port.p = in_bundle;
3447 tag_set_add(&ofproto->revalidate_set,
3448 mac_learning_changed(ofproto->ml, mac));
3452 /* Determines whether packets in 'flow' within 'br' should be forwarded or
3453 * dropped. Returns true if they may be forwarded, false if they should be
3456 * If 'have_packet' is true, it indicates that the caller is processing a
3457 * received packet. If 'have_packet' is false, then the caller is just
3458 * revalidating an existing flow because configuration has changed. Either
3459 * way, 'have_packet' only affects logging (there is no point in logging errors
3460 * during revalidation).
3462 * Sets '*in_portp' to the input port. This will be a null pointer if
3463 * flow->in_port does not designate a known input port (in which case
3464 * is_admissible() returns false).
3466 * When returning true, sets '*vlanp' to the effective VLAN of the input
3467 * packet, as returned by flow_get_vlan().
3469 * May also add tags to '*tags', although the current implementation only does
3470 * so in one special case.
3473 is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow,
3475 tag_type *tags, int *vlanp, struct ofbundle **in_bundlep)
3477 struct ofport_dpif *in_port;
3478 struct ofbundle *in_bundle;
3481 /* Find the port and bundle for the received packet. */
3482 in_port = get_ofp_port(ofproto, flow->in_port);
3483 *in_bundlep = in_bundle = in_port->bundle;
3484 if (!in_port || !in_bundle) {
3485 /* No interface? Something fishy... */
3487 /* Odd. A few possible reasons here:
3489 * - We deleted a port but there are still a few packets queued up
3492 * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
3493 * we don't know about.
3495 * - Packet arrived on the local port but the local port is not
3498 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3500 VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
3502 ofproto->up.name, flow->in_port);
3506 *vlanp = vlan = flow_get_vlan(ofproto, flow, in_bundle, have_packet);
3511 /* Drop frames for reserved multicast addresses. */
3512 if (eth_addr_is_reserved(flow->dl_dst)) {
3516 /* Drop frames on bundles reserved for mirroring. */
3517 if (in_bundle->mirror_out) {
3519 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3520 VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
3521 "%s, which is reserved exclusively for mirroring",
3522 ofproto->up.name, in_bundle->name);
3527 if (in_bundle->bond) {
3528 struct mac_entry *mac;
3530 switch (bond_check_admissibility(in_bundle->bond, in_port,
3531 flow->dl_dst, tags)) {
3538 case BV_DROP_IF_MOVED:
3539 mac = mac_learning_lookup(ofproto->ml, flow->dl_src, vlan, NULL);
3540 if (mac && mac->port.p != in_bundle &&
3541 (!is_gratuitous_arp(flow)
3542 || mac_entry_is_grat_arp_locked(mac))) {
3552 /* If the composed actions may be applied to any packet in the given 'flow',
3553 * returns true. Otherwise, the actions should only be applied to 'packet', or
3554 * not at all, if 'packet' was NULL. */
3556 xlate_normal(struct action_xlate_ctx *ctx)
3558 struct ofbundle *in_bundle;
3559 struct ofbundle *out_bundle;
3560 struct mac_entry *mac;
/* NOTE(review): extraction dropped the 'int vlan;' declaration and several
 * goto/return lines on the drop paths. */
3563 /* Check whether we should drop packets in this flow. */
3564 if (!is_admissible(ctx->ofproto, &ctx->flow, ctx->packet != NULL,
3565 &ctx->tags, &vlan, &in_bundle)) {
3570 /* Learn source MAC (but don't try to learn from revalidation). */
3572 update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
3575 /* Determine output bundle. */
3576 mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan,
3579 out_bundle = mac->port.p;
3580 } else if (!ctx->packet && !eth_addr_is_multicast(ctx->flow.dl_dst)) {
3581 /* If we are revalidating but don't have a learning entry then eject
3582 * the flow. Installing a flow that floods packets opens up a window
3583 * of time where we could learn from a packet reflected on a bond and
3584 * blackhole packets before the learning table is updated to reflect
3585 * the correct port. */
/* Unknown destination: flood within the VLAN. */
3588 out_bundle = OFBUNDLE_FLOOD;
3591 /* Don't send packets out their input bundles. */
3592 if (in_bundle == out_bundle) {
3598 compose_actions(ctx, vlan, in_bundle, out_bundle);
3605 get_drop_frags(struct ofproto *ofproto_)
3607 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3610 dpif_get_drop_frags(ofproto->dpif, &drop_frags);
3615 set_drop_frags(struct ofproto *ofproto_, bool drop_frags)
3617 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3619 dpif_set_drop_frags(ofproto->dpif, drop_frags);
/* ofproto hook: implements OFPT_PACKET_OUT.  Validates 'ofp_actions' against
 * the datapath's port range, translates them to ODP actions, and executes
 * them on 'packet' in the datapath.
 * NOTE(review): extraction dropped the 'error' declaration, the "if
 * (!error)" wrapper, and the return of the error status. */
3623 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
3624 const struct flow *flow,
3625 const union ofp_action *ofp_actions, size_t n_ofp_actions)
3627 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3630 error = validate_actions(ofp_actions, n_ofp_actions, flow,
3631 ofproto->max_ports);
3633 struct action_xlate_ctx ctx;
3634 struct ofpbuf *odp_actions;
3636 action_xlate_ctx_init(&ctx, ofproto, flow, packet);
3637 odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions);
3638 dpif_execute(ofproto->dpif, odp_actions->data, odp_actions->size,
3640 ofpbuf_delete(odp_actions);
3646 get_netflow_ids(const struct ofproto *ofproto_,
3647 uint8_t *engine_type, uint8_t *engine_id)
3649 struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3651 dpif_get_netflow_ids(ofproto->dpif, engine_type, engine_id);
3654 static struct ofproto_dpif *
3655 ofproto_dpif_lookup(const char *name)
3657 struct ofproto *ofproto = ofproto_lookup(name);
3658 return (ofproto && ofproto->ofproto_class == &ofproto_dpif_class
3659 ? ofproto_dpif_cast(ofproto)
/* unixctl handler for "fdb/show": replies with a table of the named
 * bridge's MAC learning entries (port, VLAN, MAC, age).
 * NOTE(review): extraction dropped the final ds_destroy(&ds) and the early
 * return after the 501 reply. */
3664 ofproto_unixctl_fdb_show(struct unixctl_conn *conn,
3665 const char *args, void *aux OVS_UNUSED)
3667 struct ds ds = DS_EMPTY_INITIALIZER;
3668 const struct ofproto_dpif *ofproto;
3669 const struct mac_entry *e;
3671 ofproto = ofproto_dpif_lookup(args);
3673 unixctl_command_reply(conn, 501, "no such bridge");
3677 ds_put_cstr(&ds, " port VLAN MAC Age\n");
/* Entries are kept in LRU order; print each with its bundle's port. */
3678 LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
3679 struct ofbundle *bundle = e->port.p;
3680 ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
3681 ofbundle_get_a_port(bundle)->odp_port,
3682 e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
3684 unixctl_command_reply(conn, 200, ds_cstr(&ds));
3688 struct ofproto_trace {
3689 struct action_xlate_ctx ctx;
/* Appends a description of 'rule' (its cookie, match, and OpenFlow actions)
 * to 'result', indented 'level' tab stops; prints "No match" for a null
 * rule.  NOTE(review): extraction dropped the "if (!rule)" check and return
 * around the "No match" branch. */
3695 trace_format_rule(struct ds *result, int level, const struct rule *rule)
3697 ds_put_char_multiple(result, '\t', level);
3699 ds_put_cstr(result, "No match\n");
3703 ds_put_format(result, "Rule: cookie=%#"PRIx64" ",
3704 ntohll(rule->flow_cookie));
3705 cls_rule_format(&rule->cr, result);
3706 ds_put_char(result, '\n');
3708 ds_put_char_multiple(result, '\t', level);
3709 ds_put_cstr(result, "OpenFlow ");
3710 ofp_print_actions(result, (const struct ofp_action_header *) rule->actions,
3711 rule->n_actions * sizeof *rule->actions);
3712 ds_put_char(result, '\n');
3716 trace_format_flow(struct ds *result, int level, const char *title,
3717 struct ofproto_trace *trace)
3719 ds_put_char_multiple(result, '\t', level);
3720 ds_put_format(result, "%s: ", title);
3721 if (flow_equal(&trace->ctx.flow, &trace->flow)) {
3722 ds_put_cstr(result, "unchanged");
3724 flow_format(result, &trace->ctx.flow);
3725 trace->flow = trace->ctx.flow;
3727 ds_put_char(result, '\n');
3731 trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule)
3733 struct ofproto_trace *trace = CONTAINER_OF(ctx, struct ofproto_trace, ctx);
3734 struct ds *result = trace->result;
3736 ds_put_char(result, '\n');
3737 trace_format_flow(result, ctx->recurse + 1, "Resubmitted flow", trace);
3738 trace_format_rule(result, ctx->recurse + 1, &rule->up);
3742 ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_,
3743 void *aux OVS_UNUSED)
3745 char *dpname, *in_port_s, *tun_id_s, *packet_s;
3746 char *args = xstrdup(args_);
3747 char *save_ptr = NULL;
3748 struct ofproto_dpif *ofproto;
3749 struct ofpbuf packet;
3750 struct rule_dpif *rule;
3757 ofpbuf_init(&packet, strlen(args) / 2);
3760 dpname = strtok_r(args, " ", &save_ptr);
3761 tun_id_s = strtok_r(NULL, " ", &save_ptr);
3762 in_port_s = strtok_r(NULL, " ", &save_ptr);
3763 packet_s = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */
3764 if (!dpname || !in_port_s || !packet_s) {
3765 unixctl_command_reply(conn, 501, "Bad command syntax");
3769 ofproto = ofproto_dpif_lookup(dpname);
3771 unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list "
3776 tun_id = htonll(strtoull(tun_id_s, NULL, 0));
3777 in_port = ofp_port_to_odp_port(atoi(in_port_s));
3779 packet_s = ofpbuf_put_hex(&packet, packet_s, NULL);
3780 packet_s += strspn(packet_s, " ");
3781 if (*packet_s != '\0') {
3782 unixctl_command_reply(conn, 501, "Trailing garbage in command");
3785 if (packet.size < ETH_HEADER_LEN) {
3786 unixctl_command_reply(conn, 501, "Packet data too short for Ethernet");
3790 ds_put_cstr(&result, "Packet: ");
3791 s = ofp_packet_to_string(packet.data, packet.size, packet.size);
3792 ds_put_cstr(&result, s);
3795 flow_extract(&packet, tun_id, in_port, &flow);
3796 ds_put_cstr(&result, "Flow: ");
3797 flow_format(&result, &flow);
3798 ds_put_char(&result, '\n');
3800 rule = rule_dpif_lookup(ofproto, &flow);
3801 trace_format_rule(&result, 0, &rule->up);
3803 struct ofproto_trace trace;
3804 struct ofpbuf *odp_actions;
3806 trace.result = &result;
3808 action_xlate_ctx_init(&trace.ctx, ofproto, &flow, &packet);
3809 trace.ctx.resubmit_hook = trace_resubmit;
3810 odp_actions = xlate_actions(&trace.ctx,
3811 rule->up.actions, rule->up.n_actions);
3813 ds_put_char(&result, '\n');
3814 trace_format_flow(&result, 0, "Final flow", &trace);
3815 ds_put_cstr(&result, "Datapath actions: ");
3816 format_odp_actions(&result, odp_actions->data, odp_actions->size);
3817 ofpbuf_delete(odp_actions);
3820 unixctl_command_reply(conn, 200, ds_cstr(&result));
3823 ds_destroy(&result);
3824 ofpbuf_uninit(&packet);
3829 ofproto_dpif_unixctl_init(void)
3831 static bool registered;
3837 unixctl_command_register("ofproto/trace", ofproto_unixctl_trace, NULL);
3838 unixctl_command_register("fdb/show", ofproto_unixctl_fdb_show, NULL);
3841 const struct ofproto_class ofproto_dpif_class = {
3866 port_is_lacp_current,
3874 rule_modify_actions,
3887 is_mirror_output_bundle,