X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=c024963a230600c23c696c8d5dd7939f03ee4248;hb=b0f7b9b5c98557d159e4a12f125eacbf2a04a25b;hp=93e79a978d07ad6d8afbb5e8e381a380e2360939;hpb=e1154f713ec8f47ff38979d76e9456b49b0bc264;p=openvswitch diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 93e79a97..c024963a 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -88,7 +88,7 @@ struct rule_dpif { * * - Do not include packet or bytes that can be obtained from any facet's * packet_count or byte_count member or that can be obtained from the - * datapath by, e.g., dpif_flow_get() for any facet. + * datapath by, e.g., dpif_flow_get() for any subfacet. */ uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ @@ -106,6 +106,15 @@ static struct rule_dpif *rule_dpif_cast(const struct rule *rule) static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *, const struct flow *, uint8_t table); +static void flow_push_stats(const struct rule_dpif *, const struct flow *, + uint64_t packets, uint64_t bytes, + long long int used); + +static uint32_t rule_calculate_tag(const struct flow *, + const struct flow_wildcards *, + uint32_t basis); +static void rule_invalidate(const struct rule_dpif *); + #define MAX_MIRRORS 32 typedef uint32_t mirror_mask_t; #define MIRROR_MASK_C(X) UINT32_C(X) @@ -121,9 +130,10 @@ struct ofmirror { struct hmapx dsts; /* Contains "struct ofbundle *"s. */ unsigned long *vlans; /* Bitmap of chosen VLANs, NULL selects all. */ - /* Output (mutually exclusive). */ + /* Output (exactly one of out == NULL and out_vlan == -1 is true). */ struct ofbundle *out; /* Output port or NULL. */ int out_vlan; /* Output VLAN or -1. */ + mirror_mask_t dup_mirrors; /* Bitmap of mirrors with the same output. */ }; static void mirror_destroy(struct ofmirror *); @@ -144,6 +154,7 @@ struct ofbundle { * NULL if all VLANs are trunked. 
*/ struct lacp *lacp; /* LACP if LACP is enabled, otherwise NULL. */ struct bond *bond; /* Nonnull iff more than one port. */ + bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */ /* Status. */ bool floodable; /* True if no port has OFPPC_NO_FLOOD set. */ @@ -164,6 +175,8 @@ static void bundle_wait(struct ofbundle *); static void stp_run(struct ofproto_dpif *ofproto); static void stp_wait(struct ofproto_dpif *ofproto); +static bool ofbundle_includes_vlan(const struct ofbundle *, uint16_t vlan); + struct action_xlate_ctx { /* action_xlate_ctx_init() initializes these members. */ @@ -220,44 +233,65 @@ static void action_xlate_ctx_init(struct action_xlate_ctx *, static struct ofpbuf *xlate_actions(struct action_xlate_ctx *, const union ofp_action *in, size_t n_in); -/* An exact-match instantiation of an OpenFlow flow. */ +/* An exact-match instantiation of an OpenFlow flow. + * + * A facet associates a "struct flow", which represents the Open vSwitch + * userspace idea of an exact-match flow, with a set of datapath actions. + * + * A facet contains one or more subfacets. Each subfacet tracks the datapath's + * idea of the exact-match flow equivalent to the facet. When the kernel + * module (or other dpif implementation) and Open vSwitch userspace agree on + * the definition of a flow key, there is exactly one subfacet per facet. If + * the dpif implementation supports more-specific flow matching than userspace, + * however, a facet can have more than one subfacet, each of which corresponds + * to some distinction in flow that userspace simply doesn't understand. + * + * Flow expiration works in terms of subfacets, so a facet must have at least + * one subfacet or it will never expire, leaking memory. */ struct facet { + /* Owners. */ + struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */ + struct list list_node; /* In owning rule's 'facets' list. */ + struct rule_dpif *rule; /* Owning rule. */ + + /* Owned data. 
*/ + struct list subfacets; long long int used; /* Time last used; time created if not used. */ + /* Key. */ + struct flow flow; + /* These statistics: * * - Do include packets and bytes sent "by hand", e.g. with * dpif_execute(). * * - Do include packets and bytes that were obtained from the datapath - * when its statistics were reset (e.g. dpif_flow_put() with + * when a subfacet's statistics were reset (e.g. dpif_flow_put() with * DPIF_FP_ZERO_STATS). + * + * - Do not include packets or bytes that can be obtained from the + * datapath for any existing subfacet. */ uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ - uint64_t dp_packet_count; /* Last known packet count in the datapath. */ - uint64_t dp_byte_count; /* Last known byte count in the datapath. */ - + /* Resubmit statistics. */ uint64_t rs_packet_count; /* Packets pushed to resubmit children. */ uint64_t rs_byte_count; /* Bytes pushed to resubmit children. */ long long int rs_used; /* Used time pushed to resubmit children. */ + /* Accounting. */ uint64_t accounted_bytes; /* Bytes processed by facet_account(). */ + struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */ - struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */ - struct list list_node; /* In owning rule's 'facets' list. */ - struct rule_dpif *rule; /* Owning rule. */ - struct flow flow; /* Exact-match flow. */ - bool installed; /* Installed in datapath? */ - bool may_install; /* True ordinarily; false if actions must - * be reassessed for every packet. */ + /* Datapath actions. */ + bool may_install; /* False: reassess actions for every packet. */ bool has_learn; /* Actions include NXAST_LEARN? */ bool has_normal; /* Actions output to OFPP_NORMAL? */ size_t actions_len; /* Number of bytes in actions[]. */ struct nlattr *actions; /* Datapath actions. */ - tag_type tags; /* Tags. */ - struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. 
*/ + tag_type tags; /* Tags that would require revalidation. */ }; static struct facet *facet_create(struct rule_dpif *, const struct flow *); @@ -274,38 +308,64 @@ static bool execute_controller_action(struct ofproto_dpif *, const struct nlattr *odp_actions, size_t actions_len, struct ofpbuf *packet); -static void facet_execute(struct ofproto_dpif *, struct facet *, - struct ofpbuf *packet); - -static int facet_put__(struct ofproto_dpif *, struct facet *, - const struct nlattr *actions, size_t actions_len, - struct dpif_flow_stats *); -static void facet_install(struct ofproto_dpif *, struct facet *, - bool zero_stats); -static void facet_uninstall(struct ofproto_dpif *, struct facet *); + static void facet_flush_stats(struct ofproto_dpif *, struct facet *); static void facet_make_actions(struct ofproto_dpif *, struct facet *, const struct ofpbuf *packet); static void facet_update_time(struct ofproto_dpif *, struct facet *, long long int used); -static void facet_update_stats(struct ofproto_dpif *, struct facet *, - const struct dpif_flow_stats *); static void facet_reset_counters(struct facet *); -static void facet_reset_dp_stats(struct facet *, struct dpif_flow_stats *); static void facet_push_stats(struct facet *); static void facet_account(struct ofproto_dpif *, struct facet *); static bool facet_is_controller_flow(struct facet *); -static void flow_push_stats(const struct rule_dpif *, - struct flow *, uint64_t packets, uint64_t bytes, - long long int used); +/* A dpif flow associated with a facet. + * + * See also the large comment on struct facet. */ +struct subfacet { + /* Owners. */ + struct hmap_node hmap_node; /* In struct ofproto_dpif 'subfacets' hmap. */ + struct list list_node; /* In struct facet's 'subfacets' list. */ + struct facet *facet; /* Owning facet. */ + + /* Key. 
+ * + * To save memory in the common case, 'key' is NULL if 'key_fitness' is + * ODP_FIT_PERFECT, that is, odp_flow_key_from_flow() can accurately + * regenerate the ODP flow key from ->facet->flow. */ + enum odp_key_fitness key_fitness; + struct nlattr *key; + int key_len; -static uint32_t rule_calculate_tag(const struct flow *, - const struct flow_wildcards *, - uint32_t basis); -static void rule_invalidate(const struct rule_dpif *); + long long int used; /* Time last used; time created if not used. */ + + uint64_t dp_packet_count; /* Last known packet count in the datapath. */ + uint64_t dp_byte_count; /* Last known byte count in the datapath. */ + + bool installed; /* Installed in datapath? */ +}; + +static struct subfacet *subfacet_create(struct ofproto_dpif *, struct facet *, + enum odp_key_fitness, + const struct nlattr *key, + size_t key_len); +static struct subfacet *subfacet_find(struct ofproto_dpif *, + const struct nlattr *key, size_t key_len, + const struct flow *); +static void subfacet_destroy(struct ofproto_dpif *, struct subfacet *); +static void subfacet_destroy__(struct ofproto_dpif *, struct subfacet *); +static void subfacet_reset_dp_stats(struct subfacet *, + struct dpif_flow_stats *); +static void subfacet_update_time(struct ofproto_dpif *, struct subfacet *, + long long int used); +static void subfacet_update_stats(struct ofproto_dpif *, struct subfacet *, + const struct dpif_flow_stats *); +static int subfacet_install(struct ofproto_dpif *, struct subfacet *, + const struct nlattr *actions, size_t actions_len, + struct dpif_flow_stats *); +static void subfacet_uninstall(struct ofproto_dpif *, struct subfacet *); struct ofport_dpif { struct ofport up; @@ -321,6 +381,18 @@ struct ofport_dpif { struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */ enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ long long int stp_state_entered; + + struct hmap priorities; /* Map of attached 'priority_to_dscp's. 
*/ +}; + +/* Node in 'ofport_dpif''s 'priorities' map. Used to maintain a map from + * 'priority' (the datapath's term for QoS queue) to the dscp bits which all + * traffic egressing the 'ofport' with that priority should be marked with. */ +struct priority_to_dscp { + struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'priorities' map. */ + uint32_t priority; /* Priority of this queue (see struct flow). */ + + uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */ }; static struct ofport_dpif * @@ -333,6 +405,7 @@ ofport_dpif_cast(const struct ofport *ofport) static void port_run(struct ofport_dpif *); static void port_wait(struct ofport_dpif *); static int set_cfm(struct ofport *, const struct cfm_settings *); +static void ofport_clear_priorities(struct ofport_dpif *); struct dpif_completion { struct list list_node; @@ -371,6 +444,7 @@ struct ofproto_dpif { /* Facets. */ struct hmap facets; + struct hmap subfacets; /* Revalidation. */ struct table_dpif tables[N_TABLES]; @@ -409,12 +483,9 @@ static struct ofport_dpif *get_odp_port(struct ofproto_dpif *, static void update_learning_table(struct ofproto_dpif *, const struct flow *, int vlan, struct ofbundle *); -static bool is_admissible(struct ofproto_dpif *, const struct flow *, - bool have_packet, tag_type *, int *vlanp, - struct ofbundle **in_bundlep); - /* Upcalls. */ #define FLOW_MISS_MAX_BATCH 50 + static void handle_upcall(struct ofproto_dpif *, struct dpif_upcall *); static void handle_miss_upcalls(struct ofproto_dpif *, struct dpif_upcall *, size_t n); @@ -422,6 +493,9 @@ static void handle_miss_upcalls(struct ofproto_dpif *, /* Flow expiration. */ static int expire(struct ofproto_dpif *); +/* NetFlow. */ +static void send_netflow_active_timeouts(struct ofproto_dpif *); + /* Utilities. 
*/ static int send_packet(struct ofproto_dpif *, uint32_t odp_port, const struct ofpbuf *packet); @@ -517,6 +591,7 @@ construct(struct ofproto *ofproto_, int *n_tablesp) timer_set_duration(&ofproto->next_expiration, 1000); hmap_init(&ofproto->facets); + hmap_init(&ofproto->subfacets); for (i = 0; i < N_TABLES; i++) { struct table_dpif *table = &ofproto->tables[i]; @@ -579,6 +654,7 @@ destruct(struct ofproto *ofproto_) mac_learning_destroy(ofproto->ml); hmap_destroy(&ofproto->facets); + hmap_destroy(&ofproto->subfacets); dpif_close(ofproto->dpif); } @@ -627,7 +703,9 @@ run(struct ofproto *ofproto_) } if (ofproto->netflow) { - netflow_run(ofproto->netflow); + if (netflow_run(ofproto->netflow)) { + send_netflow_active_timeouts(ofproto); + } } if (ofproto->sflow) { dpif_sflow_run(ofproto->sflow); @@ -690,6 +768,9 @@ wait(struct ofproto *ofproto_) HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { bundle_wait(bundle); } + if (ofproto->netflow) { + netflow_wait(ofproto->netflow); + } mac_learning_wait(ofproto->ml); stp_wait(ofproto); if (ofproto->need_revalidate) { @@ -712,9 +793,13 @@ flush(struct ofproto *ofproto_) * bother trying to uninstall it. There is no point in uninstalling it * individually since we are about to blow away all the facets with * dpif_flow_flush(). 
*/ - facet->installed = false; - facet->dp_packet_count = 0; - facet->dp_byte_count = 0; + struct subfacet *subfacet; + + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + subfacet->installed = false; + subfacet->dp_packet_count = 0; + subfacet->dp_byte_count = 0; + } facet_remove(ofproto, facet); } dpif_flow_flush(ofproto->dpif); @@ -753,24 +838,6 @@ get_tables(struct ofproto *ofproto_, struct ofp_table_stats *ots) htonll(s.n_hit + ofproto->n_matches)); } -static int -set_netflow(struct ofproto *ofproto_, - const struct netflow_options *netflow_options) -{ - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - - if (netflow_options) { - if (!ofproto->netflow) { - ofproto->netflow = netflow_create(); - } - return netflow_set_options(ofproto->netflow, netflow_options); - } else { - netflow_destroy(ofproto->netflow); - ofproto->netflow = NULL; - return 0; - } -} - static struct ofport * port_alloc(void) { @@ -799,6 +866,7 @@ port_construct(struct ofport *port_) port->may_enable = true; port->stp_port = NULL; port->stp_state = STP_DISABLED; + hmap_init(&port->priorities); if (ofproto->sflow) { dpif_sflow_add_port(ofproto->sflow, port->odp_port, @@ -820,6 +888,9 @@ port_destruct(struct ofport *port_) if (ofproto->sflow) { dpif_sflow_del_port(ofproto->sflow, port->odp_port); } + + ofport_clear_priorities(port); + hmap_destroy(&port->priorities); } static void @@ -949,13 +1020,8 @@ send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d " "with unknown MAC", ofproto->up.name, port_num); } else { - int error = netdev_send(ofport->up.netdev, pkt); - if (error) { - VLOG_WARN_RL(&rl, "%s: sending BPDU on port %s failed (%s)", - ofproto->up.name, - netdev_get_name(ofport->up.netdev), - strerror(error)); - } + send_packet(ofproto_dpif_cast(ofport->up.ofproto), + ofport->odp_port, pkt); } } ofpbuf_delete(pkt); @@ -1040,7 +1106,7 @@ update_stp_port_state(struct ofport_dpif *ofport) ofport->stp_state = 
state; ofport->stp_state_entered = time_msec(); - if (fwd_change) { + if (fwd_change && ofport->bundle) { bundle_update(ofport->bundle); } @@ -1070,6 +1136,7 @@ set_stp_port(struct ofport *ofport_, if (sp) { ofport->stp_port = NULL; stp_port_disable(sp); + update_stp_port_state(ofport); } return 0; } else if (sp && stp_port_no(sp) != s->port_num @@ -1109,6 +1176,7 @@ get_stp_port_status(struct ofport *ofport_, s->state = stp_port_get_state(sp); s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000; s->role = stp_port_get_role(sp); + stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count); return 0; } @@ -1174,6 +1242,82 @@ stp_process_packet(const struct ofport_dpif *ofport, } } +static struct priority_to_dscp * +get_priority(const struct ofport_dpif *ofport, uint32_t priority) +{ + struct priority_to_dscp *pdscp; + uint32_t hash; + + hash = hash_int(priority, 0); + HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &ofport->priorities) { + if (pdscp->priority == priority) { + return pdscp; + } + } + return NULL; +} + +static void +ofport_clear_priorities(struct ofport_dpif *ofport) +{ + struct priority_to_dscp *pdscp, *next; + + HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &ofport->priorities) { + hmap_remove(&ofport->priorities, &pdscp->hmap_node); + free(pdscp); + } +} + +static int +set_queues(struct ofport *ofport_, + const struct ofproto_port_queue *qdscp_list, + size_t n_qdscp) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct hmap new = HMAP_INITIALIZER(&new); + size_t i; + + for (i = 0; i < n_qdscp; i++) { + struct priority_to_dscp *pdscp; + uint32_t priority; + uint8_t dscp; + + dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK; + if (dpif_queue_to_priority(ofproto->dpif, qdscp_list[i].queue, + &priority)) { + continue; + } + + pdscp = get_priority(ofport, priority); + if (pdscp) { + hmap_remove(&ofport->priorities, &pdscp->hmap_node); + } else 
{ + pdscp = xmalloc(sizeof *pdscp); + pdscp->priority = priority; + pdscp->dscp = dscp; + ofproto->need_revalidate = true; + } + + if (pdscp->dscp != dscp) { + pdscp->dscp = dscp; + ofproto->need_revalidate = true; + } + + hmap_insert(&new, &pdscp->hmap_node, hash_int(pdscp->priority, 0)); + } + + if (!hmap_is_empty(&ofport->priorities)) { + ofport_clear_priorities(ofport); + ofproto->need_revalidate = true; + } + + hmap_swap(&new, &ofport->priorities); + hmap_destroy(&new); + + return 0; +} + /* Bundles. */ /* Expires all MAC learning entries associated with 'port' and forces ofproto @@ -1232,8 +1376,7 @@ bundle_update(struct ofbundle *bundle) bundle->floodable = true; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - if (port->up.opp.config & htonl(OFPPC_NO_FLOOD) - || !stp_forward_in_state(port->stp_state)) { + if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) { bundle->floodable = false; break; } @@ -1280,8 +1423,7 @@ bundle_add_port(struct ofbundle *bundle, uint32_t ofp_port, port->bundle = bundle; list_push_back(&bundle->ports, &port->bundle_node); - if (port->up.opp.config & htonl(OFPPC_NO_FLOOD) - || !stp_forward_in_state(port->stp_state)) { + if (port->up.opp.config & htonl(OFPPC_NO_FLOOD)) { bundle->floodable = false; } } @@ -1367,6 +1509,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, bundle->vlan_mode = PORT_VLAN_TRUNK; bundle->vlan = -1; bundle->trunks = NULL; + bundle->use_priority_tags = s->use_priority_tags; bundle->lacp = NULL; bundle->bond = NULL; @@ -1425,8 +1568,10 @@ bundle_set(struct ofproto *ofproto_, void *aux, } /* Set VLAN tagging mode */ - if (s->vlan_mode != bundle->vlan_mode) { + if (s->vlan_mode != bundle->vlan_mode + || s->use_priority_tags != bundle->use_priority_tags) { bundle->vlan_mode = s->vlan_mode; + bundle->use_priority_tags = s->use_priority_tags; need_flush = true; } @@ -1549,12 +1694,8 @@ send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) pdu_size); memcpy(packet_pdu, pdu, pdu_size); - error = 
netdev_send(port->up.netdev, &packet); - if (error) { - VLOG_WARN_RL(&rl, "port %s: sending LACP PDU on iface %s failed " - "(%s)", port->bundle->name, - netdev_get_name(port->up.netdev), strerror(error)); - } + send_packet(ofproto_dpif_cast(port->up.ofproto), port->odp_port, + &packet); ofpbuf_uninit(&packet); } else { VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface " @@ -1573,7 +1714,16 @@ bundle_send_learning_packets(struct ofbundle *bundle) error = n_packets = n_errors = 0; LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { if (e->port.p != bundle) { - int ret = bond_send_learning_packet(bundle->bond, e->mac, e->vlan); + struct ofpbuf *learning_packet; + struct ofport_dpif *port; + int ret; + + learning_packet = bond_compose_learning_packet(bundle->bond, e->mac, + e->vlan, + (void **)&port); + ret = send_packet(ofproto_dpif_cast(port->up.ofproto), + port->odp_port, learning_packet); + ofpbuf_delete(learning_packet); if (ret) { error = ret; n_errors++; @@ -1655,6 +1805,39 @@ mirror_lookup(struct ofproto_dpif *ofproto, void *aux) return NULL; } +/* Update the 'dup_mirrors' member of each of the ofmirrors in 'ofproto'. 
*/ +static void +mirror_update_dups(struct ofproto_dpif *ofproto) +{ + int i; + + for (i = 0; i < MAX_MIRRORS; i++) { + struct ofmirror *m = ofproto->mirrors[i]; + + if (m) { + m->dup_mirrors = MIRROR_MASK_C(1) << i; + } + } + + for (i = 0; i < MAX_MIRRORS; i++) { + struct ofmirror *m1 = ofproto->mirrors[i]; + int j; + + if (!m1) { + continue; + } + + for (j = i + 1; j < MAX_MIRRORS; j++) { + struct ofmirror *m2 = ofproto->mirrors[j]; + + if (m2 && m1->out == m2->out && m1->out_vlan == m2->out_vlan) { + m1->dup_mirrors |= MIRROR_MASK_C(1) << j; + m2->dup_mirrors |= m1->dup_mirrors; + } + } + } +} + static int mirror_set(struct ofproto *ofproto_, void *aux, const struct ofproto_mirror_settings *s) @@ -1760,6 +1943,7 @@ mirror_set(struct ofproto *ofproto_, void *aux, ofproto->need_revalidate = true; mac_learning_flush(ofproto->ml); + mirror_update_dups(ofproto); return 0; } @@ -1793,6 +1977,8 @@ mirror_destroy(struct ofmirror *mirror) ofproto->mirrors[mirror->idx] = NULL; free(mirror->name); free(mirror); + + mirror_update_dups(ofproto); } static int @@ -2023,6 +2209,7 @@ port_is_lacp_current(const struct ofport *ofport_) struct flow_miss { struct hmap_node hmap_node; struct flow flow; + enum odp_key_fitness key_fitness; const struct nlattr *key; size_t key_len; struct list packets; @@ -2030,7 +2217,7 @@ struct flow_miss { struct flow_miss_op { union dpif_op dpif_op; - struct facet *facet; + struct subfacet *subfacet; }; /* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each @@ -2113,6 +2300,7 @@ process_special(struct ofproto_dpif *ofproto, const struct flow *flow, static struct flow_miss * flow_miss_create(struct hmap *todo, const struct flow *flow, + enum odp_key_fitness key_fitness, const struct nlattr *key, size_t key_len) { uint32_t hash = flow_hash(flow, 0); @@ -2127,6 +2315,7 @@ flow_miss_create(struct hmap *todo, const struct flow *flow, miss = xmalloc(sizeof *miss); hmap_insert(todo, &miss->hmap_node, hash); miss->flow = *flow; + 
miss->key_fitness = key_fitness; miss->key = key; miss->key_len = key_len; list_init(&miss->packets); @@ -2139,6 +2328,7 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, { const struct flow *flow = &miss->flow; struct ofpbuf *packet, *next_packet; + struct subfacet *subfacet; struct facet *facet; facet = facet_lookup_valid(ofproto, flow); @@ -2172,6 +2362,9 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, facet = facet_create(rule, flow); } + subfacet = subfacet_create(ofproto, facet, + miss->key_fitness, miss->key, miss->key_len); + LIST_FOR_EACH_SAFE (packet, next_packet, list_node, &miss->packets) { list_remove(&packet->list_node); ofproto->n_matches++; @@ -2199,7 +2392,7 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, struct flow_miss_op *op = &ops[(*n_ops)++]; struct dpif_execute *execute = &op->dpif_op.execute; - op->facet = facet; + op->subfacet = subfacet; execute->type = DPIF_OP_EXECUTE; execute->key = miss->key; execute->key_len = miss->key_len; @@ -2212,11 +2405,11 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, } } - if (facet->may_install) { + if (facet->may_install && subfacet->key_fitness != ODP_FIT_TOO_LITTLE) { struct flow_miss_op *op = &ops[(*n_ops)++]; struct dpif_flow_put *put = &op->dpif_op.flow_put; - op->facet = facet; + op->subfacet = subfacet; put->type = DPIF_OP_FLOW_PUT; put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; put->key = miss->key; @@ -2250,12 +2443,16 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, * that we can process them together. */ hmap_init(&todo); for (upcall = upcalls; upcall < &upcalls[n_upcalls]; upcall++) { + enum odp_key_fitness fitness; struct flow_miss *miss; struct flow flow; - /* Obtain in_port and tun_id, at least, then set 'flow''s header - * pointers. 
*/ - odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); + /* Obtain metadata and check userspace/kernel agreement on flow match, + * then set 'flow''s header pointers. */ + fitness = odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); + if (fitness == ODP_FIT_ERROR) { + continue; + } flow_extract(upcall->packet, flow.priority, flow.tun_id, flow.in_port, &flow); @@ -2267,7 +2464,8 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, } /* Add other packets to a to-do list. */ - miss = flow_miss_create(&todo, &flow, upcall->key, upcall->key_len); + miss = flow_miss_create(&todo, &flow, fitness, + upcall->key, upcall->key_len); list_push_back(&miss->packets, &upcall->packet->list_node); } @@ -2298,7 +2496,7 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, switch (op->dpif_op.type) { case DPIF_OP_EXECUTE: execute = &op->dpif_op.execute; - if (op->facet->actions != execute->actions) { + if (op->subfacet->facet->actions != execute->actions) { free((struct nlattr *) execute->actions); } ofpbuf_delete((struct ofpbuf *) execute->packet); @@ -2307,7 +2505,7 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, case DPIF_OP_FLOW_PUT: put = &op->dpif_op.flow_put; if (!put->error) { - op->facet->installed = true; + op->subfacet->installed = true; } break; } @@ -2361,10 +2559,10 @@ handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) /* Flow expiration. */ -static int facet_max_idle(const struct ofproto_dpif *); +static int subfacet_max_idle(const struct ofproto_dpif *); static void update_stats(struct ofproto_dpif *); static void rule_expire(struct rule_dpif *); -static void expire_facets(struct ofproto_dpif *, int dp_max_idle); +static void expire_subfacets(struct ofproto_dpif *, int dp_max_idle); /* This function is called periodically by run(). 
Its job is to collect * updates for the flows that have been installed into the datapath, most @@ -2382,9 +2580,9 @@ expire(struct ofproto_dpif *ofproto) /* Update stats for each flow in the datapath. */ update_stats(ofproto); - /* Expire facets that have been idle too long. */ - dp_max_idle = facet_max_idle(ofproto); - expire_facets(ofproto, dp_max_idle); + /* Expire subfacets that have been idle too long. */ + dp_max_idle = subfacet_max_idle(ofproto); + expire_subfacets(ofproto, dp_max_idle); /* Expire OpenFlow flows whose idle_timeout or hard_timeout has passed. */ OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { @@ -2432,46 +2630,41 @@ update_stats(struct ofproto_dpif *p) dpif_flow_dump_start(&dump, p->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) { - struct facet *facet; + enum odp_key_fitness fitness; + struct subfacet *subfacet; struct flow flow; - if (odp_flow_key_to_flow(key, key_len, &flow)) { - struct ds s; - - ds_init(&s); - odp_flow_key_format(key, key_len, &s); - VLOG_WARN_RL(&rl, "failed to convert datapath flow key to flow: %s", - ds_cstr(&s)); - ds_destroy(&s); - + fitness = odp_flow_key_to_flow(key, key_len, &flow); + if (fitness == ODP_FIT_ERROR) { continue; } - facet = facet_find(p, &flow); - if (facet && facet->installed) { + subfacet = subfacet_find(p, key, key_len, &flow); + if (subfacet && subfacet->installed) { + struct facet *facet = subfacet->facet; - if (stats->n_packets >= facet->dp_packet_count) { - uint64_t extra = stats->n_packets - facet->dp_packet_count; + if (stats->n_packets >= subfacet->dp_packet_count) { + uint64_t extra = stats->n_packets - subfacet->dp_packet_count; facet->packet_count += extra; } else { VLOG_WARN_RL(&rl, "unexpected packet count from the datapath"); } - if (stats->n_bytes >= facet->dp_byte_count) { - facet->byte_count += stats->n_bytes - facet->dp_byte_count; + if (stats->n_bytes >= subfacet->dp_byte_count) { + facet->byte_count += stats->n_bytes - subfacet->dp_byte_count; } 
else { VLOG_WARN_RL(&rl, "unexpected byte count from datapath"); } - facet->dp_packet_count = stats->n_packets; - facet->dp_byte_count = stats->n_bytes; + subfacet->dp_packet_count = stats->n_packets; + subfacet->dp_byte_count = stats->n_bytes; - facet_update_time(p, facet, stats->used); + subfacet_update_time(p, subfacet, stats->used); facet_account(p, facet); facet_push_stats(facet); } else { - /* There's a flow in the datapath that we know nothing about. - * Delete it. */ + /* There's a flow in the datapath that we know nothing about, or a + * flow that shouldn't be installed but was anyway. Delete it. */ COVERAGE_INC(facet_unexpected); dpif_flow_del(p->dpif, key, key_len, NULL); } @@ -2480,58 +2673,60 @@ update_stats(struct ofproto_dpif *p) } /* Calculates and returns the number of milliseconds of idle time after which - * facets should expire from the datapath and we should fold their statistics - * into their parent rules in userspace. */ + * subfacets should expire from the datapath. When a subfacet expires, we fold + * its statistics into its facet, and when a facet's last subfacet expires, we + * fold its statistics into its rule. */ static int -facet_max_idle(const struct ofproto_dpif *ofproto) +subfacet_max_idle(const struct ofproto_dpif *ofproto) { /* * Idle time histogram. * - * Most of the time a switch has a relatively small number of facets. When - * this is the case we might as well keep statistics for all of them in - * userspace and to cache them in the kernel datapath for performance as + * Most of the time a switch has a relatively small number of subfacets. + * When this is the case we might as well keep statistics for all of them + * in userspace and cache them in the kernel datapath for performance as * well. * - * As the number of facets increases, the memory required to maintain + * As the number of subfacets increases, the memory required to maintain * statistics about them in userspace and in the kernel becomes - * significant. 
However, with a large number of facets it is likely that - * only a few of them are "heavy hitters" that consume a large amount of - * bandwidth. At this point, only heavy hitters are worth caching in the - * kernel and maintaining in userspaces; other facets we can discard. + * significant. However, with a large number of subfacets it is likely + * that only a few of them are "heavy hitters" that consume a large amount + * of bandwidth. At this point, only heavy hitters are worth caching in + * the kernel and maintaining in userspace; other subfacets we can + * discard. * * The technique used to compute the idle time is to build a histogram with - * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each facet + * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet * that is installed in the kernel gets dropped in the appropriate bucket. * After the histogram has been built, we compute the cutoff so that only - * the most-recently-used 1% of facets (but at least + * the most-recently-used 1% of subfacets (but at least * ofproto->up.flow_eviction_threshold flows) are kept cached. At least - * the most-recently-used bucket of facets is kept, so actually an - * arbitrary number of facets can be kept in any given expiration run + * the most-recently-used bucket of subfacets is kept, so actually an + * arbitrary number of subfacets can be kept in any given expiration run * (though the next run will delete most of those unless they receive * additional data). * - * This requires a second pass through the facets, in addition to the pass - * made by update_stats(), because the former function never looks - * at uninstallable facets. + * This requires a second pass through the subfacets, in addition to the + * pass made by update_stats(), because that function never looks at + * uninstallable subfacets. 
*/ enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) }; enum { N_BUCKETS = 5000 / BUCKET_WIDTH }; int buckets[N_BUCKETS] = { 0 }; int total, subtotal, bucket; - struct facet *facet; + struct subfacet *subfacet; long long int now; int i; - total = hmap_count(&ofproto->facets); + total = hmap_count(&ofproto->subfacets); if (total <= ofproto->up.flow_eviction_threshold) { return N_BUCKETS * BUCKET_WIDTH; } /* Build histogram. */ now = time_msec(); - HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { - long long int idle = now - facet->used; + HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->subfacets) { + long long int idle = now - subfacet->used; int bucket = (idle <= 0 ? 0 : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1 : (unsigned int) idle / BUCKET_WIDTH); @@ -2566,38 +2761,15 @@ facet_max_idle(const struct ofproto_dpif *ofproto) } static void -facet_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet) -{ - if (ofproto->netflow && !facet_is_controller_flow(facet) && - netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) { - struct ofexpired expired; - - if (facet->installed) { - struct dpif_flow_stats stats; - - facet_put__(ofproto, facet, facet->actions, facet->actions_len, - &stats); - facet_update_stats(ofproto, facet, &stats); - } - - expired.flow = facet->flow; - expired.packet_count = facet->packet_count; - expired.byte_count = facet->byte_count; - expired.used = facet->used; - netflow_expire(ofproto->netflow, &facet->nf_flow, &expired); - } -} - -static void -expire_facets(struct ofproto_dpif *ofproto, int dp_max_idle) +expire_subfacets(struct ofproto_dpif *ofproto, int dp_max_idle) { long long int cutoff = time_msec() - dp_max_idle; - struct facet *facet, *next_facet; + struct subfacet *subfacet, *next_subfacet; - HMAP_FOR_EACH_SAFE (facet, next_facet, hmap_node, &ofproto->facets) { - facet_active_timeout(ofproto, facet); - if (facet->used < cutoff) { - facet_remove(ofproto, facet); + HMAP_FOR_EACH_SAFE (subfacet, 
next_subfacet, hmap_node, + &ofproto->subfacets) { + if (subfacet->used < cutoff) { + subfacet_destroy(ofproto, subfacet); } } } @@ -2645,7 +2817,10 @@ rule_expire(struct rule_dpif *rule) * the ofproto's classifier table. * * The facet will initially have no ODP actions. The caller should fix that - * by calling facet_make_actions(). */ + * by calling facet_make_actions(). + * + * The facet will initially have no subfacets. The caller should create (at + * least) one subfacet with subfacet_create(). */ static struct facet * facet_create(struct rule_dpif *rule, const struct flow *flow) { @@ -2658,6 +2833,7 @@ facet_create(struct rule_dpif *rule, const struct flow *flow) list_push_back(&rule->facets, &facet->list_node); facet->rule = rule; facet->flow = *flow; + list_init(&facet->subfacets); netflow_flow_init(&facet->nf_flow); netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used); @@ -2726,45 +2902,23 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, return !error; } -/* Executes the actions indicated by 'facet' on 'packet' and credits 'facet''s - * statistics appropriately. 'packet' must have at least sizeof(struct - * ofp_packet_in) bytes of headroom. - * - * For correct results, 'packet' must actually be in 'facet''s flow; that is, - * applying flow_extract() to 'packet' would yield the same flow as - * 'facet->flow'. - * - * 'facet' must have accurately composed datapath actions; that is, it must - * not be in need of revalidation. - * - * Takes ownership of 'packet'. 
*/ -static void -facet_execute(struct ofproto_dpif *ofproto, struct facet *facet, - struct ofpbuf *packet) -{ - struct dpif_flow_stats stats; - - assert(ofpbuf_headroom(packet) >= sizeof(struct ofp_packet_in)); - - dpif_flow_stats_extract(&facet->flow, packet, &stats); - stats.used = time_msec(); - if (execute_odp_actions(ofproto, &facet->flow, - facet->actions, facet->actions_len, packet)) { - facet_update_stats(ofproto, facet, &stats); - } -} - /* Remove 'facet' from 'ofproto' and free up the associated memory: * * - If 'facet' was installed in the datapath, uninstalls it and updates its - * rule's statistics, via facet_uninstall(). + * rule's statistics, via subfacet_uninstall(). * * - Removes 'facet' from its rule and from ofproto->facets. */ static void facet_remove(struct ofproto_dpif *ofproto, struct facet *facet) { - facet_uninstall(ofproto, facet); + struct subfacet *subfacet, *next_subfacet; + + LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node, + &facet->subfacets) { + subfacet_destroy__(ofproto, subfacet); + } + facet_flush_stats(ofproto, facet); hmap_remove(&ofproto->facets, &facet->hmap_node); list_remove(&facet->list_node); @@ -2798,55 +2952,6 @@ facet_make_actions(struct ofproto_dpif *p, struct facet *facet, ofpbuf_delete(odp_actions); } -/* Updates 'facet''s flow in the datapath setting its actions to 'actions_len' - * bytes of actions in 'actions'. If 'stats' is non-null, statistics counters - * in the datapath will be zeroed and 'stats' will be updated with traffic new - * since 'facet' was last updated. 
- * - * Returns 0 if successful, otherwise a positive errno value.*/ -static int -facet_put__(struct ofproto_dpif *ofproto, struct facet *facet, - const struct nlattr *actions, size_t actions_len, - struct dpif_flow_stats *stats) -{ - struct odputil_keybuf keybuf; - enum dpif_flow_put_flags flags; - struct ofpbuf key; - int ret; - - flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; - if (stats) { - flags |= DPIF_FP_ZERO_STATS; - } - - ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &facet->flow); - - ret = dpif_flow_put(ofproto->dpif, flags, key.data, key.size, - actions, actions_len, stats); - - if (stats) { - facet_reset_dp_stats(facet, stats); - } - - return ret; -} - -/* If 'facet' is installable, inserts or re-inserts it into 'p''s datapath. If - * 'zero_stats' is true, clears any existing statistics from the datapath for - * 'facet'. */ -static void -facet_install(struct ofproto_dpif *p, struct facet *facet, bool zero_stats) -{ - struct dpif_flow_stats stats; - - if (facet->may_install - && !facet_put__(p, facet, facet->actions, facet->actions_len, - zero_stats ? &stats : NULL)) { - facet->installed = true; - } -} - static void facet_account(struct ofproto_dpif *ofproto, struct facet *facet) { @@ -2884,10 +2989,10 @@ facet_account(struct ofproto_dpif *ofproto, struct facet *facet) * hash bucket.) 
*/ vlan_tci = facet->flow.vlan_tci; NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->actions, facet->actions_len) { + const struct ovs_action_push_vlan *vlan; struct ofport_dpif *port; switch (nl_attr_type(a)) { - const struct nlattr *nested; case OVS_ACTION_ATTR_OUTPUT: port = get_odp_port(ofproto, nl_attr_get_u32(a)); if (port && port->bundle && port->bundle->bond) { @@ -2896,50 +3001,18 @@ facet_account(struct ofproto_dpif *ofproto, struct facet *facet) } break; - case OVS_ACTION_ATTR_POP: - if (nl_attr_get_u16(a) == OVS_KEY_ATTR_8021Q) { - vlan_tci = htons(0); - } + case OVS_ACTION_ATTR_POP_VLAN: + vlan_tci = htons(0); break; - case OVS_ACTION_ATTR_PUSH: - nested = nl_attr_get(a); - if (nl_attr_type(nested) == OVS_KEY_ATTR_8021Q) { - const struct ovs_key_8021q *q_key; - - q_key = nl_attr_get_unspec(nested, sizeof(*q_key)); - vlan_tci = q_key->q_tci; - } + case OVS_ACTION_ATTR_PUSH_VLAN: + vlan = nl_attr_get(a); + vlan_tci = vlan->vlan_tci; break; } } } -/* If 'rule' is installed in the datapath, uninstalls it. */ -static void -facet_uninstall(struct ofproto_dpif *p, struct facet *facet) -{ - if (facet->installed) { - struct odputil_keybuf keybuf; - struct dpif_flow_stats stats; - struct ofpbuf key; - int error; - - ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &facet->flow); - - error = dpif_flow_del(p->dpif, key.data, key.size, &stats); - facet_reset_dp_stats(facet, &stats); - if (!error) { - facet_update_stats(p, facet, &stats); - } - facet->installed = false; - } else { - assert(facet->dp_packet_count == 0); - assert(facet->dp_byte_count == 0); - } -} - /* Returns true if the only action for 'facet' is to send to the controller. * (We don't report NetFlow expiration messages for such facets because they * are just part of the control logic for the network, not real traffic). */ @@ -2952,24 +3025,6 @@ facet_is_controller_flow(struct facet *facet) htons(OFPP_CONTROLLER))); } -/* Resets 'facet''s datapath statistics counters. 
This should be called when - * 'facet''s statistics are cleared in the datapath. If 'stats' is non-null, - * it should contain the statistics returned by dpif when 'facet' was reset in - * the datapath. 'stats' will be modified to only included statistics new - * since 'facet' was last updated. */ -static void -facet_reset_dp_stats(struct facet *facet, struct dpif_flow_stats *stats) -{ - if (stats && facet->dp_packet_count <= stats->n_packets - && facet->dp_byte_count <= stats->n_bytes) { - stats->n_packets -= facet->dp_packet_count; - stats->n_bytes -= facet->dp_byte_count; - } - - facet->dp_packet_count = 0; - facet->dp_byte_count = 0; -} - /* Folds all of 'facet''s statistics into its rule. Also updates the * accounting ofhook and emits a NetFlow expiration if appropriate. All of * 'facet''s statistics in the datapath should have been zeroed and folded into @@ -2977,8 +3032,12 @@ facet_reset_dp_stats(struct facet *facet, struct dpif_flow_stats *stats) static void facet_flush_stats(struct ofproto_dpif *ofproto, struct facet *facet) { - assert(!facet->dp_byte_count); - assert(!facet->dp_packet_count); + struct subfacet *subfacet; + + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + assert(!subfacet->dp_byte_count); + assert(!subfacet->dp_packet_count); + } facet_push_stats(facet); facet_account(ofproto, facet); @@ -3061,7 +3120,9 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) struct action_xlate_ctx ctx; struct ofpbuf *odp_actions; struct rule_dpif *new_rule; + struct subfacet *subfacet; bool actions_changed; + bool flush_stats; COVERAGE_INC(facet_revalidate); @@ -3087,19 +3148,24 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) /* If the datapath actions changed or the installability changed, * then we need to talk to the datapath. 
*/ - if (actions_changed || ctx.may_set_up_flow != facet->installed) { - if (ctx.may_set_up_flow) { - struct dpif_flow_stats stats; - - facet_put__(ofproto, facet, - odp_actions->data, odp_actions->size, &stats); - facet_update_stats(ofproto, facet, &stats); - } else { - facet_uninstall(ofproto, facet); + flush_stats = false; + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + bool should_install = (ctx.may_set_up_flow + && subfacet->key_fitness != ODP_FIT_TOO_LITTLE); + if (actions_changed || should_install != subfacet->installed) { + if (should_install) { + struct dpif_flow_stats stats; + + subfacet_install(ofproto, subfacet, + odp_actions->data, odp_actions->size, &stats); + subfacet_update_stats(ofproto, subfacet, &stats); + } else { + subfacet_uninstall(ofproto, subfacet); + } + flush_stats = true; } - - /* The datapath flow is gone or has zeroed stats, so push stats out of - * 'facet' into 'rule'. */ + } + if (flush_stats) { facet_flush_stats(ofproto, facet); } @@ -3143,25 +3209,6 @@ facet_update_time(struct ofproto_dpif *ofproto, struct facet *facet, } } -/* Folds the statistics from 'stats' into the counters in 'facet'. - * - * Because of the meaning of a facet's counters, it only makes sense to do this - * if 'stats' are not tracked in the datapath, that is, if 'stats' represents a - * packet that was sent by hand or if it represents statistics that have been - * cleared out of the datapath. 
*/ -static void -facet_update_stats(struct ofproto_dpif *ofproto, struct facet *facet, - const struct dpif_flow_stats *stats) -{ - if (stats->n_packets || stats->used > facet->used) { - facet_update_time(ofproto, facet, stats->used); - facet->packet_count += stats->n_packets; - facet->byte_count += stats->n_bytes; - facet_push_stats(facet); - netflow_flow_update_flags(&facet->nf_flow, stats->tcp_flags); - } -} - static void facet_reset_counters(struct facet *facet) { @@ -3217,7 +3264,7 @@ push_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) * 'rule''s actions. */ static void flow_push_stats(const struct rule_dpif *rule, - struct flow *flow, uint64_t packets, uint64_t bytes, + const struct flow *flow, uint64_t packets, uint64_t bytes, long long int used) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); @@ -3233,7 +3280,226 @@ flow_push_stats(const struct rule_dpif *rule, rule->up.actions, rule->up.n_actions)); } -/* Rules. */ +/* Subfacets. */ + +static struct subfacet * +subfacet_find__(struct ofproto_dpif *ofproto, + const struct nlattr *key, size_t key_len, uint32_t key_hash, + const struct flow *flow) +{ + struct subfacet *subfacet; + + HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash, + &ofproto->subfacets) { + if (subfacet->key + ? (subfacet->key_len == key_len + && !memcmp(key, subfacet->key, key_len)) + : flow_equal(flow, &subfacet->facet->flow)) { + return subfacet; + } + } + + return NULL; +} + +/* Searches 'facet' (within 'ofproto') for a subfacet with the specified + * 'key_fitness', 'key', and 'key_len'. Returns the existing subfacet if + * there is one, otherwise creates and returns a new subfacet. 
*/ +static struct subfacet * +subfacet_create(struct ofproto_dpif *ofproto, struct facet *facet, + enum odp_key_fitness key_fitness, + const struct nlattr *key, size_t key_len) +{ + uint32_t key_hash = odp_flow_key_hash(key, key_len); + struct subfacet *subfacet; + + subfacet = subfacet_find__(ofproto, key, key_len, key_hash, &facet->flow); + if (subfacet) { + if (subfacet->facet == facet) { + return subfacet; + } + + /* This shouldn't happen. */ + VLOG_ERR_RL(&rl, "subfacet with wrong facet"); + subfacet_destroy(ofproto, subfacet); + } + + subfacet = xzalloc(sizeof *subfacet); + hmap_insert(&ofproto->subfacets, &subfacet->hmap_node, key_hash); + list_push_back(&facet->subfacets, &subfacet->list_node); + subfacet->facet = facet; + subfacet->used = time_msec(); + subfacet->key_fitness = key_fitness; + if (key_fitness != ODP_FIT_PERFECT) { + subfacet->key = xmemdup(key, key_len); + subfacet->key_len = key_len; + } + subfacet->installed = false; + + return subfacet; +} + +/* Searches 'ofproto' for a subfacet with the given 'key', 'key_len', and + * 'flow'. Returns the subfacet if one exists, otherwise NULL. */ +static struct subfacet * +subfacet_find(struct ofproto_dpif *ofproto, + const struct nlattr *key, size_t key_len, + const struct flow *flow) +{ + uint32_t key_hash = odp_flow_key_hash(key, key_len); + + return subfacet_find__(ofproto, key, key_len, key_hash, flow); +} + +/* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from + * its facet within 'ofproto', and frees it. */ +static void +subfacet_destroy__(struct ofproto_dpif *ofproto, struct subfacet *subfacet) +{ + subfacet_uninstall(ofproto, subfacet); + hmap_remove(&ofproto->subfacets, &subfacet->hmap_node); + list_remove(&subfacet->list_node); + free(subfacet->key); + free(subfacet); +} + +/* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the + * last remaining subfacet in its facet destroys the facet too. 
*/ +static void +subfacet_destroy(struct ofproto_dpif *ofproto, struct subfacet *subfacet) +{ + struct facet *facet = subfacet->facet; + + subfacet_destroy__(ofproto, subfacet); + if (list_is_empty(&facet->subfacets)) { + facet_remove(ofproto, facet); + } +} + +/* Initializes 'key' with the sequence of OVS_KEY_ATTR_* Netlink attributes + * that can be used to refer to 'subfacet'. The caller must provide 'keybuf' + * for use as temporary storage. */ +static void +subfacet_get_key(struct subfacet *subfacet, struct odputil_keybuf *keybuf, + struct ofpbuf *key) +{ + if (!subfacet->key) { + ofpbuf_use_stack(key, keybuf, sizeof *keybuf); + odp_flow_key_from_flow(key, &subfacet->facet->flow); + } else { + ofpbuf_use_const(key, subfacet->key, subfacet->key_len); + } +} + +/* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len' + * bytes of actions in 'actions'. If 'stats' is non-null, statistics counters + * in the datapath will be zeroed and 'stats' will be updated with traffic new + * since 'subfacet' was last updated. + * + * Returns 0 if successful, otherwise a positive errno value. */ +static int +subfacet_install(struct ofproto_dpif *ofproto, struct subfacet *subfacet, + const struct nlattr *actions, size_t actions_len, + struct dpif_flow_stats *stats) +{ + struct odputil_keybuf keybuf; + enum dpif_flow_put_flags flags; + struct ofpbuf key; + int ret; + + flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; + if (stats) { + flags |= DPIF_FP_ZERO_STATS; + } + + subfacet_get_key(subfacet, &keybuf, &key); + ret = dpif_flow_put(ofproto->dpif, flags, key.data, key.size, + actions, actions_len, stats); + + if (stats) { + subfacet_reset_dp_stats(subfacet, stats); + } + + return ret; +} + +/* If 'subfacet' is installed in the datapath, uninstalls it. 
*/ +static void +subfacet_uninstall(struct ofproto_dpif *p, struct subfacet *subfacet) +{ + if (subfacet->installed) { + struct odputil_keybuf keybuf; + struct dpif_flow_stats stats; + struct ofpbuf key; + int error; + + subfacet_get_key(subfacet, &keybuf, &key); + error = dpif_flow_del(p->dpif, key.data, key.size, &stats); + subfacet_reset_dp_stats(subfacet, &stats); + if (!error) { + subfacet_update_stats(p, subfacet, &stats); + } + subfacet->installed = false; + } else { + assert(subfacet->dp_packet_count == 0); + assert(subfacet->dp_byte_count == 0); + } +} + +/* Resets 'subfacet''s datapath statistics counters. This should be called + * when 'subfacet''s statistics are cleared in the datapath. If 'stats' is + * non-null, it should contain the statistics returned by dpif when 'subfacet' + * was reset in the datapath. 'stats' will be modified to include only + * statistics new since 'subfacet' was last updated. */ +static void +subfacet_reset_dp_stats(struct subfacet *subfacet, + struct dpif_flow_stats *stats) +{ + if (stats + && subfacet->dp_packet_count <= stats->n_packets + && subfacet->dp_byte_count <= stats->n_bytes) { + stats->n_packets -= subfacet->dp_packet_count; + stats->n_bytes -= subfacet->dp_byte_count; + } + + subfacet->dp_packet_count = 0; + subfacet->dp_byte_count = 0; +} + +/* Updates 'subfacet''s used time. The caller is responsible for calling + * facet_push_stats() to update the flows which 'subfacet' resubmits into. */ +static void +subfacet_update_time(struct ofproto_dpif *ofproto, struct subfacet *subfacet, + long long int used) +{ + if (used > subfacet->used) { + subfacet->used = used; + facet_update_time(ofproto, subfacet->facet, used); + } +} + +/* Folds the statistics from 'stats' into the counters in 'subfacet'. 
+ * + * Because of the meaning of a subfacet's counters, it only makes sense to do + * this if 'stats' are not tracked in the datapath, that is, if 'stats' + * represents a packet that was sent by hand or if it represents statistics + * that have been cleared out of the datapath. */ +static void +subfacet_update_stats(struct ofproto_dpif *ofproto, struct subfacet *subfacet, + const struct dpif_flow_stats *stats) +{ + if (stats->n_packets || stats->used > subfacet->used) { + struct facet *facet = subfacet->facet; + + subfacet_update_time(ofproto, subfacet, stats->used); + facet->packet_count += stats->n_packets; + facet->byte_count += stats->n_bytes; + facet_push_stats(facet); + netflow_flow_update_flags(&facet->nf_flow, stats->tcp_flags); + } +} + +/* Rules. */ static struct rule_dpif * rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, @@ -3247,7 +3513,7 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, } cls = &ofproto->up.tables[table_id]; - if (flow->tos_frag & FLOW_FRAG_ANY + if (flow->nw_frag & FLOW_NW_FRAG_ANY && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { /* For OFPC_NORMAL frag_handling, we must pretend that transport ports * are unavailable. */ @@ -3375,37 +3641,15 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes) } static int -rule_execute(struct rule *rule_, struct flow *flow, struct ofpbuf *packet) +rule_execute(struct rule *rule_, const struct flow *flow, + struct ofpbuf *packet) { struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct action_xlate_ctx ctx; struct ofpbuf *odp_actions; - struct facet *facet; size_t size; - /* First look for a related facet. If we find one, account it to that. 
*/ - facet = facet_lookup_valid(ofproto, flow); - if (facet && facet->rule == rule) { - if (!facet->may_install) { - facet_make_actions(ofproto, facet, packet); - } - facet_execute(ofproto, facet, packet); - return 0; - } - - /* Otherwise, if 'rule' is in fact the correct rule for 'packet', then - * create a new facet for it and use that. */ - if (rule_dpif_lookup(ofproto, flow, 0) == rule) { - facet = facet_create(rule, flow); - facet_make_actions(ofproto, facet, packet); - facet_execute(ofproto, facet, packet); - facet_install(ofproto, facet, true); - return 0; - } - - /* We can't account anything to a facet. If we were to try, then that - * facet would have a non-matching rule, busting our invariants. */ action_xlate_ctx_init(&ctx, ofproto, flow, packet); odp_actions = xlate_actions(&ctx, rule->up.actions, rule->up.n_actions); size = packet->size; @@ -3482,19 +3726,12 @@ put_userspace_action(const struct ofproto_dpif *ofproto, const struct flow *flow, const struct user_action_cookie *cookie) { - size_t offset; uint32_t pid; pid = dpif_port_get_pid(ofproto->dpif, ofp_port_to_odp_port(flow->in_port)); - offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); - nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); - nl_msg_put_unspec(odp_actions, OVS_USERSPACE_ATTR_USERDATA, - cookie, sizeof *cookie); - nl_msg_end_nested(odp_actions, offset); - - return odp_actions->size - NLA_ALIGN(sizeof *cookie); + return odp_put_userspace_action(pid, cookie, odp_actions); } /* Compose SAMPLE action for sFlow. 
*/ @@ -3585,13 +3822,10 @@ fix_sflow_action(struct action_xlate_ctx *ctx) } static void -commit_action__(struct ofpbuf *odp_actions, - enum ovs_action_attr act_type, - enum ovs_key_attr key_type, - const void *key, size_t key_size) +commit_set_action(struct ofpbuf *odp_actions, enum ovs_key_attr key_type, + const void *key, size_t key_size) { - size_t offset = nl_msg_start_nested(odp_actions, act_type); - + size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); nl_msg_put_unspec(odp_actions, key_type, key, key_size); nl_msg_end_nested(odp_actions, offset); } @@ -3605,8 +3839,8 @@ commit_set_tun_id_action(const struct flow *flow, struct flow *base, } base->tun_id = flow->tun_id; - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_TUN_ID, &base->tun_id, sizeof(base->tun_id)); + commit_set_action(odp_actions, OVS_KEY_ATTR_TUN_ID, + &base->tun_id, sizeof(base->tun_id)); } static void @@ -3626,41 +3860,37 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base, memcpy(eth_key.eth_src, base->dl_src, ETH_ADDR_LEN); memcpy(eth_key.eth_dst, base->dl_dst, ETH_ADDR_LEN); - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_ETHERNET, ð_key, sizeof(eth_key)); + commit_set_action(odp_actions, OVS_KEY_ATTR_ETHERNET, + ð_key, sizeof(eth_key)); } static void -commit_vlan_action(struct action_xlate_ctx *ctx, ovs_be16 new_tci) +commit_vlan_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) { - struct flow *base = &ctx->base_flow; - - if (base->vlan_tci == new_tci) { + if (base->vlan_tci == flow->vlan_tci) { return; } if (base->vlan_tci & htons(VLAN_CFI)) { - nl_msg_put_u16(ctx->odp_actions, OVS_ACTION_ATTR_POP, - OVS_KEY_ATTR_8021Q); + nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); } - if (new_tci & htons(VLAN_CFI)) { - struct ovs_key_8021q q_key; + if (flow->vlan_tci & htons(VLAN_CFI)) { + struct ovs_action_push_vlan vlan; - q_key.q_tpid = htons(ETH_TYPE_VLAN); - q_key.q_tci = 
new_tci & ~htons(VLAN_CFI); - - commit_action__(ctx->odp_actions, OVS_ACTION_ATTR_PUSH, - OVS_KEY_ATTR_8021Q, &q_key, sizeof(q_key)); + vlan.vlan_tpid = htons(ETH_TYPE_VLAN); + vlan.vlan_tci = flow->vlan_tci; + nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, + &vlan, sizeof vlan); } - base->vlan_tci = new_tci; + base->vlan_tci = flow->vlan_tci; } static void commit_set_nw_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions) { - int frag = base->tos_frag & FLOW_FRAG_MASK; struct ovs_key_ipv4 ipv4_key; if (base->dl_type != htons(ETH_TYPE_IP) || @@ -3670,22 +3900,23 @@ commit_set_nw_action(const struct flow *flow, struct flow *base, if (base->nw_src == flow->nw_src && base->nw_dst == flow->nw_dst && - base->tos_frag == flow->tos_frag) { + base->nw_tos == flow->nw_tos && + base->nw_ttl == flow->nw_ttl && + base->nw_frag == flow->nw_frag) { return; } - - memset(&ipv4_key, 0, sizeof(ipv4_key)); ipv4_key.ipv4_src = base->nw_src = flow->nw_src; ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; + ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos; + ipv4_key.ipv4_ttl = base->nw_ttl = flow->nw_ttl; ipv4_key.ipv4_proto = base->nw_proto; - ipv4_key.ipv4_tos = flow->tos_frag & IP_DSCP_MASK; - ipv4_key.ipv4_frag = (frag == 0 ? OVS_FRAG_TYPE_NONE - : frag == FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST - : OVS_FRAG_TYPE_LATER); + ipv4_key.ipv4_frag = (base->nw_frag == 0 ? OVS_FRAG_TYPE_NONE + : base->nw_frag == FLOW_NW_FRAG_ANY + ? 
OVS_FRAG_TYPE_FIRST : OVS_FRAG_TYPE_LATER); - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(ipv4_key)); + commit_set_action(odp_actions, OVS_KEY_ATTR_IPV4, + &ipv4_key, sizeof(ipv4_key)); } static void @@ -3707,8 +3938,8 @@ commit_set_port_action(const struct flow *flow, struct flow *base, port_key.tcp_src = base->tp_src = flow->tp_src; port_key.tcp_dst = base->tp_dst = flow->tp_dst; - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_TCP, &port_key, sizeof(port_key)); + commit_set_action(odp_actions, OVS_KEY_ATTR_TCP, + &port_key, sizeof(port_key)); } else if (flow->nw_proto == IPPROTO_UDP) { struct ovs_key_udp port_key; @@ -3716,8 +3947,8 @@ commit_set_port_action(const struct flow *flow, struct flow *base, port_key.udp_src = base->tp_src = flow->tp_src; port_key.udp_dst = base->tp_dst = flow->tp_dst; - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_UDP, &port_key, sizeof(port_key)); + commit_set_action(odp_actions, OVS_KEY_ATTR_UDP, + &port_key, sizeof(port_key)); } } @@ -3730,9 +3961,8 @@ commit_set_priority_action(const struct flow *flow, struct flow *base, } base->priority = flow->priority; - commit_action__(odp_actions, OVS_ACTION_ATTR_SET, - OVS_KEY_ATTR_PRIORITY, &base->priority, - sizeof(base->priority)); + commit_set_action(odp_actions, OVS_KEY_ATTR_PRIORITY, + &base->priority, sizeof(base->priority)); } static void @@ -3744,43 +3974,51 @@ commit_odp_actions(struct action_xlate_ctx *ctx) commit_set_tun_id_action(flow, base, odp_actions); commit_set_ether_addr_action(flow, base, odp_actions); - commit_vlan_action(ctx, flow->vlan_tci); + commit_vlan_action(flow, base, odp_actions); commit_set_nw_action(flow, base, odp_actions); commit_set_port_action(flow, base, odp_actions); commit_set_priority_action(flow, base, odp_actions); } static void -compose_output_action(struct action_xlate_ctx *ctx, uint16_t odp_port) -{ - nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, 
odp_port); - ctx->sflow_odp_port = odp_port; - ctx->sflow_n_outputs++; -} - -static void -add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) +compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, + bool check_stp) { const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port); uint16_t odp_port = ofp_port_to_odp_port(ofp_port); + uint8_t flow_nw_tos = ctx->flow.nw_tos; if (ofport) { + struct priority_to_dscp *pdscp; + if (ofport->up.opp.config & htonl(OFPPC_NO_FWD) - || !stp_forward_in_state(ofport->stp_state)) { - /* Forwarding disabled on port. */ + || (check_stp && !stp_forward_in_state(ofport->stp_state))) { return; } + + pdscp = get_priority(ofport, ctx->flow.priority); + if (pdscp) { + ctx->flow.nw_tos &= ~IP_DSCP_MASK; + ctx->flow.nw_tos |= pdscp->dscp; + } } else { - /* - * We don't have an ofport record for this port, but it doesn't hurt to - * allow forwarding to it anyhow. Maybe such a port will appear later - * and we're pre-populating the flow table. - */ + /* We may not have an ofport record for this port, but it doesn't hurt + * to allow forwarding to it anyhow. Maybe such a port will appear + * later and we're pre-populating the flow table. 
*/ } commit_odp_actions(ctx); - compose_output_action(ctx, odp_port); + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); + ctx->sflow_odp_port = odp_port; + ctx->sflow_n_outputs++; ctx->nf_output_iface = ofp_port; + ctx->flow.nw_tos = flow_nw_tos; +} + +static void +compose_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) +{ + compose_output_action__(ctx, ofp_port, true); } static void @@ -3852,17 +4090,22 @@ xlate_resubmit_table(struct action_xlate_ctx *ctx, } static void -flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) +flood_packets(struct action_xlate_ctx *ctx, bool all) { struct ofport_dpif *ofport; commit_odp_actions(ctx); HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) { uint16_t ofp_port = ofport->up.ofp_port; - if (ofp_port != ctx->flow.in_port - && !(ofport->up.opp.config & mask) - && stp_forward_in_state(ofport->stp_state)) { - compose_output_action(ctx, ofport->odp_port); + + if (ofp_port == ctx->flow.in_port) { + continue; + } + + if (all) { + compose_output_action__(ctx, ofp_port, false); + } else if (!(ofport->up.opp.config & htonl(OFPPC_NO_FLOOD))) { + compose_output_action(ctx, ofp_port); } } @@ -3874,6 +4117,7 @@ compose_controller_action(struct action_xlate_ctx *ctx, int len) { struct user_action_cookie cookie; + commit_odp_actions(ctx); cookie.type = USER_ACTION_COOKIE_CONTROLLER; cookie.data = len; cookie.n_output = 0; @@ -3891,7 +4135,7 @@ xlate_output_action__(struct action_xlate_ctx *ctx, switch (port) { case OFPP_IN_PORT: - add_output_action(ctx, ctx->flow.in_port); + compose_output_action(ctx, ctx->flow.in_port); break; case OFPP_TABLE: xlate_table_action(ctx, ctx->flow.in_port, ctx->table_id); @@ -3900,23 +4144,22 @@ xlate_output_action__(struct action_xlate_ctx *ctx, xlate_normal(ctx); break; case OFPP_FLOOD: - flood_packets(ctx, htonl(OFPPC_NO_FLOOD)); + flood_packets(ctx, false); break; case OFPP_ALL: - flood_packets(ctx, htonl(0)); + flood_packets(ctx, true); break; case 
OFPP_CONTROLLER: - commit_odp_actions(ctx); compose_controller_action(ctx, max_len); break; case OFPP_LOCAL: - add_output_action(ctx, OFPP_LOCAL); + compose_output_action(ctx, OFPP_LOCAL); break; case OFPP_NONE: break; default: if (port != ctx->flow.in_port) { - add_output_action(ctx, port); + compose_output_action(ctx, port); } break; } @@ -3955,7 +4198,7 @@ static void xlate_enqueue_action(struct action_xlate_ctx *ctx, const struct ofp_action_enqueue *oae) { - uint16_t ofp_port, odp_port; + uint16_t ofp_port; uint32_t flow_priority, priority; int error; @@ -3974,17 +4217,16 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, } else if (ofp_port == ctx->flow.in_port) { return; } - odp_port = ofp_port_to_odp_port(ofp_port); /* Add datapath actions. */ flow_priority = ctx->flow.priority; ctx->flow.priority = priority; - add_output_action(ctx, odp_port); + compose_output_action(ctx, ofp_port); ctx->flow.priority = flow_priority; /* Update NetFlow output port. */ if (ctx->nf_output_iface == NF_OUT_DROP) { - ctx->nf_output_iface = odp_port; + ctx->nf_output_iface = ofp_port; } else if (ctx->nf_output_iface != NF_OUT_FLOOD) { ctx->nf_output_iface = NF_OUT_MULTI; } @@ -4166,8 +4408,8 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, break; case OFPUTIL_OFPAT_SET_NW_TOS: - ctx->flow.tos_frag &= ~IP_DSCP_MASK; - ctx->flow.tos_frag |= ia->nw_tos.nw_tos & IP_DSCP_MASK; + ctx->flow.nw_tos &= ~IP_DSCP_MASK; + ctx->flow.nw_tos |= ia->nw_tos.nw_tos & IP_DSCP_MASK; break; case OFPUTIL_OFPAT_SET_TP_SRC: @@ -4308,7 +4550,7 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->table_id = 0; ctx->exit = false; - if (ctx->flow.tos_frag & FLOW_FRAG_ANY) { + if (ctx->flow.nw_frag & FLOW_NW_FRAG_ANY) { switch (ctx->ofproto->up.frag_handling) { case OFPC_FRAG_NORMAL: /* We must pretend that transport ports are unavailable. 
*/ @@ -4342,7 +4584,7 @@ xlate_actions(struct action_xlate_ctx *ctx, if (ctx->packet && connmgr_msg_in_hook(ctx->ofproto->up.connmgr, &ctx->flow, ctx->packet)) { - compose_output_action(ctx, OVSP_LOCAL); + compose_output_action(ctx, OFPP_LOCAL); } } fix_sflow_action(ctx); @@ -4353,21 +4595,6 @@ xlate_actions(struct action_xlate_ctx *ctx, /* OFPP_NORMAL implementation. */ -struct dst { - struct ofport_dpif *port; - uint16_t vid; -}; - -struct dst_set { - struct dst builtin[32]; - struct dst *dsts; - size_t n, allocated; -}; - -static void dst_set_init(struct dst_set *); -static void dst_set_add(struct dst_set *, const struct dst *); -static void dst_set_free(struct dst_set *); - static struct ofport_dpif *ofbundle_get_a_port(const struct ofbundle *); /* Given 'vid', the VID obtained from the 802.1Q header that was received as @@ -4396,6 +4623,58 @@ input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid) } } +/* Checks whether a packet with the given 'vid' may ingress on 'in_bundle'. + * If so, returns true. Otherwise, returns false and, if 'warn' is true, logs + * a warning. + * + * 'vid' should be the VID obtained from the 802.1Q header that was received as + * part of a packet (specify 0 if there was no 802.1Q header), in the range + * 0...4095. */ +static bool +input_vid_is_valid(uint16_t vid, struct ofbundle *in_bundle, bool warn) +{ + switch (in_bundle->vlan_mode) { + case PORT_VLAN_ACCESS: + if (vid) { + if (warn) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged " + "packet received on port %s configured as VLAN " + "%"PRIu16" access port", + in_bundle->ofproto->up.name, vid, + in_bundle->name, in_bundle->vlan); + } + return false; + } + return true; + + case PORT_VLAN_NATIVE_UNTAGGED: + case PORT_VLAN_NATIVE_TAGGED: + if (!vid) { + /* Port must always carry its native VLAN. */ + return true; + } + /* Fall through. 
*/ + case PORT_VLAN_TRUNK: + if (!ofbundle_includes_vlan(in_bundle, vid)) { + if (warn) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" packet " + "received on port %s not configured for trunking " + "VLAN %"PRIu16, + in_bundle->ofproto->up.name, vid, + in_bundle->name, vid); + } + return false; + } + return true; + + default: + NOT_REACHED(); + } + +} + /* Given 'vlan', the VLAN that a packet belongs to, and * 'out_bundle', a bundle on which the packet is to be output, returns the VID * that should be included in the 802.1Q header. (If the return value is 0, @@ -4422,20 +4701,38 @@ output_vlan_to_vid(const struct ofbundle *out_bundle, uint16_t vlan) } } -static bool -set_dst(struct action_xlate_ctx *ctx, struct dst *dst, - const struct ofbundle *in_bundle, const struct ofbundle *out_bundle) +static void +output_normal(struct action_xlate_ctx *ctx, const struct ofbundle *out_bundle, + uint16_t vlan) { - uint16_t vlan; + struct ofport_dpif *port; + uint16_t vid; + ovs_be16 tci, old_tci; + + vid = output_vlan_to_vid(out_bundle, vlan); + if (!out_bundle->bond) { + port = ofbundle_get_a_port(out_bundle); + } else { + port = bond_choose_output_slave(out_bundle->bond, &ctx->flow, + vid, &ctx->tags); + if (!port) { + /* No slaves enabled, so drop packet. */ + return; + } + } - vlan = input_vid_to_vlan(in_bundle, vlan_tci_to_vid(ctx->flow.vlan_tci)); - dst->vid = output_vlan_to_vid(out_bundle, vlan); + old_tci = ctx->flow.vlan_tci; + tci = htons(vid); + if (tci || out_bundle->use_priority_tags) { + tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK); + if (tci) { + tci |= htons(VLAN_CFI); + } + } + ctx->flow.vlan_tci = tci; - dst->port = (!out_bundle->bond - ? 
ofbundle_get_a_port(out_bundle) - : bond_choose_output_slave(out_bundle->bond, &ctx->flow, - dst->vid, &ctx->tags)); - return dst->port != NULL; + compose_output_action(ctx, port->up.ofp_port); + ctx->flow.vlan_tci = old_tci; } static int @@ -4445,54 +4742,6 @@ mirror_mask_ffs(mirror_mask_t mask) return ffs(mask); } -static void -dst_set_init(struct dst_set *set) -{ - set->dsts = set->builtin; - set->n = 0; - set->allocated = ARRAY_SIZE(set->builtin); -} - -static void -dst_set_add(struct dst_set *set, const struct dst *dst) -{ - if (set->n >= set->allocated) { - size_t new_allocated; - struct dst *new_dsts; - - new_allocated = set->allocated * 2; - new_dsts = xmalloc(new_allocated * sizeof *new_dsts); - memcpy(new_dsts, set->dsts, set->n * sizeof *new_dsts); - - dst_set_free(set); - - set->dsts = new_dsts; - set->allocated = new_allocated; - } - set->dsts[set->n++] = *dst; -} - -static void -dst_set_free(struct dst_set *set) -{ - if (set->dsts != set->builtin) { - free(set->dsts); - } -} - -static bool -dst_is_duplicate(const struct dst_set *set, const struct dst *test) -{ - size_t i; - for (i = 0; i < set->n; i++) { - if (set->dsts[i].vid == test->vid - && set->dsts[i].port == test->port) { - return true; - } - } - return false; -} - static bool ofbundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan) { @@ -4514,12 +4763,12 @@ ofbundle_get_a_port(const struct ofbundle *bundle) struct ofport_dpif, bundle_node); } -static void +static mirror_mask_t compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan, const struct ofbundle *in_bundle, - const struct ofbundle *out_bundle, struct dst_set *set) + const struct ofbundle *out_bundle) { - struct dst dst; + mirror_mask_t dst_mirrors = 0; if (out_bundle == OFBUNDLE_FLOOD) { struct ofbundle *bundle; @@ -4528,16 +4777,18 @@ compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan, if (bundle != in_bundle && ofbundle_includes_vlan(bundle, vlan) && bundle->floodable - && !bundle->mirror_out - && set_dst(ctx, &dst, 
in_bundle, bundle)) { - dst_set_add(set, &dst); + && !bundle->mirror_out) { + output_normal(ctx, bundle, vlan); + dst_mirrors |= bundle->dst_mirrors; } } ctx->nf_output_iface = NF_OUT_FLOOD; - } else if (out_bundle && set_dst(ctx, &dst, in_bundle, out_bundle)) { - dst_set_add(set, &dst); - ctx->nf_output_iface = dst.port->odp_port; + } else if (out_bundle) { + output_normal(ctx, out_bundle, vlan); + dst_mirrors = out_bundle->dst_mirrors; } + + return dst_mirrors; } static bool @@ -4588,153 +4839,41 @@ eth_dst_may_rspan(const uint8_t dst[ETH_ADDR_LEN]) } static void -compose_mirror_dsts(struct action_xlate_ctx *ctx, - uint16_t vlan, const struct ofbundle *in_bundle, - struct dst_set *set) +output_mirrors(struct action_xlate_ctx *ctx, + uint16_t vlan, const struct ofbundle *in_bundle, + mirror_mask_t dst_mirrors) { struct ofproto_dpif *ofproto = ctx->ofproto; mirror_mask_t mirrors; - uint16_t flow_vid; - size_t i; - - mirrors = in_bundle->src_mirrors; - for (i = 0; i < set->n; i++) { - mirrors |= set->dsts[i].port->bundle->dst_mirrors; - } + mirrors = in_bundle->src_mirrors | dst_mirrors; if (!mirrors) { return; } - flow_vid = vlan_tci_to_vid(ctx->flow.vlan_tci); while (mirrors) { - struct ofmirror *m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1]; - if (vlan_is_mirrored(m, vlan)) { - struct dst dst; - - if (m->out) { - if (set_dst(ctx, &dst, in_bundle, m->out) - && !dst_is_duplicate(set, &dst)) { - dst_set_add(set, &dst); - } - } else if (eth_dst_may_rspan(ctx->flow.dl_dst)) { - struct ofbundle *bundle; - - HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { - if (ofbundle_includes_vlan(bundle, m->out_vlan) - && set_dst(ctx, &dst, in_bundle, bundle)) - { - /* set_dst() got dst->vid from the input packet's VLAN, - * not from m->out_vlan, so recompute it. */ - dst.vid = output_vlan_to_vid(bundle, m->out_vlan); - - if (dst_is_duplicate(set, &dst)) { - continue; - } - - if (bundle == in_bundle && dst.vid == flow_vid) { - /* Don't send out input port on same VLAN. 
*/ - continue; - } - dst_set_add(set, &dst); - } - } - } - } - mirrors &= mirrors - 1; - } -} + struct ofmirror *m; -static void -compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan, - const struct ofbundle *in_bundle, - const struct ofbundle *out_bundle) -{ - uint16_t initial_vid, cur_vid; - const struct dst *dst; - struct dst_set set; - - dst_set_init(&set); - compose_dsts(ctx, vlan, in_bundle, out_bundle, &set); - compose_mirror_dsts(ctx, vlan, in_bundle, &set); - if (!set.n) { - dst_set_free(&set); - return; - } + m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1]; - /* Output all the packets we can without having to change the VLAN. */ - commit_odp_actions(ctx); - initial_vid = vlan_tci_to_vid(ctx->flow.vlan_tci); - for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) { - if (dst->vid != initial_vid) { + if (!vlan_is_mirrored(m, vlan)) { + mirrors &= mirrors - 1; continue; } - compose_output_action(ctx, dst->port->odp_port); - } - /* Then output the rest. */ - cur_vid = initial_vid; - for (dst = set.dsts; dst < &set.dsts[set.n]; dst++) { - if (dst->vid == initial_vid) { - continue; - } - if (dst->vid != cur_vid) { - ovs_be16 tci; - - tci = htons(dst->vid); - tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK); - if (tci) { - tci |= htons(VLAN_CFI); - } - commit_vlan_action(ctx, tci); - - cur_vid = dst->vid; - } - compose_output_action(ctx, dst->port->odp_port); - } - - dst_set_free(&set); -} - -/* Returns the effective vlan of a packet, taking into account both the - * 802.1Q header and implicitly tagged ports. A value of 0 indicates that - * the packet is untagged and -1 indicates it has an invalid header and - * should be dropped. 
*/ -static int -flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow, - struct ofbundle *in_bundle, bool have_packet) -{ - int vlan = vlan_tci_to_vid(flow->vlan_tci); - if (vlan) { - if (in_bundle->vlan_mode == PORT_VLAN_ACCESS) { - /* Drop tagged packet on access port */ - if (have_packet) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged " - "packet received on port %s configured with " - "implicit VLAN %"PRIu16, - ofproto->up.name, vlan, - in_bundle->name, in_bundle->vlan); - } - return -1; - } else if (ofbundle_includes_vlan(in_bundle, vlan)) { - return vlan; - } else { - /* Drop packets from a VLAN not member of the trunk */ - if (have_packet) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged " - "packet received on port %s not configured for " - "trunking VLAN %d", - ofproto->up.name, vlan, in_bundle->name, vlan); + mirrors &= ~m->dup_mirrors; + if (m->out) { + output_normal(ctx, m->out, vlan); + } else if (eth_dst_may_rspan(ctx->flow.dl_dst) + && vlan != m->out_vlan) { + struct ofbundle *bundle; + + HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { + if (ofbundle_includes_vlan(bundle, m->out_vlan) + && !bundle->mirror_out) { + output_normal(ctx, bundle, m->out_vlan); + } } - return -1; - } - } else { - if (in_bundle->vlan_mode != PORT_VLAN_TRUNK) { - return in_bundle->vlan; - } else { - return ofbundle_includes_vlan(in_bundle, 0) ? 0 : -1; } } } @@ -4789,83 +4928,62 @@ update_learning_table(struct ofproto_dpif *ofproto, } } -/* Determines whether packets in 'flow' within 'br' should be forwarded or +static struct ofport_dpif * +lookup_input_bundle(struct ofproto_dpif *ofproto, uint16_t in_port, bool warn) +{ + struct ofport_dpif *ofport; + + /* Find the port and bundle for the received packet. 
*/ + ofport = get_ofp_port(ofproto, in_port); + if (ofport && ofport->bundle) { + return ofport; + } + + /* Odd. A few possible reasons here: + * + * - We deleted a port but there are still a few packets queued up + * from it. + * + * - Someone externally added a port (e.g. "ovs-dpctl add-if") that + * we don't know about. + * + * - The ofproto client didn't configure the port as part of a bundle. + */ + if (warn) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown " + "port %"PRIu16, ofproto->up.name, in_port); + } + return NULL; +} + +/* Determines whether packets in 'flow' within 'ofproto' should be forwarded or * dropped. Returns true if they may be forwarded, false if they should be * dropped. * - * If 'have_packet' is true, it indicates that the caller is processing a - * received packet. If 'have_packet' is false, then the caller is just - * revalidating an existing flow because configuration has changed. Either - * way, 'have_packet' only affects logging (there is no point in logging errors - * during revalidation). + * 'in_port' must be the ofport_dpif that corresponds to flow->in_port. + * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull). * - * Sets '*in_portp' to the input port. This will be a null pointer if - * flow->in_port does not designate a known input port (in which case - * is_admissible() returns false). - * - * When returning true, sets '*vlanp' to the effective VLAN of the input - * packet, as returned by flow_get_vlan(). + * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as + * returned by input_vid_to_vlan(). It must be a valid VLAN for 'in_port', as + * checked by input_vid_is_valid(). * * May also add tags to '*tags', although the current implementation only does * so in one special case. 
*/ static bool is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, - bool have_packet, - tag_type *tags, int *vlanp, struct ofbundle **in_bundlep) + struct ofport_dpif *in_port, uint16_t vlan, tag_type *tags) { - struct ofport_dpif *in_port; - struct ofbundle *in_bundle; - int vlan; - - /* Find the port and bundle for the received packet. */ - in_port = get_ofp_port(ofproto, flow->in_port); - *in_bundlep = in_bundle = in_port ? in_port->bundle : NULL; - if (!in_port || !in_bundle) { - /* No interface? Something fishy... */ - if (have_packet) { - /* Odd. A few possible reasons here: - * - * - We deleted a port but there are still a few packets queued up - * from it. - * - * - Someone externally added a port (e.g. "ovs-dpctl add-if") that - * we don't know about. - * - * - Packet arrived on the local port but the local port is not - * part of a bundle. - */ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - - VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown " - "port %"PRIu16, - ofproto->up.name, flow->in_port); - } - *vlanp = -1; - return false; - } - *vlanp = vlan = flow_get_vlan(ofproto, flow, in_bundle, have_packet); - if (vlan < 0) { - return false; - } + struct ofbundle *in_bundle = in_port->bundle; - /* Drop frames for reserved multicast addresses only if forward_bpdu - * option is absent. */ + /* Drop frames for reserved multicast addresses + * only if forward_bpdu option is absent. */ if (eth_addr_is_reserved(flow->dl_dst) && !ofproto->up.forward_bpdu) { return false; } - /* Drop frames on bundles reserved for mirroring. 
*/ - if (in_bundle->mirror_out) { - if (have_packet) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port " - "%s, which is reserved exclusively for mirroring", - ofproto->up.name, in_bundle->name); - } - return false; - } - if (in_bundle->bond) { struct mac_entry *mac; @@ -4894,18 +5012,60 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, static void xlate_normal(struct action_xlate_ctx *ctx) { + mirror_mask_t dst_mirrors = 0; + struct ofport_dpif *in_port; struct ofbundle *in_bundle; struct ofbundle *out_bundle; struct mac_entry *mac; - int vlan; + uint16_t vlan; + uint16_t vid; ctx->has_normal = true; - /* Check whether we should drop packets in this flow. */ - if (!is_admissible(ctx->ofproto, &ctx->flow, ctx->packet != NULL, - &ctx->tags, &vlan, &in_bundle)) { - out_bundle = NULL; - goto done; + /* Obtain in_port from ctx->flow.in_port. + * + * lookup_input_bundle() also ensures that in_port belongs to a bundle. */ + in_port = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port, + ctx->packet != NULL); + if (!in_port) { + return; + } + in_bundle = in_port->bundle; + + /* Drop malformed frames. */ + if (ctx->flow.dl_type == htons(ETH_TYPE_VLAN) && + !(ctx->flow.vlan_tci & htons(VLAN_CFI))) { + if (ctx->packet != NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial " + "VLAN tag received on port %s", + ctx->ofproto->up.name, in_bundle->name); + } + return; + } + + /* Drop frames on bundles reserved for mirroring. */ + if (in_bundle->mirror_out) { + if (ctx->packet != NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port " + "%s, which is reserved exclusively for mirroring", + ctx->ofproto->up.name, in_bundle->name); + } + return; + } + + /* Check VLAN. 
*/ + vid = vlan_tci_to_vid(ctx->flow.vlan_tci); + if (!input_vid_is_valid(vid, in_bundle, ctx->packet != NULL)) { + return; + } + vlan = input_vid_to_vlan(in_bundle, vid); + + /* Check other admissibility requirements. */ + if (!is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) { + output_mirrors(ctx, vlan, in_bundle, 0); + return; } /* Learn source MAC. */ @@ -4931,14 +5091,10 @@ xlate_normal(struct action_xlate_ctx *ctx) } /* Don't send packets out their input bundles. */ - if (in_bundle == out_bundle) { - out_bundle = NULL; - } - -done: - if (in_bundle) { - compose_actions(ctx, vlan, in_bundle, out_bundle); + if (in_bundle != out_bundle) { + dst_mirrors = compose_dsts(ctx, vlan, in_bundle, out_bundle); } + output_mirrors(ctx, vlan, in_bundle, dst_mirrors); } /* Optimized flow revalidation. @@ -5100,6 +5256,26 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet, } return error; } + +/* NetFlow. */ + +static int +set_netflow(struct ofproto *ofproto_, + const struct netflow_options *netflow_options) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + if (netflow_options) { + if (!ofproto->netflow) { + ofproto->netflow = netflow_create(); + } + return netflow_set_options(ofproto->netflow, netflow_options); + } else { + netflow_destroy(ofproto->netflow); + ofproto->netflow = NULL; + return 0; + } +} static void get_netflow_ids(const struct ofproto *ofproto_, @@ -5109,6 +5285,42 @@ get_netflow_ids(const struct ofproto *ofproto_, dpif_get_netflow_ids(ofproto->dpif, engine_type, engine_id); } + +static void +send_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet) +{ + if (!facet_is_controller_flow(facet) && + netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) { + struct subfacet *subfacet; + struct ofexpired expired; + + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + if (subfacet->installed) { + struct dpif_flow_stats stats; + + subfacet_install(ofproto, subfacet, facet->actions, + 
facet->actions_len, &stats); + subfacet_update_stats(ofproto, subfacet, &stats); + } + } + + expired.flow = facet->flow; + expired.packet_count = facet->packet_count; + expired.byte_count = facet->byte_count; + expired.used = facet->used; + netflow_expire(ofproto->netflow, &facet->nf_flow, &expired); + } +} + +static void +send_netflow_active_timeouts(struct ofproto_dpif *ofproto) +{ + struct facet *facet; + + HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { + send_active_timeout(ofproto, facet); + } +} static struct ofproto_dpif * ofproto_dpif_lookup(const char *name) @@ -5258,7 +5470,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, /* Convert string to datapath key. */ ofpbuf_init(&odp_key, 0); - error = odp_flow_key_from_string(arg1, &odp_key); + error = odp_flow_key_from_string(arg1, NULL, &odp_key); if (error) { unixctl_command_reply(conn, 501, "Bad flow syntax"); goto exit; @@ -5390,7 +5602,7 @@ ofproto_dpif_unixctl_init(void) unixctl_command_register("fdb/flush", "bridge", ofproto_unixctl_fdb_flush, NULL); unixctl_command_register("fdb/show", "bridge", ofproto_unixctl_fdb_show, - NULL); + NULL); unixctl_command_register("ofproto/clog", "", ofproto_dpif_clog, NULL); unixctl_command_register("ofproto/unclog", "", ofproto_dpif_unclog, NULL); } @@ -5443,6 +5655,7 @@ const struct ofproto_class ofproto_dpif_class = { get_stp_status, set_stp_port, get_stp_port_status, + set_queues, bundle_set, bundle_remove, mirror_set,