X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=ofproto%2Fofproto-dpif.c;h=eabce4e07213225838120debc28d7e01d19f19c9;hb=e2a6ca36ca0ebd859f87bf135b90395c53214f28;hp=241dbd3a861ad990c63d1dd09700d9d8714d28ca;hpb=e84173dc0c6df7b1816514c3c8702bcec81fc45b;p=openvswitch diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 241dbd3a..eabce4e0 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -134,12 +134,17 @@ struct ofmirror { struct ofbundle *out; /* Output port or NULL. */ int out_vlan; /* Output VLAN or -1. */ mirror_mask_t dup_mirrors; /* Bitmap of mirrors with the same output. */ + + /* Counters. */ + int64_t packet_count; /* Number of packets sent. */ + int64_t byte_count; /* Number of bytes sent. */ }; static void mirror_destroy(struct ofmirror *); +static void update_mirror_stats(struct ofproto_dpif *ofproto, + mirror_mask_t mirrors, + uint64_t packets, uint64_t bytes); -/* A group of one or more OpenFlow ports. */ -#define OFBUNDLE_FLOOD ((struct ofbundle *) 1) struct ofbundle { struct ofproto_dpif *ofproto; /* Owning ofproto. */ struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */ @@ -171,6 +176,8 @@ static void bundle_destroy(struct ofbundle *); static void bundle_del_port(struct ofport_dpif *); static void bundle_run(struct ofbundle *); static void bundle_wait(struct ofbundle *); +static struct ofport_dpif *lookup_input_bundle(struct ofproto_dpif *, + uint16_t in_port, bool warn); static void stp_run(struct ofproto_dpif *ofproto); static void stp_wait(struct ofproto_dpif *ofproto); @@ -213,13 +220,14 @@ struct action_xlate_ctx { bool has_learn; /* Actions include NXAST_LEARN? */ bool has_normal; /* Actions output to OFPP_NORMAL? */ uint16_t nf_output_iface; /* Output interface index for NetFlow. */ + mirror_mask_t mirrors; /* Bitmap of associated mirrors. */ /* xlate_actions() initializes and uses these members, but the client has no * reason to look at them. */ int recurse; /* Recursion level, via xlate_table_action. */ struct flow base_flow; /* Flow at the last commit. */ - uint32_t original_priority; /* Priority when packet arrived. */ + uint32_t orig_skb_priority; /* Priority when packet arrived. */ uint8_t table_id; /* OpenFlow table ID where flow was found. */ uint32_t sflow_n_outputs; /* Number of output ports. */ uint16_t sflow_odp_port; /* Output port for composing sFlow action. */ @@ -276,9 +284,9 @@ struct facet { uint64_t byte_count; /* Number of bytes received. */ /* Resubmit statistics. */ - uint64_t rs_packet_count; /* Packets pushed to resubmit children. */ - uint64_t rs_byte_count; /* Bytes pushed to resubmit children. */ - long long int rs_used; /* Used time pushed to resubmit children. */ + uint64_t prev_packet_count; /* Number of packets from last stats push. */ + uint64_t prev_byte_count; /* Number of bytes from last stats push. */ + long long int prev_used; /* Used time from last stats push. */ /* Accounting. */ uint64_t accounted_bytes; /* Bytes processed by facet_account(). */ @@ -294,6 +302,7 @@ struct facet { bool has_learn; /* Actions include NXAST_LEARN? */ bool has_normal; /* Actions output to OFPP_NORMAL? */ tag_type tags; /* Tags that would require revalidation. */ + mirror_mask_t mirrors; /* Bitmap of dependent mirrors. 
*/ }; static struct facet *facet_create(struct rule_dpif *, const struct flow *); @@ -309,7 +318,7 @@ static bool execute_controller_action(struct ofproto_dpif *, const struct flow *, const struct nlattr *odp_actions, size_t actions_len, - struct ofpbuf *packet); + struct ofpbuf *packet, bool clone); static void facet_flush_stats(struct ofproto_dpif *, struct facet *); @@ -364,8 +373,7 @@ static struct subfacet *subfacet_create(struct ofproto_dpif *, struct facet *, const struct nlattr *key, size_t key_len, ovs_be16 initial_tci); static struct subfacet *subfacet_find(struct ofproto_dpif *, - const struct nlattr *key, size_t key_len, - const struct flow *); + const struct nlattr *key, size_t key_len); static void subfacet_destroy(struct ofproto_dpif *, struct subfacet *); static void subfacet_destroy__(struct ofproto_dpif *, struct subfacet *); static void subfacet_reset_dp_stats(struct subfacet *, @@ -392,11 +400,21 @@ struct ofport_dpif { uint32_t bond_stable_id; /* stable_id to use as bond slave, or 0. */ bool may_enable; /* May be enabled in bonds. */ + /* Spanning tree. */ struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */ enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ long long int stp_state_entered; struct hmap priorities; /* Map of attached 'priority_to_dscp's. */ + + /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) + * + * This is deprecated. It is only for compatibility with broken device + * drivers in old versions of Linux that do not properly support VLANs when + * VLAN devices are not used. When broken device drivers are no longer in + * widespread use, we will delete these interfaces. */ + uint16_t realdev_ofp_port; + int vlandev_vid; }; /* Node in 'ofport_dpif''s 'priorities' map. Used to maintain a map from @@ -409,6 +427,27 @@ struct priority_to_dscp { uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */ }; +/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) + * + * This is deprecated. It is only for compatibility with broken device drivers + * in old versions of Linux that do not properly support VLANs when VLAN + * devices are not used. When broken device drivers are no longer in + * widespread use, we will delete these interfaces. */ +struct vlan_splinter { + struct hmap_node realdev_vid_node; + struct hmap_node vlandev_node; + uint16_t realdev_ofp_port; + uint16_t vlandev_ofp_port; + int vid; +}; + +static uint32_t vsp_realdev_to_vlandev(const struct ofproto_dpif *, + uint32_t realdev, ovs_be16 vlan_tci); +static uint16_t vsp_vlandev_to_realdev(const struct ofproto_dpif *, + uint16_t vlandev, int *vid); +static void vsp_remove(struct ofport_dpif *); +static void vsp_add(struct ofport_dpif *, uint16_t realdev_ofp_port, int vid); + static struct ofport_dpif * ofport_dpif_cast(const struct ofport *ofport) { @@ -438,6 +477,7 @@ struct table_dpif { }; struct ofproto_dpif { + struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */ struct ofproto up; struct dpif *dpif; int max_ports; @@ -469,16 +509,25 @@ struct ofproto_dpif { struct list completions; bool has_bundle_action; /* True when the first bundle action appears. */ + struct netdev_stats stats; /* To account packets generated and consumed in + * userspace. */ /* Spanning tree. */ struct stp *stp; long long int stp_last_tick; + + /* VLAN splinters. */ + struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */ + struct hmap vlandev_map; /* vlandev -> (realdev,vid). */ }; /* Defer flow mod completion until "ovs-appctl ofproto/unclog"? 
(Useful only * for debugging the asynchronous flow_mod implementation.) */ static bool clogged; +/* All existing ofproto_dpif instances, indexed by ->up.name. */ +static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs); + static void ofproto_dpif_unixctl_init(void); static struct ofproto_dpif * @@ -499,10 +548,7 @@ static void update_learning_table(struct ofproto_dpif *, struct ofbundle *); /* Upcalls. */ #define FLOW_MISS_MAX_BATCH 50 - -static void handle_upcall(struct ofproto_dpif *, struct dpif_upcall *); -static void handle_miss_upcalls(struct ofproto_dpif *, - struct dpif_upcall *, size_t n); +static int handle_upcalls(struct ofproto_dpif *, unsigned int max_batch); /* Flow expiration. */ static int expire(struct ofproto_dpif *); @@ -511,11 +557,12 @@ static int expire(struct ofproto_dpif *); static void send_netflow_active_timeouts(struct ofproto_dpif *); /* Utilities. */ -static int send_packet(const struct ofport_dpif *, - const struct ofpbuf *packet); +static int send_packet(const struct ofport_dpif *, struct ofpbuf *packet); static size_t compose_sflow_action(const struct ofproto_dpif *, struct ofpbuf *odp_actions, const struct flow *, uint32_t odp_port); +static void add_mirror_actions(struct action_xlate_ctx *ctx, + const struct flow *flow); /* Global variables. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -623,7 +670,14 @@ construct(struct ofproto *ofproto_, int *n_tablesp) ofproto->has_bundle_action = false; + hmap_init(&ofproto->vlandev_map); + hmap_init(&ofproto->realdev_vid_map); + + hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node, + hash_string(ofproto->up.name, 0)); + *n_tablesp = N_TABLES; + memset(&ofproto->stats, 0, sizeof ofproto->stats); return 0; } @@ -647,6 +701,7 @@ destruct(struct ofproto *ofproto_) struct classifier *table; int i; + hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); complete_operations(ofproto); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { @@ -670,47 +725,56 @@ destruct(struct ofproto *ofproto_) hmap_destroy(&ofproto->facets); hmap_destroy(&ofproto->subfacets); + hmap_destroy(&ofproto->vlandev_map); + hmap_destroy(&ofproto->realdev_vid_map); + dpif_close(ofproto->dpif); } +static int +run_fast(struct ofproto *ofproto_) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + unsigned int work; + + /* Handle one or more batches of upcalls, until there's nothing left to do + * or until we do a fixed total amount of work. + * + * We do work in batches because it can be much cheaper to set up a number + * of flows and fire off their patches all at once. We do multiple batches + * because in some cases handling a packet can cause another packet to be + * queued almost immediately as part of the return flow. Both + * optimizations can make major improvements on some benchmarks and + * presumably for real traffic as well. 
*/ + work = 0; + while (work < FLOW_MISS_MAX_BATCH) { + int retval = handle_upcalls(ofproto, FLOW_MISS_MAX_BATCH - work); + if (retval <= 0) { + return -retval; + } + work += retval; + } + return 0; +} + static int run(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct dpif_upcall misses[FLOW_MISS_MAX_BATCH]; struct ofport_dpif *ofport; struct ofbundle *bundle; - size_t n_misses; - int i; + int error; if (!clogged) { complete_operations(ofproto); } dpif_run(ofproto->dpif); - n_misses = 0; - for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) { - struct dpif_upcall *upcall = &misses[n_misses]; - int error; - - error = dpif_recv(ofproto->dpif, upcall); - if (error) { - if (error == ENODEV && n_misses == 0) { - return error; - } - break; - } - - if (upcall->type == DPIF_UC_MISS) { - /* Handle it later. */ - n_misses++; - } else { - handle_upcall(ofproto, upcall); - } + error = run_fast(ofproto_); + if (error) { + return error; } - handle_miss_upcalls(ofproto, misses, n_misses); - if (timer_expired(&ofproto->next_expiration)) { int delay = expire(ofproto); timer_set_duration(&ofproto->next_expiration, delay); @@ -881,10 +945,11 @@ port_construct(struct ofport *port_) port->stp_port = NULL; port->stp_state = STP_DISABLED; hmap_init(&port->priorities); + port->realdev_ofp_port = 0; + port->vlandev_vid = 0; if (ofproto->sflow) { - dpif_sflow_add_port(ofproto->sflow, port->odp_port, - netdev_get_name(port->up.netdev)); + dpif_sflow_add_port(ofproto->sflow, port_); } return 0; @@ -947,8 +1012,7 @@ set_sflow(struct ofproto *ofproto_, ds = ofproto->sflow = dpif_sflow_create(ofproto->dpif); HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { - dpif_sflow_add_port(ds, ofport->odp_port, - netdev_get_name(ofport->up.netdev)); + dpif_sflow_add_port(ds, &ofport->up); } ofproto->need_revalidate = true; } @@ -1333,10 +1397,17 @@ set_queues(struct ofport *ofport_, /* Bundles. */ -/* Expires all MAC learning entries associated with 'port' and forces ofproto - * to revalidate every flow. */ +/* Expires all MAC learning entries associated with 'bundle' and forces its + * ofproto to revalidate every flow. + * + * Normally MAC learning entries are removed only from the ofproto associated + * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries + * are removed from every ofproto. When patch ports and SLB bonds are in use + * and a VM migration happens and the gratuitous ARPs are somehow lost, this + * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate + * with the host from which it migrated. 
*/ static void -bundle_flush_macs(struct ofbundle *bundle) +bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) { struct ofproto_dpif *ofproto = bundle->ofproto; struct mac_learning *ml = ofproto->ml; @@ -1345,6 +1416,23 @@ bundle_flush_macs(struct ofbundle *bundle) ofproto->need_revalidate = true; LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { if (mac->port.p == bundle) { + if (all_ofprotos) { + struct ofproto_dpif *o; + + HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + if (o != ofproto) { + struct mac_entry *e; + + e = mac_learning_lookup(o->ml, mac->mac, mac->vlan, + NULL); + if (e) { + tag_set_add(&o->revalidate_set, e->tag); + mac_learning_expire(o->ml, e); + } + } + } + } + mac_learning_expire(ml, mac); } } @@ -1478,7 +1566,7 @@ bundle_destroy(struct ofbundle *bundle) bundle_del_port(port); } - bundle_flush_macs(bundle); + bundle_flush_macs(bundle, true); hmap_remove(&ofproto->bundles, &bundle->hmap_node); free(bundle->name); free(bundle->trunks); @@ -1666,7 +1754,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, /* If we changed something that would affect MAC learning, un-learn * everything on this port and force flow revalidation. */ if (need_flush) { - bundle_flush_macs(bundle); + bundle_flush_macs(bundle, false); } return 0; @@ -1992,6 +2080,24 @@ mirror_destroy(struct ofmirror *mirror) mirror_update_dups(ofproto); } +static int +mirror_get_stats(struct ofproto *ofproto_, void *aux, + uint64_t *packets, uint64_t *bytes) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct ofmirror *mirror = mirror_lookup(ofproto, aux); + + if (!mirror) { + *packets = *bytes = UINT64_MAX; + return 0; + } + + *packets = mirror->packet_count; + *bytes = mirror->byte_count; + + return 0; +} + static int set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans) { @@ -2136,6 +2242,63 @@ port_del(struct ofproto *ofproto_, uint16_t ofp_port) return error; } +static int +port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + int error; + + error = netdev_get_stats(ofport->up.netdev, stats); + + if (!error && ofport->odp_port == OVSP_LOCAL) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + + /* ofproto->stats.tx_packets represents packets that we created + * internally and sent to some port (e.g. packets sent with + * send_packet()). Account for them as if they had come from + * OFPP_LOCAL and got forwarded. */ + + if (stats->rx_packets != UINT64_MAX) { + stats->rx_packets += ofproto->stats.tx_packets; + } + + if (stats->rx_bytes != UINT64_MAX) { + stats->rx_bytes += ofproto->stats.tx_bytes; + } + + /* ofproto->stats.rx_packets represents packets that were received on + * some port and we processed internally and dropped (e.g. STP). + * Account fro them as if they had been forwarded to OFPP_LOCAL. */ + + if (stats->tx_packets != UINT64_MAX) { + stats->tx_packets += ofproto->stats.rx_packets; + } + + if (stats->tx_bytes != UINT64_MAX) { + stats->tx_bytes += ofproto->stats.rx_bytes; + } + } + + return error; +} + +/* Account packets for LOCAL port. 
*/ +static void +ofproto_update_local_port_stats(const struct ofproto *ofproto_, + size_t tx_size, size_t rx_size) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + if (rx_size) { + ofproto->stats.rx_packets++; + ofproto->stats.rx_bytes += rx_size; + } + if (tx_size) { + ofproto->stats.tx_packets++; + ofproto->stats.tx_bytes += tx_size; + } +} + struct port_dump_state { struct dpif_port_dump dump; bool done; @@ -2380,6 +2543,8 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, miss->initial_tci); LIST_FOR_EACH_SAFE (packet, next_packet, list_node, &miss->packets) { + struct dpif_flow_stats stats; + list_remove(&packet->list_node); ofproto->n_matches++; @@ -2400,12 +2565,27 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, if (!facet->may_install || !subfacet->actions) { subfacet_make_actions(ofproto, subfacet, packet); } + + /* Credit statistics to subfacet for this packet. We must do this now + * because execute_controller_action() below may destroy 'packet'. */ + dpif_flow_stats_extract(&facet->flow, packet, &stats); + subfacet_update_stats(ofproto, subfacet, &stats); + if (!execute_controller_action(ofproto, &facet->flow, subfacet->actions, - subfacet->actions_len, packet)) { + subfacet->actions_len, packet, true)) { struct flow_miss_op *op = &ops[(*n_ops)++]; struct dpif_execute *execute = &op->dpif_op.execute; + if (flow->vlan_tci != subfacet->initial_tci) { + /* This packet was received on a VLAN splinter port. We added + * a VLAN to the packet to make the packet resemble the flow, + * but the actions were composed assuming that the packet + * contained no VLAN. So, we must remove the VLAN header from + * the packet before trying to execute the actions. */ + eth_pop_vlan(packet); + } + op->subfacet = subfacet; execute->type = DPIF_OP_EXECUTE; execute->key = miss->key; @@ -2434,12 +2614,31 @@ handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss, } } +/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of + * OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns + * an ODP_FIT_* value that indicates how well 'key' fits our expectations for + * what a flow key should contain. + * + * This function also includes some logic to help make VLAN splinters + * transparent to the rest of the upcall processing logic. In particular, if + * the extracted in_port is a VLAN splinter port, it replaces flow->in_port by + * the "real" port, sets flow->vlan_tci correctly for the VLAN of the VLAN + * splinter port, and pushes a VLAN header onto 'packet' (if it is nonnull). + * + * Sets '*initial_tci' to the VLAN TCI with which the packet was really + * received, that is, the actual VLAN TCI extracted by odp_flow_key_to_flow(). + * (This differs from the value returned in flow->vlan_tci only for packets + * received on VLAN splinters.) 
+ */ static enum odp_key_fitness -ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto OVS_UNUSED, +ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto, const struct nlattr *key, size_t key_len, - struct flow *flow, ovs_be16 *initial_tci) + struct flow *flow, ovs_be16 *initial_tci, + struct ofpbuf *packet) { enum odp_key_fitness fitness; + uint16_t realdev; + int vid; fitness = odp_flow_key_to_flow(key, key_len, flow); if (fitness == ODP_FIT_ERROR) { @@ -2447,6 +2646,36 @@ ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto OVS_UNUSED, } *initial_tci = flow->vlan_tci; + realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port, &vid); + if (realdev) { + /* Cause the flow to be processed as if it came in on the real device + * with the VLAN device's VLAN ID. */ + flow->in_port = realdev; + flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI); + if (packet) { + /* Make the packet resemble the flow, so that it gets sent to an + * OpenFlow controller properly, so that it looks correct for + * sFlow, and so that flow_extract() will get the correct vlan_tci + * if it is called on 'packet'. + * + * The allocated space inside 'packet' probably also contains + * 'key', that is, both 'packet' and 'key' are probably part of a + * struct dpif_upcall (see the large comment on that structure + * definition), so pushing data on 'packet' is in general not a + * good idea since it could overwrite 'key' or free it as a side + * effect. However, it's OK in this special case because we know + * that 'packet' is inside a Netlink attribute: pushing 4 bytes + * will just overwrite the 4-byte "struct nlattr", which is fine + * since we don't need that header anymore. */ + eth_push_vlan(packet, flow->vlan_tci); + } + + /* Let the caller know that we can't reproduce 'key' from 'flow'. */ + if (fitness == ODP_FIT_PERFECT) { + fitness = ODP_FIT_TOO_MUCH; + } + } + return fitness; } @@ -2482,15 +2711,19 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, * then set 'flow''s header pointers. */ fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key, upcall->key_len, - &flow, &initial_tci); + &flow, &initial_tci, + upcall->packet); if (fitness == ODP_FIT_ERROR) { + ofpbuf_delete(upcall->packet); continue; } - flow_extract(upcall->packet, flow.priority, flow.tun_id, + flow_extract(upcall->packet, flow.skb_priority, flow.tun_id, flow.in_port, &flow); /* Handle 802.1ag, LACP, and STP specially. 
*/ if (process_special(ofproto, &flow, upcall->packet)) { + ofproto_update_local_port_stats(&ofproto->up, + 0, upcall->packet->size); ofpbuf_delete(upcall->packet); ofproto->n_matches++; continue; @@ -2558,8 +2791,9 @@ handle_userspace_upcall(struct ofproto_dpif *ofproto, fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key, upcall->key_len, &flow, - &initial_tci); + &initial_tci, upcall->packet); if (fitness == ODP_FIT_ERROR) { + ofpbuf_delete(upcall->packet); return; } @@ -2575,26 +2809,50 @@ handle_userspace_upcall(struct ofproto_dpif *ofproto, &flow, false); } else { VLOG_WARN_RL(&rl, "invalid user cookie : 0x%"PRIx64, upcall->userdata); + ofpbuf_delete(upcall->packet); } } -static void -handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +static int +handle_upcalls(struct ofproto_dpif *ofproto, unsigned int max_batch) { - switch (upcall->type) { - case DPIF_UC_ACTION: - handle_userspace_upcall(ofproto, upcall); - break; + struct dpif_upcall misses[FLOW_MISS_MAX_BATCH]; + int n_misses; + int i; - case DPIF_UC_MISS: - /* The caller handles these. */ - NOT_REACHED(); + assert (max_batch <= FLOW_MISS_MAX_BATCH); - case DPIF_N_UC_TYPES: - default: - VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, upcall->type); - break; + n_misses = 0; + for (i = 0; i < max_batch; i++) { + struct dpif_upcall *upcall = &misses[n_misses]; + int error; + + error = dpif_recv(ofproto->dpif, upcall); + if (error) { + break; + } + + switch (upcall->type) { + case DPIF_UC_ACTION: + handle_userspace_upcall(ofproto, upcall); + break; + + case DPIF_UC_MISS: + /* Handle it later. */ + n_misses++; + break; + + case DPIF_N_UC_TYPES: + default: + VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, + upcall->type); + break; + } } + + handle_miss_upcalls(ofproto, misses, n_misses); + + return i; } /* Flow expiration. */ @@ -2670,16 +2928,9 @@ update_stats(struct ofproto_dpif *p) dpif_flow_dump_start(&dump, p->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) { - enum odp_key_fitness fitness; struct subfacet *subfacet; - struct flow flow; - - fitness = odp_flow_key_to_flow(key, key_len, &flow); - if (fitness == ODP_FIT_ERROR) { - continue; - } - subfacet = subfacet_find(p, key, key_len, &flow); + subfacet = subfacet_find(p, key, key_len); if (subfacet && subfacet->installed) { struct facet *facet = subfacet->facet; @@ -2703,9 +2954,18 @@ update_stats(struct ofproto_dpif *p) facet_account(p, facet); facet_push_stats(facet); } else { + if (!VLOG_DROP_WARN(&rl)) { + struct ds s; + + ds_init(&s); + odp_flow_key_format(key, key_len, &s); + VLOG_WARN("unexpected flow from datapath %s", ds_cstr(&s)); + ds_destroy(&s); + } + + COVERAGE_INC(facet_unexpected); /* There's a flow in the datapath that we know nothing about, or a * flow that shouldn't be installed but was anyway. Delete it. */ - COVERAGE_INC(facet_unexpected); dpif_flow_del(p->dpif, key, key_len, NULL); } } @@ -2883,11 +3143,17 @@ facet_free(struct facet *facet) free(facet); } +/* If the 'actions_len' bytes of actions in 'odp_actions' are just a single + * OVS_ACTION_ATTR_USERSPACE action, executes it internally and returns true. + * Otherwise, returns false without doing anything. + * + * If 'clone' is true, the caller always retains ownership of 'packet'. + * Otherwise, ownership is transferred to this function if it returns true. 
*/ static bool execute_controller_action(struct ofproto_dpif *ofproto, const struct flow *flow, const struct nlattr *odp_actions, size_t actions_len, - struct ofpbuf *packet) + struct ofpbuf *packet, bool clone) { if (actions_len && odp_actions->nla_type == OVS_ACTION_ATTR_USERSPACE @@ -2903,7 +3169,7 @@ execute_controller_action(struct ofproto_dpif *ofproto, nla = nl_attr_find_nested(odp_actions, OVS_USERSPACE_ATTR_USERDATA); send_packet_in_action(ofproto, packet, nl_attr_get_u64(nla), flow, - false); + clone); return true; } else { return false; @@ -2924,7 +3190,7 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, int error; if (execute_controller_action(ofproto, flow, odp_actions, actions_len, - packet)) { + packet, false)) { return true; } @@ -3214,6 +3480,7 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) facet->may_install = ctx.may_set_up_flow; facet->has_learn = ctx.has_learn; facet->has_normal = ctx.has_normal; + facet->mirrors = ctx.mirrors; if (new_actions) { i = 0; LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { @@ -3232,7 +3499,7 @@ facet_revalidate(struct ofproto_dpif *ofproto, struct facet *facet) list_push_back(&new_rule->facets, &facet->list_node); facet->rule = new_rule; facet->used = new_rule->up.created; - facet->rs_used = facet->used; + facet->prev_used = facet->used; } return true; @@ -3258,30 +3525,33 @@ facet_reset_counters(struct facet *facet) { facet->packet_count = 0; facet->byte_count = 0; - facet->rs_packet_count = 0; - facet->rs_byte_count = 0; + facet->prev_packet_count = 0; + facet->prev_byte_count = 0; facet->accounted_bytes = 0; } static void facet_push_stats(struct facet *facet) { - uint64_t rs_packets, rs_bytes; + uint64_t new_packets, new_bytes; - assert(facet->packet_count >= facet->rs_packet_count); - assert(facet->byte_count >= facet->rs_byte_count); - assert(facet->used >= facet->rs_used); + assert(facet->packet_count >= facet->prev_packet_count); + assert(facet->byte_count >= facet->prev_byte_count); + assert(facet->used >= facet->prev_used); - rs_packets = facet->packet_count - facet->rs_packet_count; - rs_bytes = facet->byte_count - facet->rs_byte_count; + new_packets = facet->packet_count - facet->prev_packet_count; + new_bytes = facet->byte_count - facet->prev_byte_count; - if (rs_packets || rs_bytes || facet->used > facet->rs_used) { - facet->rs_packet_count = facet->packet_count; - facet->rs_byte_count = facet->byte_count; - facet->rs_used = facet->used; + if (new_packets || new_bytes || facet->used > facet->prev_used) { + facet->prev_packet_count = facet->packet_count; + facet->prev_byte_count = facet->byte_count; + facet->prev_used = facet->used; flow_push_stats(facet->rule, &facet->flow, - rs_packets, rs_bytes, facet->used); + new_packets, new_bytes, facet->used); + + update_mirror_stats(ofproto_dpif_cast(facet->rule->up.ofproto), + facet->mirrors, new_packets, new_bytes); } } @@ -3305,7 +3575,7 @@ push_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) } /* Pushes flow statistics to the rules which 'flow' resubmits into given - * 'rule''s actions. */ + * 'rule''s actions and mirrors. */ static void flow_push_stats(const struct rule_dpif *rule, const struct flow *flow, uint64_t packets, uint64_t bytes, @@ -3392,12 +3662,18 @@ subfacet_create(struct ofproto_dpif *ofproto, struct facet *facet, * 'flow'. Returns the subfacet if one exists, otherwise NULL. 
*/ static struct subfacet * subfacet_find(struct ofproto_dpif *ofproto, - const struct nlattr *key, size_t key_len, - const struct flow *flow) + const struct nlattr *key, size_t key_len) { uint32_t key_hash = odp_flow_key_hash(key, key_len); + enum odp_key_fitness fitness; + struct flow flow; + + fitness = odp_flow_key_to_flow(key, key_len, &flow); + if (fitness == ODP_FIT_ERROR) { + return NULL; + } - return subfacet_find__(ofproto, key, key_len, key_hash, flow); + return subfacet_find__(ofproto, key, key_len, key_hash, &flow); } /* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from @@ -3459,6 +3735,7 @@ subfacet_make_actions(struct ofproto_dpif *p, struct subfacet *subfacet, facet->has_learn = ctx.has_learn; facet->has_normal = ctx.has_normal; facet->nf_flow.output_iface = ctx.nf_output_iface; + facet->mirrors = ctx.mirrors; if (subfacet->actions_len != odp_actions->size || memcmp(subfacet->actions, odp_actions->data, odp_actions->size)) { @@ -3762,18 +4039,26 @@ rule_modify_actions(struct rule *rule_) } /* Sends 'packet' out 'ofport'. + * May modify 'packet'. * Returns 0 if successful, otherwise a positive errno value. */ static int -send_packet(const struct ofport_dpif *ofport, const struct ofpbuf *packet) +send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) { const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); - uint16_t odp_port = ofport->odp_port; struct ofpbuf key, odp_actions; struct odputil_keybuf keybuf; + uint16_t odp_port; struct flow flow; int error; flow_extract((struct ofpbuf *) packet, 0, 0, 0, &flow); + odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port, + flow.vlan_tci); + if (odp_port != ofport->odp_port) { + eth_pop_vlan(packet); + flow.vlan_tci = htons(0); + } + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, &flow); @@ -3791,6 +4076,7 @@ send_packet(const struct ofport_dpif *ofport, const struct ofpbuf *packet) VLOG_WARN_RL(&rl, "%s: failed to send packet on port %"PRIu32" (%s)", ofproto->up.name, odp_port, strerror(error)); } + ofproto_update_local_port_stats(ofport->up.ofproto, packet->size, 0); return error; } @@ -3901,172 +4187,15 @@ fix_sflow_action(struct action_xlate_ctx *ctx) cookie->vlan_tci = base->vlan_tci; } -static void -commit_set_action(struct ofpbuf *odp_actions, enum ovs_key_attr key_type, - const void *key, size_t key_size) -{ - size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); - nl_msg_put_unspec(odp_actions, key_type, key, key_size); - nl_msg_end_nested(odp_actions, offset); -} - -static void -commit_set_tun_id_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions) -{ - if (base->tun_id == flow->tun_id) { - return; - } - base->tun_id = flow->tun_id; - - commit_set_action(odp_actions, OVS_KEY_ATTR_TUN_ID, - &base->tun_id, sizeof(base->tun_id)); -} - -static void -commit_set_ether_addr_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions) -{ - struct ovs_key_ethernet eth_key; - - if (eth_addr_equals(base->dl_src, flow->dl_src) && - eth_addr_equals(base->dl_dst, flow->dl_dst)) { - return; - } - - memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); - memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); - - memcpy(eth_key.eth_src, base->dl_src, ETH_ADDR_LEN); - memcpy(eth_key.eth_dst, base->dl_dst, ETH_ADDR_LEN); - - commit_set_action(odp_actions, OVS_KEY_ATTR_ETHERNET, - ð_key, sizeof(eth_key)); -} - -static void -commit_vlan_action(const struct flow *flow, struct flow *base, - 
struct ofpbuf *odp_actions) -{ - if (base->vlan_tci == flow->vlan_tci) { - return; - } - - if (base->vlan_tci & htons(VLAN_CFI)) { - nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); - } - - if (flow->vlan_tci & htons(VLAN_CFI)) { - struct ovs_action_push_vlan vlan; - - vlan.vlan_tpid = htons(ETH_TYPE_VLAN); - vlan.vlan_tci = flow->vlan_tci; - nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, - &vlan, sizeof vlan); - } - base->vlan_tci = flow->vlan_tci; -} - -static void -commit_set_nw_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions) -{ - struct ovs_key_ipv4 ipv4_key; - - if (base->dl_type != htons(ETH_TYPE_IP) || - !base->nw_src || !base->nw_dst) { - return; - } - - if (base->nw_src == flow->nw_src && - base->nw_dst == flow->nw_dst && - base->nw_tos == flow->nw_tos && - base->nw_ttl == flow->nw_ttl && - base->nw_frag == flow->nw_frag) { - return; - } - - ipv4_key.ipv4_src = base->nw_src = flow->nw_src; - ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; - ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos; - ipv4_key.ipv4_ttl = base->nw_ttl = flow->nw_ttl; - ipv4_key.ipv4_proto = base->nw_proto; - ipv4_key.ipv4_frag = (base->nw_frag == 0 ? OVS_FRAG_TYPE_NONE - : base->nw_frag == FLOW_NW_FRAG_ANY - ? OVS_FRAG_TYPE_FIRST : OVS_FRAG_TYPE_LATER); - - commit_set_action(odp_actions, OVS_KEY_ATTR_IPV4, - &ipv4_key, sizeof(ipv4_key)); -} - -static void -commit_set_port_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions) -{ - if (!base->tp_src || !base->tp_dst) { - return; - } - - if (base->tp_src == flow->tp_src && - base->tp_dst == flow->tp_dst) { - return; - } - - if (flow->nw_proto == IPPROTO_TCP) { - struct ovs_key_tcp port_key; - - port_key.tcp_src = base->tp_src = flow->tp_src; - port_key.tcp_dst = base->tp_dst = flow->tp_dst; - - commit_set_action(odp_actions, OVS_KEY_ATTR_TCP, - &port_key, sizeof(port_key)); - - } else if (flow->nw_proto == IPPROTO_UDP) { - struct ovs_key_udp port_key; - - port_key.udp_src = base->tp_src = flow->tp_src; - port_key.udp_dst = base->tp_dst = flow->tp_dst; - - commit_set_action(odp_actions, OVS_KEY_ATTR_UDP, - &port_key, sizeof(port_key)); - } -} - -static void -commit_set_priority_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions) -{ - if (base->priority == flow->priority) { - return; - } - base->priority = flow->priority; - - commit_set_action(odp_actions, OVS_KEY_ATTR_PRIORITY, - &base->priority, sizeof(base->priority)); -} - -static void -commit_odp_actions(struct action_xlate_ctx *ctx) -{ - const struct flow *flow = &ctx->flow; - struct flow *base = &ctx->base_flow; - struct ofpbuf *odp_actions = ctx->odp_actions; - - commit_set_tun_id_action(flow, base, odp_actions); - commit_set_ether_addr_action(flow, base, odp_actions); - commit_vlan_action(flow, base, odp_actions); - commit_set_nw_action(flow, base, odp_actions); - commit_set_port_action(flow, base, odp_actions); - commit_set_priority_action(flow, base, odp_actions); -} - static void compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, bool check_stp) { const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port); uint16_t odp_port = ofp_port_to_odp_port(ofp_port); + ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci; uint8_t flow_nw_tos = ctx->flow.nw_tos; + uint16_t out_port; if (ofport) { struct priority_to_dscp *pdscp; @@ -4076,7 +4205,7 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, return; } - pdscp = get_priority(ofport, ctx->flow.priority); 
+ pdscp = get_priority(ofport, ctx->flow.skb_priority); if (pdscp) { ctx->flow.nw_tos &= ~IP_DSCP_MASK; ctx->flow.nw_tos |= pdscp->dscp; @@ -4087,11 +4216,18 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, * later and we're pre-populating the flow table. */ } - commit_odp_actions(ctx); - nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); + out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port, + ctx->flow.vlan_tci); + if (out_port != odp_port) { + ctx->flow.vlan_tci = htons(0); + } + commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions); + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port); + ctx->sflow_odp_port = odp_port; ctx->sflow_n_outputs++; ctx->nf_output_iface = ofp_port; + ctx->flow.vlan_tci = flow_vlan_tci; ctx->flow.nw_tos = flow_nw_tos; } @@ -4174,7 +4310,6 @@ flood_packets(struct action_xlate_ctx *ctx, bool all) { struct ofport_dpif *ofport; - commit_odp_actions(ctx); HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) { uint16_t ofp_port = ofport->up.ofp_port; @@ -4197,7 +4332,7 @@ compose_controller_action(struct action_xlate_ctx *ctx, int len) { struct user_action_cookie cookie; - commit_odp_actions(ctx); + commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions); cookie.type = USER_ACTION_COOKIE_CONTROLLER; cookie.data = len; cookie.n_output = 0; @@ -4299,10 +4434,10 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, } /* Add datapath actions. */ - flow_priority = ctx->flow.priority; - ctx->flow.priority = priority; + flow_priority = ctx->flow.skb_priority; + ctx->flow.skb_priority = priority; compose_output_action(ctx, ofp_port); - ctx->flow.priority = flow_priority; + ctx->flow.skb_priority = flow_priority; /* Update NetFlow output port. 
*/ if (ctx->nf_output_iface == NF_OUT_DROP) { @@ -4327,7 +4462,7 @@ xlate_set_queue_action(struct action_xlate_ctx *ctx, return; } - ctx->flow.priority = priority; + ctx->flow.skb_priority = priority; } struct xlate_reg_state { @@ -4525,7 +4660,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, break; case OFPUTIL_NXAST_POP_QUEUE: - ctx->flow.priority = ctx->original_priority; + ctx->flow.skb_priority = ctx->orig_skb_priority; break; case OFPUTIL_NXAST_REG_MOVE: @@ -4617,6 +4752,8 @@ static struct ofpbuf * xlate_actions(struct action_xlate_ctx *ctx, const union ofp_action *in, size_t n_in) { + struct flow orig_flow = ctx->flow; + COVERAGE_INC(ofproto_dpif_xlate); ctx->odp_actions = ofpbuf_new(512); @@ -4626,8 +4763,9 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->has_learn = false; ctx->has_normal = false; ctx->nf_output_iface = NF_OUT_DROP; + ctx->mirrors = 0; ctx->recurse = 0; - ctx->original_priority = ctx->flow.priority; + ctx->orig_skb_priority = ctx->flow.skb_priority; ctx->table_id = 0; ctx->exit = false; @@ -4668,6 +4806,7 @@ xlate_actions(struct action_xlate_ctx *ctx, compose_output_action(ctx, OFPP_LOCAL); } } + add_mirror_actions(ctx, &orig_flow); fix_sflow_action(ctx); } @@ -4844,34 +4983,6 @@ ofbundle_get_a_port(const struct ofbundle *bundle) struct ofport_dpif, bundle_node); } -static mirror_mask_t -compose_dsts(struct action_xlate_ctx *ctx, uint16_t vlan, - const struct ofbundle *in_bundle, - const struct ofbundle *out_bundle) -{ - mirror_mask_t dst_mirrors = 0; - - if (out_bundle == OFBUNDLE_FLOOD) { - struct ofbundle *bundle; - - HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) { - if (bundle != in_bundle - && ofbundle_includes_vlan(bundle, vlan) - && bundle->floodable - && !bundle->mirror_out) { - output_normal(ctx, bundle, vlan); - dst_mirrors |= bundle->dst_mirrors; - } - } - ctx->nf_output_iface = NF_OUT_FLOOD; - } else if (out_bundle) { - output_normal(ctx, out_bundle, vlan); - dst_mirrors = out_bundle->dst_mirrors; - } - - return dst_mirrors; -} - static bool vlan_is_mirrored(const struct ofmirror *m, int vlan) { @@ -4920,18 +5031,70 @@ eth_dst_may_rspan(const uint8_t dst[ETH_ADDR_LEN]) } static void -output_mirrors(struct action_xlate_ctx *ctx, - uint16_t vlan, const struct ofbundle *in_bundle, - mirror_mask_t dst_mirrors) +add_mirror_actions(struct action_xlate_ctx *ctx, const struct flow *orig_flow) { struct ofproto_dpif *ofproto = ctx->ofproto; mirror_mask_t mirrors; + struct ofport_dpif *in_port; + struct ofbundle *in_bundle; + uint16_t vlan; + uint16_t vid; + const struct nlattr *a; + size_t left; + + /* Obtain in_port from orig_flow.in_port. + * + * lookup_input_bundle() also ensures that in_port belongs to a bundle. */ + in_port = lookup_input_bundle(ctx->ofproto, orig_flow->in_port, + ctx->packet != NULL); + if (!in_port) { + return; + } + in_bundle = in_port->bundle; + mirrors = in_bundle->src_mirrors; + + /* Drop frames on bundles reserved for mirroring. */ + if (in_bundle->mirror_out) { + if (ctx->packet != NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port " + "%s, which is reserved exclusively for mirroring", + ctx->ofproto->up.name, in_bundle->name); + } + return; + } + + /* Check VLAN. */ + vid = vlan_tci_to_vid(orig_flow->vlan_tci); + if (!input_vid_is_valid(vid, in_bundle, ctx->packet != NULL)) { + return; + } + vlan = input_vid_to_vlan(in_bundle, vid); + + /* Look at the output ports to check for destination selections. 
*/ + + NL_ATTR_FOR_EACH (a, left, ctx->odp_actions->data, + ctx->odp_actions->size) { + enum ovs_action_attr type = nl_attr_type(a); + struct ofport_dpif *ofport; + + if (type != OVS_ACTION_ATTR_OUTPUT) { + continue; + } + + ofport = get_odp_port(ofproto, nl_attr_get_u32(a)); + if (ofport && ofport->bundle) { + mirrors |= ofport->bundle->dst_mirrors; + } + } - mirrors = in_bundle->src_mirrors | dst_mirrors; if (!mirrors) { return; } + /* Restore the original packet before adding the mirror actions. */ + ctx->flow = *orig_flow; + while (mirrors) { struct ofmirror *m; @@ -4943,9 +5106,10 @@ output_mirrors(struct action_xlate_ctx *ctx, } mirrors &= ~m->dup_mirrors; + ctx->mirrors |= m->dup_mirrors; if (m->out) { output_normal(ctx, m->out, vlan); - } else if (eth_dst_may_rspan(ctx->flow.dl_dst) + } else if (eth_dst_may_rspan(orig_flow->dl_dst) && vlan != m->out_vlan) { struct ofbundle *bundle; @@ -4959,6 +5123,34 @@ output_mirrors(struct action_xlate_ctx *ctx, } } +static void +update_mirror_stats(struct ofproto_dpif *ofproto, mirror_mask_t mirrors, + uint64_t packets, uint64_t bytes) +{ + if (!mirrors) { + return; + } + + for (; mirrors; mirrors &= mirrors - 1) { + struct ofmirror *m; + + m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1]; + + if (!m) { + /* In normal circumstances 'm' will not be NULL. However, + * if mirrors are reconfigured, we can temporarily get out + * of sync in facet_revalidate(). We could "correct" the + * mirror list before reaching here, but doing that would + * not properly account the traffic stats we've currently + * accumulated for previous mirror configuration. */ + continue; + } + + m->packet_count += packets; + m->byte_count += bytes; + } +} + /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to * indicate this; newer upstream kernels use gratuitous ARP requests. */ @@ -5093,10 +5285,8 @@ is_admissible(struct ofproto_dpif *ofproto, const struct flow *flow, static void xlate_normal(struct action_xlate_ctx *ctx) { - mirror_mask_t dst_mirrors = 0; struct ofport_dpif *in_port; struct ofbundle *in_bundle; - struct ofbundle *out_bundle; struct mac_entry *mac; uint16_t vlan; uint16_t vid; @@ -5145,7 +5335,6 @@ xlate_normal(struct action_xlate_ctx *ctx) /* Check other admissibility requirements. */ if (!is_admissible(ctx->ofproto, &ctx->flow, in_port, vlan, &ctx->tags)) { - output_mirrors(ctx, vlan, in_bundle, 0); return; } @@ -5158,7 +5347,9 @@ xlate_normal(struct action_xlate_ctx *ctx) mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan, &ctx->tags); if (mac) { - out_bundle = mac->port.p; + if (mac->port.p != in_bundle) { + output_normal(ctx, mac->port.p, vlan); + } } else if (!ctx->packet && !eth_addr_is_multicast(ctx->flow.dl_dst)) { /* If we are revalidating but don't have a learning entry then eject * the flow. Installing a flow that floods packets opens up a window @@ -5168,14 +5359,18 @@ xlate_normal(struct action_xlate_ctx *ctx) ctx->may_set_up_flow = false; return; } else { - out_bundle = OFBUNDLE_FLOOD; - } + struct ofbundle *bundle; - /* Don't send packets out their input bundles. 
*/ - if (in_bundle != out_bundle) { - dst_mirrors = compose_dsts(ctx, vlan, in_bundle, out_bundle); + HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) { + if (bundle != in_bundle + && ofbundle_includes_vlan(bundle, vlan) + && bundle->floodable + && !bundle->mirror_out) { + output_normal(ctx, bundle, vlan); + } + } + ctx->nf_output_iface = NF_OUT_FLOOD; } - output_mirrors(ctx, vlan, in_bundle, dst_mirrors); } /* Optimized flow revalidation. @@ -5406,19 +5601,24 @@ send_netflow_active_timeouts(struct ofproto_dpif *ofproto) static struct ofproto_dpif * ofproto_dpif_lookup(const char *name) { - struct ofproto *ofproto = ofproto_lookup(name); - return (ofproto && ofproto->ofproto_class == &ofproto_dpif_class - ? ofproto_dpif_cast(ofproto) - : NULL); + struct ofproto_dpif *ofproto; + + HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node, + hash_string(name, 0), &all_ofproto_dpifs) { + if (!strcmp(ofproto->up.name, name)) { + return ofproto; + } + } + return NULL; } static void -ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, - const char *args, void *aux OVS_UNUSED) +ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *aux OVS_UNUSED) { const struct ofproto_dpif *ofproto; - ofproto = ofproto_dpif_lookup(args); + ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply(conn, 501, "no such bridge"); return; @@ -5429,14 +5629,14 @@ ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, } static void -ofproto_unixctl_fdb_show(struct unixctl_conn *conn, - const char *args, void *aux OVS_UNUSED) +ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct ofproto_dpif *ofproto; const struct mac_entry *e; - ofproto = ofproto_dpif_lookup(args); + ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply(conn, 501, "no such bridge"); return; @@ -5522,12 +5722,10 @@ trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule) } static void -ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, +ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { - char *dpname, *arg1, *arg2, *arg3, *arg4; - char *args = xstrdup(args_); - char *save_ptr = NULL; + const char *dpname = argv[1]; struct ofproto_dpif *ofproto; struct ofpbuf odp_key; struct ofpbuf *packet; @@ -5541,29 +5739,21 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, ofpbuf_init(&odp_key, 0); ds_init(&result); - dpname = strtok_r(args, " ", &save_ptr); - if (!dpname) { - unixctl_command_reply(conn, 501, "Bad command syntax"); - goto exit; - } - ofproto = ofproto_dpif_lookup(dpname); if (!ofproto) { unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list " "for help)"); goto exit; } - arg1 = strtok_r(NULL, " ", &save_ptr); - arg2 = strtok_r(NULL, " ", &save_ptr); - arg3 = strtok_r(NULL, " ", &save_ptr); - arg4 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ - if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) { + if (argc == 3 || (argc == 4 && !strcmp(argv[3], "-generate"))) { /* ofproto/trace dpname flow [-generate] */ + const char *flow_s = argv[2]; + const char *generate_s = argv[3]; int error; /* Convert string to datapath key. 
*/ ofpbuf_init(&odp_key, 0); - error = odp_flow_key_from_string(arg1, NULL, &odp_key); + error = odp_flow_key_from_string(flow_s, NULL, &odp_key); if (error) { unixctl_command_reply(conn, 501, "Bad flow syntax"); goto exit; @@ -5572,37 +5762,31 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, /* Convert odp_key to flow. */ error = ofproto_dpif_extract_flow_key(ofproto, odp_key.data, odp_key.size, &flow, - &initial_tci); + &initial_tci, NULL); if (error == ODP_FIT_ERROR) { unixctl_command_reply(conn, 501, "Invalid flow"); goto exit; } /* Generate a packet, if requested. */ - if (arg2) { + if (generate_s) { packet = ofpbuf_new(0); flow_compose(packet, &flow); } - } else if (dpname && arg1 && arg2 && arg3 && arg4) { + } else if (argc == 6) { /* ofproto/trace dpname priority tun_id in_port packet */ - uint16_t in_port; - ovs_be64 tun_id; - uint32_t priority; - - priority = atoi(arg1); - tun_id = htonll(strtoull(arg2, NULL, 0)); - in_port = ofp_port_to_odp_port(atoi(arg3)); - - packet = ofpbuf_new(strlen(args) / 2); - arg4 = ofpbuf_put_hex(packet, arg4, NULL); - arg4 += strspn(arg4, " "); - if (*arg4 != '\0') { - unixctl_command_reply(conn, 501, "Trailing garbage in command"); - goto exit; - } - if (packet->size < ETH_HEADER_LEN) { - unixctl_command_reply(conn, 501, - "Packet data too short for Ethernet"); + const char *priority_s = argv[2]; + const char *tun_id_s = argv[3]; + const char *in_port_s = argv[4]; + const char *packet_s = argv[5]; + uint16_t in_port = ofp_port_to_odp_port(atoi(in_port_s)); + ovs_be64 tun_id = htonll(strtoull(tun_id_s, NULL, 0)); + uint32_t priority = atoi(priority_s); + const char *msg; + + msg = eth_from_hex(packet_s, &packet); + if (msg) { + unixctl_command_reply(conn, 501, msg); goto exit; } @@ -5657,20 +5841,19 @@ exit: ds_destroy(&result); ofpbuf_delete(packet); ofpbuf_uninit(&odp_key); - free(args); } static void -ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, - const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { clogged = true; unixctl_command_reply(conn, 200, NULL); } static void -ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, - const char *args_ OVS_UNUSED, void *aux OVS_UNUSED) +ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { clogged = false; unixctl_command_reply(conn, 200, NULL); @@ -5685,15 +5868,160 @@ ofproto_dpif_unixctl_init(void) } registered = true; - unixctl_command_register("ofproto/trace", - "bridge {tun_id in_port packet | odp_flow [-generate]}", - ofproto_unixctl_trace, NULL); - unixctl_command_register("fdb/flush", "bridge", ofproto_unixctl_fdb_flush, - NULL); - unixctl_command_register("fdb/show", "bridge", ofproto_unixctl_fdb_show, - NULL); - unixctl_command_register("ofproto/clog", "", ofproto_dpif_clog, NULL); - unixctl_command_register("ofproto/unclog", "", ofproto_dpif_unclog, NULL); + unixctl_command_register( + "ofproto/trace", + "bridge {tun_id in_port packet | odp_flow [-generate]}", + 2, 4, ofproto_unixctl_trace, NULL); + unixctl_command_register("fdb/flush", "bridge", 1, 1, + ofproto_unixctl_fdb_flush, NULL); + unixctl_command_register("fdb/show", "bridge", 1, 1, + ofproto_unixctl_fdb_show, NULL); + unixctl_command_register("ofproto/clog", "", 0, 0, + ofproto_dpif_clog, NULL); + unixctl_command_register("ofproto/unclog", "", 0, 0, + ofproto_dpif_unclog, NULL); +} + +/* 
Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) + * + * This is deprecated. It is only for compatibility with broken device drivers + * in old versions of Linux that do not properly support VLANs when VLAN + * devices are not used. When broken device drivers are no longer in + * widespread use, we will delete these interfaces. */ + +static int +set_realdev(struct ofport *ofport_, uint16_t realdev_ofp_port, int vid) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto); + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + + if (realdev_ofp_port == ofport->realdev_ofp_port + && vid == ofport->vlandev_vid) { + return 0; + } + + ofproto->need_revalidate = true; + + if (ofport->realdev_ofp_port) { + vsp_remove(ofport); + } + if (realdev_ofp_port && ofport->bundle) { + /* vlandevs are enslaved to their realdevs, so they are not allowed to + * themselves be part of a bundle. */ + bundle_set(ofport->up.ofproto, ofport->bundle, NULL); + } + + ofport->realdev_ofp_port = realdev_ofp_port; + ofport->vlandev_vid = vid; + + if (realdev_ofp_port) { + vsp_add(ofport, realdev_ofp_port, vid); + } + + return 0; +} + +static uint32_t +hash_realdev_vid(uint16_t realdev_ofp_port, int vid) +{ + return hash_2words(realdev_ofp_port, vid); +} + +static uint32_t +vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, + uint32_t realdev_odp_port, ovs_be16 vlan_tci) +{ + if (!hmap_is_empty(&ofproto->realdev_vid_map)) { + uint16_t realdev_ofp_port = odp_port_to_ofp_port(realdev_odp_port); + int vid = vlan_tci_to_vid(vlan_tci); + const struct vlan_splinter *vsp; + + HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node, + hash_realdev_vid(realdev_ofp_port, vid), + &ofproto->realdev_vid_map) { + if (vsp->realdev_ofp_port == realdev_ofp_port + && vsp->vid == vid) { + return ofp_port_to_odp_port(vsp->vlandev_ofp_port); + } + } + } + return realdev_odp_port; +} + +static struct vlan_splinter * +vlandev_find(const struct ofproto_dpif *ofproto, uint16_t vlandev_ofp_port) +{ + struct vlan_splinter *vsp; + + HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node, hash_int(vlandev_ofp_port, 0), + &ofproto->vlandev_map) { + if (vsp->vlandev_ofp_port == vlandev_ofp_port) { + return vsp; + } + } + + return NULL; +} + +static uint16_t +vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto, + uint16_t vlandev_ofp_port, int *vid) +{ + if (!hmap_is_empty(&ofproto->vlandev_map)) { + const struct vlan_splinter *vsp; + + vsp = vlandev_find(ofproto, vlandev_ofp_port); + if (vsp) { + if (vid) { + *vid = vsp->vid; + } + return vsp->realdev_ofp_port; + } + } + return 0; +} + +static void +vsp_remove(struct ofport_dpif *port) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); + struct vlan_splinter *vsp; + + vsp = vlandev_find(ofproto, port->up.ofp_port); + if (vsp) { + hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node); + hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node); + free(vsp); + + port->realdev_ofp_port = 0; + } else { + VLOG_ERR("missing vlan device record"); + } +} + +static void +vsp_add(struct ofport_dpif *port, uint16_t realdev_ofp_port, int vid) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); + + if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL) + && (vsp_realdev_to_vlandev(ofproto, realdev_ofp_port, htons(vid)) + == realdev_ofp_port)) { + struct vlan_splinter *vsp; + + vsp = xmalloc(sizeof *vsp); + hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node, + hash_int(port->up.ofp_port, 0)); + hmap_insert(&ofproto->realdev_vid_map, 
&vsp->realdev_vid_node, + hash_realdev_vid(realdev_ofp_port, vid)); + vsp->realdev_ofp_port = realdev_ofp_port; + vsp->vlandev_ofp_port = port->up.ofp_port; + vsp->vid = vid; + + port->realdev_ofp_port = realdev_ofp_port; + } else { + VLOG_ERR("duplicate vlan device record"); + } } const struct ofproto_class ofproto_dpif_class = { @@ -5705,6 +6033,7 @@ const struct ofproto_class ofproto_dpif_class = { destruct, dealloc, run, + run_fast, wait, flush, get_features, @@ -5718,6 +6047,7 @@ const struct ofproto_class ofproto_dpif_class = { port_query_by_name, port_add, port_del, + port_get_stats, port_dump_start, port_dump_next, port_dump_done, @@ -5748,7 +6078,9 @@ const struct ofproto_class ofproto_dpif_class = { bundle_set, bundle_remove, mirror_set, + mirror_get_stats, set_flood_vlans, is_mirror_output_bundle, forward_bpdu_changed, + set_realdev, };
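The sketches below are not part of the patch; they are small, self-contained C programs with invented names that illustrate a few techniques the patch relies on.

The new run_fast() handler keeps calling handle_upcalls() in batches until a batch comes back short or a fixed work budget is spent. A minimal model of that loop, assuming a stand-in handle_batch() in place of the real handle_upcalls():

/* Minimal model of the run_fast() loop added above: keep handling batches
 * of upcalls until a batch comes back short (or fails), capping the total
 * work done per call.  handle_batch() is a stand-in for handle_upcalls();
 * it is not the real function. */
#include <stdio.h>

#define FLOW_MISS_MAX_BATCH 50

/* Pretends to handle up to 'max_batch' upcalls; returns how many it handled.
 * Here it just drains a fake queue of 'queued' pending upcalls. */
static int
handle_batch(int *queued, int max_batch)
{
    int n = *queued < max_batch ? *queued : max_batch;
    *queued -= n;
    return n;
}

static int
run_fast_example(int *queued)
{
    int work = 0;

    while (work < FLOW_MISS_MAX_BATCH) {
        int retval = handle_batch(queued, FLOW_MISS_MAX_BATCH - work);
        if (retval <= 0) {
            return -retval;     /* 0 on a short batch, errno on failure. */
        }
        work += retval;
    }
    return 0;                   /* Work budget spent; more may remain. */
}

int
main(void)
{
    int queued = 120;

    while (queued) {
        run_fast_example(&queued);
        printf("%d upcalls still queued\n", queued);
    }
    return 0;
}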
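port_get_stats() folds the bridge's userspace counters into the LOCAL port's netdev statistics: packets that userspace generated (e.g. via send_packet()) count as LOCAL receptions, packets that userspace consumed (e.g. STP) count as LOCAL transmissions, and counters a device reports as unsupported (UINT64_MAX) are left alone. A simplified sketch of that merge, with invented structure and function names:

/* Sketch of the LOCAL-port accounting added in port_get_stats() and
 * ofproto_update_local_port_stats() above.  The structure and function
 * names are simplified stand-ins, not the real OVS types. */
#include <stdint.h>
#include <stdio.h>

struct example_stats {
    uint64_t rx_packets, rx_bytes;
    uint64_t tx_packets, tx_bytes;
};

static void
merge_local_stats(struct example_stats *netdev_stats,
                  const struct example_stats *userspace)
{
    /* Packets transmitted from userspace look like receptions on LOCAL... */
    if (netdev_stats->rx_packets != UINT64_MAX) {
        netdev_stats->rx_packets += userspace->tx_packets;
    }
    if (netdev_stats->rx_bytes != UINT64_MAX) {
        netdev_stats->rx_bytes += userspace->tx_bytes;
    }

    /* ...and packets consumed in userspace look like transmissions. */
    if (netdev_stats->tx_packets != UINT64_MAX) {
        netdev_stats->tx_packets += userspace->rx_packets;
    }
    if (netdev_stats->tx_bytes != UINT64_MAX) {
        netdev_stats->tx_bytes += userspace->rx_bytes;
    }
}

int
main(void)
{
    struct example_stats dev = { 100, 64000, UINT64_MAX, UINT64_MAX };
    struct example_stats us = { 3, 180, 7, 420 };

    merge_local_stats(&dev, &us);
    printf("rx %llu pkts / %llu bytes, tx %llu pkts / %llu bytes\n",
           (unsigned long long) dev.rx_packets,
           (unsigned long long) dev.rx_bytes,
           (unsigned long long) dev.tx_packets,
           (unsigned long long) dev.tx_bytes);
    return 0;
}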
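update_mirror_stats() walks a mirror bitmap by repeatedly taking the index of the lowest set bit (mirror_mask_ffs) and then clearing that bit with 'mirrors &= mirrors - 1'. The same idiom in isolation, using plain ffs() and an example mask:

/* Standalone illustration of the bit-iteration idiom used in
 * update_mirror_stats() above: visit each set bit in a mirror mask,
 * clearing the lowest set bit on every step.  Names here are invented
 * for the example and are not part of the patch. */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>            /* ffs() */

typedef uint32_t mirror_mask_t;

int
main(void)
{
    mirror_mask_t mirrors = 0x15;   /* Mirrors 0, 2, and 4 are set. */

    for (; mirrors; mirrors &= mirrors - 1) {
        int idx = ffs(mirrors) - 1; /* Index of the lowest set bit. */
        printf("crediting stats to mirror %d\n", idx);
    }
    return 0;
}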
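The VLAN-splinter helpers map (real device, VLAN ID) pairs to fake VLAN devices and back, falling back to the real port when no splinter is configured. The real code keeps two hmaps (realdev_vid_map and vlandev_map); the sketch below uses a plain array purely to show the lookup semantics, so all names and the table contents are illustrative only:

/* A much-simplified model of the vsp_*() lookups introduced above, using a
 * linear array instead of the two hmaps in the real code. */
#include <stdint.h>
#include <stdio.h>

struct vlan_splinter_example {
    uint16_t realdev_ofp_port;
    uint16_t vlandev_ofp_port;
    int vid;
};

static const struct vlan_splinter_example table[] = {
    { 1, 5, 10 },               /* "eth0.10": VLAN 10 on real port 1. */
    { 1, 6, 20 },               /* "eth0.20": VLAN 20 on real port 1. */
};
#define N_SPLINTERS (sizeof table / sizeof table[0])

/* Maps (realdev, vid) to the matching VLAN device, or returns 'realdev'
 * unchanged when no splinter is configured, mirroring the behavior of
 * vsp_realdev_to_vlandev(). */
static uint16_t
realdev_to_vlandev(uint16_t realdev, int vid)
{
    size_t i;

    for (i = 0; i < N_SPLINTERS; i++) {
        if (table[i].realdev_ofp_port == realdev && table[i].vid == vid) {
            return table[i].vlandev_ofp_port;
        }
    }
    return realdev;
}

/* Maps a VLAN device back to its real device and VLAN ID, or returns 0 when
 * 'vlandev' is not a splinter port, mirroring vsp_vlandev_to_realdev(). */
static uint16_t
vlandev_to_realdev(uint16_t vlandev, int *vid)
{
    size_t i;

    for (i = 0; i < N_SPLINTERS; i++) {
        if (table[i].vlandev_ofp_port == vlandev) {
            *vid = table[i].vid;
            return table[i].realdev_ofp_port;
        }
    }
    return 0;
}

int
main(void)
{
    int vid = -1;
    uint16_t realdev;

    printf("port 1, VLAN 10 -> vlandev %u\n",
           (unsigned) realdev_to_vlandev(1, 10));
    printf("port 1, VLAN 30 -> vlandev %u\n",
           (unsigned) realdev_to_vlandev(1, 30));

    realdev = vlandev_to_realdev(6, &vid);
    printf("vlandev 6 -> realdev %u (vid %d)\n", (unsigned) realdev, vid);
    return 0;
}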