X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=ofproto%2Fofproto.c;h=32ce4f241d811aeca4b0f296dbc586528b842652;hb=27d34fce499f0f6b904fd27e84e81ecd44d67cfd;hp=859f416fe1312ea85baf5bef9103e64ab3d986ad;hpb=8497dd41214ddaac26928f2efa90becd1b336a52;p=openvswitch diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 859f416f..32ce4f24 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -24,6 +24,7 @@ #include #include #include +#include "byte-order.h" #include "classifier.h" #include "coverage.h" #include "discovery.h" @@ -58,18 +59,11 @@ #include "unixctl.h" #include "vconn.h" #include "vlog.h" -#include "xtoxll.h" -VLOG_DEFINE_THIS_MODULE(ofproto) +VLOG_DEFINE_THIS_MODULE(ofproto); #include "sflow_api.h" -enum { - TABLEID_HASH = 0, - TABLEID_CLASSIFIER = 1 -}; - - struct ofport { struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */ struct netdev *netdev; @@ -81,7 +75,7 @@ static void ofport_free(struct ofport *); static void hton_ofp_phy_port(struct ofp_phy_port *); static int xlate_actions(const union ofp_action *in, size_t n_in, - const flow_t *flow, struct ofproto *ofproto, + const struct flow *, struct ofproto *, const struct ofpbuf *packet, struct odp_actions *out, tag_type *tags, bool *may_set_up_flow, uint16_t *nf_output_iface); @@ -89,8 +83,7 @@ static int xlate_actions(const union ofp_action *in, size_t n_in, struct rule { struct cls_rule cr; - uint64_t flow_cookie; /* Controller-issued identifier. - (Kept in network-byte order.) */ + ovs_be64 flow_cookie; /* Controller-issued identifier. */ uint16_t idle_timeout; /* In seconds from time of last use. */ uint16_t hard_timeout; /* In seconds from time of creation. */ bool send_flow_removed; /* Send a flow removed message? 
*/ @@ -155,7 +148,7 @@ rule_is_hidden(const struct rule *rule) static struct rule *rule_create(struct ofproto *, struct rule *super, const union ofp_action *, size_t n_actions, uint16_t idle_timeout, uint16_t hard_timeout, - uint64_t flow_cookie, bool send_flow_removed); + ovs_be64 flow_cookie, bool send_flow_removed); static void rule_free(struct rule *); static void rule_destroy(struct ofproto *, struct rule *); static struct rule *rule_from_cls_rule(const struct cls_rule *); @@ -168,8 +161,7 @@ static void rule_install(struct ofproto *, struct rule *, struct rule *displaced_rule); static void rule_uninstall(struct ofproto *, struct rule *); static void rule_post_uninstall(struct ofproto *, struct rule *); -static void send_flow_removed(struct ofproto *p, struct rule *rule, - long long int now, uint8_t reason); +static void send_flow_removed(struct ofproto *, struct rule *, uint8_t reason); /* ofproto supports two kinds of OpenFlow connections: * @@ -217,6 +209,7 @@ struct ofconn { struct list node; /* In struct ofproto's "all_conns" list. */ struct rconn *rconn; /* OpenFlow connection. */ enum ofconn_type type; /* Type. */ + int flow_format; /* One of NXFF_*. */ /* OFPT_PACKET_IN related data. */ struct rconn_packet_counter *packet_in_counter; /* # queued on 'rconn'. */ @@ -251,7 +244,7 @@ BUILD_ASSERT_DECL(OFPR_ACTION == _ODPL_ACTION_NR); static struct ofconn *ofconn_create(struct ofproto *, struct rconn *, enum ofconn_type); static void ofconn_destroy(struct ofconn *); -static void ofconn_run(struct ofconn *, struct ofproto *); +static void ofconn_run(struct ofconn *); static void ofconn_wait(struct ofconn *); static bool ofconn_receives_async_msgs(const struct ofconn *); static char *ofconn_make_name(const struct ofproto *, const char *target); @@ -297,7 +290,6 @@ struct ofproto { bool need_revalidate; long long int next_expiration; struct tag_set revalidate_set; - bool tun_id_from_cookie; /* OpenFlow connections. 
*/ struct hmap controllers; /* Controller "struct ofconn"s. */ @@ -324,7 +316,7 @@ static const struct ofhooks default_ofhooks; static uint64_t pick_datapath_id(const struct ofproto *); static uint64_t pick_fallback_dpid(void); -static void ofproto_expire(struct ofproto *); +static int ofproto_expire(struct ofproto *); static void update_stats(struct ofproto *, struct rule *, const struct odp_flow_stats *); @@ -333,10 +325,7 @@ static void revalidate_cb(struct cls_rule *rule_, void *p_); static void handle_odp_msg(struct ofproto *, struct ofpbuf *); -static void handle_openflow(struct ofconn *, struct ofproto *, - struct ofpbuf *); - -static void refresh_port_groups(struct ofproto *); +static void handle_openflow(struct ofconn *, struct ofpbuf *); static struct ofport *get_port(const struct ofproto *, uint16_t odp_port); static void update_port(struct ofproto *, const char *devname); @@ -886,7 +875,6 @@ ofproto_set_sflow(struct ofproto *ofproto, struct ofport *ofport; os = ofproto->sflow = ofproto_sflow_create(ofproto->dpif); - refresh_port_groups(ofproto); HMAP_FOR_EACH (ofport, hmap_node, &ofproto->ports) { ofproto_sflow_add_port(os, ofport->odp_port, netdev_get_name(ofport->netdev)); @@ -1103,7 +1091,7 @@ ofproto_run1(struct ofproto *p) } LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &p->all_conns) { - ofconn_run(ofconn, p); + ofconn_run(ofconn); } /* Fail-open maintenance. Do this after processing the ofconns since @@ -1147,9 +1135,9 @@ ofproto_run1(struct ofproto *p) } if (time_msec() >= p->next_expiration) { + int delay = ofproto_expire(p); + p->next_expiration = time_msec() + delay; COVERAGE_INC(ofproto_expiration); - ofproto_expire(p); - p->next_expiration = time_msec() + 1000; } if (p->netflow) { @@ -1247,8 +1235,50 @@ ofproto_is_alive(const struct ofproto *p) return !hmap_is_empty(&p->controllers); } +/* Deletes port number 'odp_port' from the datapath for 'ofproto'. 
+ * + * This is almost the same as calling dpif_port_del() directly on the + * datapath, but it also makes 'ofproto' close its open netdev for the port + * (if any). This makes it possible to create a new netdev of a different + * type under the same name, which otherwise the netdev library would refuse + * to do because of the conflict. (The netdev would eventually get closed on + * the next trip through ofproto_run(), but this interface is more direct.) + * + * Returns 0 if successful, otherwise a positive errno. */ +int +ofproto_port_del(struct ofproto *ofproto, uint16_t odp_port) +{ + struct ofport *ofport = get_port(ofproto, odp_port); + const char *name = ofport ? (char *) ofport->opp.name : ""; + int error; + + error = dpif_port_del(ofproto->dpif, odp_port); + if (error) { + VLOG_ERR("%s: failed to remove port %"PRIu16" (%s) interface (%s)", + dpif_name(ofproto->dpif), odp_port, name, strerror(error)); + } else if (ofport) { + /* 'name' is ofport->opp.name and update_port() is going to destroy + * 'ofport'. Just in case update_port() refers to 'name' after it + * destroys 'ofport', make a copy of it around the update_port() + * call. */ + char *devname = xstrdup(name); + update_port(ofproto, devname); + free(devname); + } + return error; +} + +/* Checks if 'ofproto' thinks 'odp_port' should be included in floods. Returns + * true if 'odp_port' exists and should be included, false otherwise. 
*/ +bool +ofproto_port_is_floodable(struct ofproto *ofproto, uint16_t odp_port) +{ + struct ofport *ofport = get_port(ofproto, odp_port); + return ofport && !(ofport->opp.config & OFPPC_NO_FLOOD); +} + int -ofproto_send_packet(struct ofproto *p, const flow_t *flow, +ofproto_send_packet(struct ofproto *p, const struct flow *flow, const union ofp_action *actions, size_t n_actions, const struct ofpbuf *packet) { @@ -1263,14 +1293,13 @@ ofproto_send_packet(struct ofproto *p, const flow_t *flow, /* XXX Should we translate the dpif_execute() errno value into an OpenFlow * error code? */ - dpif_execute(p->dpif, flow->in_port, odp_actions.actions, - odp_actions.n_actions, packet); + dpif_execute(p->dpif, odp_actions.actions, odp_actions.n_actions, packet); return 0; } void -ofproto_add_flow(struct ofproto *p, - const flow_t *flow, uint32_t wildcards, unsigned int priority, +ofproto_add_flow(struct ofproto *p, const struct flow *flow, + uint32_t wildcards, unsigned int priority, const union ofp_action *actions, size_t n_actions, int idle_timeout) { @@ -1283,14 +1312,15 @@ ofproto_add_flow(struct ofproto *p, } void -ofproto_delete_flow(struct ofproto *ofproto, const flow_t *flow, +ofproto_delete_flow(struct ofproto *ofproto, const struct flow *flow, uint32_t wildcards, unsigned int priority) { + struct cls_rule target; struct rule *rule; + cls_rule_from_flow(flow, wildcards, priority, &target); rule = rule_from_cls_rule(classifier_find_rule_exactly(&ofproto->cls, - flow, wildcards, - priority)); + &target)); if (rule) { rule_remove(ofproto, rule); } @@ -1334,6 +1364,8 @@ reinit_ports(struct ofproto *p) size_t n_odp_ports; size_t i; + COVERAGE_INC(ofproto_reinit_ports); + svec_init(&devnames); HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { svec_add (&devnames, (char *) ofport->opp.name); @@ -1351,38 +1383,6 @@ reinit_ports(struct ofproto *p) svec_destroy(&devnames); } -static size_t -refresh_port_group(struct ofproto *p, unsigned int group) -{ - uint16_t *ports; - size_t 
n_ports; - struct ofport *port; - - assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD); - - ports = xmalloc(hmap_count(&p->ports) * sizeof *ports); - n_ports = 0; - HMAP_FOR_EACH (port, hmap_node, &p->ports) { - if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { - ports[n_ports++] = port->odp_port; - } - } - dpif_port_group_set(p->dpif, group, ports, n_ports); - free(ports); - - return n_ports; -} - -static void -refresh_port_groups(struct ofproto *p) -{ - size_t n_flood = refresh_port_group(p, DP_GROUP_FLOOD); - size_t n_all = refresh_port_group(p, DP_GROUP_ALL); - if (p->sflow) { - ofproto_sflow_set_group_sizes(p->sflow, n_flood, n_all); - } -} - static struct ofport * make_ofport(const struct odp_port *odp_port) { @@ -1390,7 +1390,6 @@ make_ofport(const struct odp_port *odp_port) enum netdev_flags flags; struct ofport *ofport; struct netdev *netdev; - bool carrier; int error; memset(&netdev_options, 0, sizeof netdev_options); @@ -1418,8 +1417,7 @@ make_ofport(const struct odp_port *odp_port) netdev_get_flags(netdev, &flags); ofport->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN; - netdev_get_carrier(netdev, &carrier); - ofport->opp.state = carrier ? 0 : OFPPS_LINK_DOWN; + ofport->opp.state = netdev_get_carrier(netdev) ? 0 : OFPPS_LINK_DOWN; netdev_get_features(netdev, &ofport->opp.curr, &ofport->opp.advertised, @@ -1481,9 +1479,6 @@ send_port_status(struct ofproto *p, const struct ofport *ofport, hton_ofp_phy_port(&ops->desc); queue_tx(b, ofconn, NULL); } - if (p->ofhooks->port_changed_cb) { - p->ofhooks->port_changed_cb(reason, &ofport->opp, p->aux); - } } static void @@ -1601,9 +1596,6 @@ update_port(struct ofproto *p, const char *devname) : !new_ofport ? OFPPR_DELETE : OFPPR_MODIFY)); ofport_free(old_ofport); - - /* Update port groups. 
*/ - refresh_port_groups(p); } static int @@ -1629,7 +1621,6 @@ init_ports(struct ofproto *p) } } free(ports); - refresh_port_groups(p); return 0; } @@ -1641,6 +1632,7 @@ ofconn_create(struct ofproto *p, struct rconn *rconn, enum ofconn_type type) list_push_back(&p->all_conns, &ofconn->node); ofconn->rconn = rconn; ofconn->type = type; + ofconn->flow_format = NXFF_OPENFLOW10; ofconn->role = NX_ROLE_OTHER; ofconn->packet_in_counter = rconn_packet_counter_create (); ofconn->pktbuf = NULL; @@ -1667,8 +1659,9 @@ ofconn_destroy(struct ofconn *ofconn) } static void -ofconn_run(struct ofconn *ofconn, struct ofproto *p) +ofconn_run(struct ofconn *ofconn) { + struct ofproto *p = ofconn->ofproto; int iteration; size_t i; @@ -1705,7 +1698,7 @@ ofconn_run(struct ofconn *ofconn, struct ofproto *p) if (p->fail_open) { fail_open_maybe_recover(p->fail_open); } - handle_openflow(ofconn, p, of_msg); + handle_openflow(ofconn, of_msg); ofpbuf_delete(of_msg); } } @@ -1846,7 +1839,7 @@ static struct rule * rule_create(struct ofproto *ofproto, struct rule *super, const union ofp_action *actions, size_t n_actions, uint16_t idle_timeout, uint16_t hard_timeout, - uint64_t flow_cookie, bool send_flow_removed) + ovs_be64 flow_cookie, bool send_flow_removed) { struct rule *rule = xzalloc(sizeof *rule); rule->idle_timeout = idle_timeout; @@ -1907,7 +1900,7 @@ rule_destroy(struct ofproto *ofproto, struct rule *rule) } static bool -rule_has_out_port(const struct rule *rule, uint16_t out_port) +rule_has_out_port(const struct rule *rule, ovs_be16 out_port) { const union ofp_action *oa; struct actions_iterator i; @@ -1952,8 +1945,7 @@ execute_odp_actions(struct ofproto *ofproto, uint16_t in_port, } else { int error; - error = dpif_execute(ofproto->dpif, in_port, - actions, n_actions, packet); + error = dpif_execute(ofproto->dpif, actions, n_actions, packet); ofpbuf_delete(packet); return !error; } @@ -1978,7 +1970,7 @@ execute_odp_actions(struct ofproto *ofproto, uint16_t in_port, * Takes ownership 
of 'packet'. */ static void rule_execute(struct ofproto *ofproto, struct rule *rule, - struct ofpbuf *packet, const flow_t *flow) + struct ofpbuf *packet, const struct flow *flow) { const union odp_action *actions; struct odp_flow_stats stats; @@ -2037,7 +2029,7 @@ rule_insert(struct ofproto *p, struct rule *rule, struct ofpbuf *packet, /* Send the packet and credit it to the rule. */ if (packet) { - flow_t flow; + struct flow flow; flow_extract(packet, 0, in_port, &flow); rule_execute(p, rule, packet, &flow); } @@ -2059,7 +2051,7 @@ rule_insert(struct ofproto *p, struct rule *rule, struct ofpbuf *packet, static struct rule * rule_create_subrule(struct ofproto *ofproto, struct rule *rule, - const flow_t *flow) + const struct flow *flow) { struct rule *subrule = rule_create(ofproto, rule, NULL, 0, rule->idle_timeout, rule->hard_timeout, @@ -2067,7 +2059,11 @@ rule_create_subrule(struct ofproto *ofproto, struct rule *rule, COVERAGE_INC(ofproto_subrule_create); cls_rule_from_flow(flow, 0, (rule->cr.priority <= UINT16_MAX ? 
UINT16_MAX : rule->cr.priority), &subrule->cr); - classifier_insert_exact(&ofproto->cls, &subrule->cr); + + if (classifier_insert(&ofproto->cls, &subrule->cr)) { + /* Can't happen. */ + NOT_REACHED(); + } return subrule; } @@ -2131,7 +2127,7 @@ do_put_flow(struct ofproto *ofproto, struct rule *rule, int flags, struct odp_flow_put *put) { memset(&put->flow.stats, 0, sizeof put->flow.stats); - put->flow.key = rule->cr.flow; + odp_flow_key_from_flow(&put->flow.key, &rule->cr.flow); put->flow.actions = rule->odp_actions; put->flow.n_actions = rule->n_odp_actions; put->flow.flags = 0; @@ -2235,7 +2231,7 @@ rule_uninstall(struct ofproto *p, struct rule *rule) if (rule->installed) { struct odp_flow odp_flow; - odp_flow.key = rule->cr.flow; + odp_flow_key_from_flow(&odp_flow.key, &rule->cr.flow); odp_flow.actions = NULL; odp_flow.n_actions = 0; odp_flow.flags = 0; @@ -2301,34 +2297,15 @@ queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn, } } -static void -send_error(const struct ofconn *ofconn, const struct ofp_header *oh, - int error, const void *data, size_t len) -{ - struct ofpbuf *buf; - struct ofp_error_msg *oem; - - if (!(error >> 16)) { - VLOG_WARN_RL(&rl, "not sending bad error code %d to controller", - error); - return; - } - - COVERAGE_INC(ofproto_error); - oem = make_openflow_xid(len + sizeof *oem, OFPT_ERROR, - oh ? 
oh->xid : 0, &buf); - oem->type = htons((unsigned int) error >> 16); - oem->code = htons(error & 0xffff); - memcpy(oem->data, data, len); - queue_tx(buf, ofconn, ofconn->reply_counter); -} - static void send_error_oh(const struct ofconn *ofconn, const struct ofp_header *oh, int error) { - size_t oh_length = ntohs(oh->length); - send_error(ofconn, oh, error, oh, MIN(oh_length, 64)); + struct ofpbuf *buf = make_ofp_error_msg(error, oh); + if (buf) { + COVERAGE_INC(ofproto_error); + queue_tx(buf, ofconn, ofconn->reply_counter); + } } static void @@ -2352,15 +2329,14 @@ handle_echo_request(struct ofconn *ofconn, struct ofp_header *oh) } static int -handle_features_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) +handle_features_request(struct ofconn *ofconn, struct ofp_header *oh) { struct ofp_switch_features *osf; struct ofpbuf *buf; struct ofport *port; osf = make_openflow_xid(sizeof *osf, OFPT_FEATURES_REPLY, oh->xid, &buf); - osf->datapath_id = htonll(p->datapath_id); + osf->datapath_id = htonll(ofconn->ofproto->datapath_id); osf->n_buffers = htonl(pktbuf_capacity()); osf->n_tables = 2; osf->capabilities = htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS | @@ -2378,7 +2354,7 @@ handle_features_request(struct ofproto *p, struct ofconn *ofconn, (1u << OFPAT_SET_TP_DST) | (1u << OFPAT_ENQUEUE)); - HMAP_FOR_EACH (port, hmap_node, &p->ports) { + HMAP_FOR_EACH (port, hmap_node, &ofconn->ofproto->ports) { hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp)); } @@ -2387,8 +2363,7 @@ handle_features_request(struct ofproto *p, struct ofconn *ofconn, } static int -handle_get_config_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) +handle_get_config_request(struct ofconn *ofconn, struct ofp_header *oh) { struct ofpbuf *buf; struct ofp_switch_config *osc; @@ -2396,7 +2371,7 @@ handle_get_config_request(struct ofproto *p, struct ofconn *ofconn, bool drop_frags; /* Figure out flags. 
*/ - dpif_get_drop_frags(p->dpif, &drop_frags); + dpif_get_drop_frags(ofconn->ofproto->dpif, &drop_frags); flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL; /* Send reply. */ @@ -2409,8 +2384,7 @@ handle_get_config_request(struct ofproto *p, struct ofconn *ofconn, } static int -handle_set_config(struct ofproto *p, struct ofconn *ofconn, - struct ofp_switch_config *osc) +handle_set_config(struct ofconn *ofconn, struct ofp_switch_config *osc) { uint16_t flags; int error; @@ -2424,10 +2398,10 @@ handle_set_config(struct ofproto *p, struct ofconn *ofconn, if (ofconn->type == OFCONN_PRIMARY && ofconn->role != NX_ROLE_SLAVE) { switch (flags & OFPC_FRAG_MASK) { case OFPC_FRAG_NORMAL: - dpif_set_drop_frags(p->dpif, false); + dpif_set_drop_frags(ofconn->ofproto->dpif, false); break; case OFPC_FRAG_DROP: - dpif_set_drop_frags(p->dpif, true); + dpif_set_drop_frags(ofconn->ofproto->dpif, true); break; default: VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")", @@ -2441,17 +2415,6 @@ handle_set_config(struct ofproto *p, struct ofconn *ofconn, return 0; } -static void -add_output_group_action(struct odp_actions *actions, uint16_t group, - uint16_t *nf_output_iface) -{ - odp_actions_add(actions, ODPAT_OUTPUT_GROUP)->output_group.group = group; - - if (group == DP_GROUP_ALL || group == DP_GROUP_FLOOD) { - *nf_output_iface = NF_OUT_FLOOD; - } -} - static void add_controller_action(struct odp_actions *actions, uint16_t max_len) { @@ -2461,7 +2424,7 @@ add_controller_action(struct odp_actions *actions, uint16_t max_len) struct action_xlate_ctx { /* Input. */ - flow_t flow; /* Flow to which these actions correspond. */ + struct flow flow; /* Flow to which these actions correspond. */ int recurse; /* Recursion level, via xlate_table_action. 
*/ struct ofproto *ofproto; const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a @@ -2506,10 +2469,11 @@ add_output_action(struct action_xlate_ctx *ctx, uint16_t port) } static struct rule * -lookup_valid_rule(struct ofproto *ofproto, const flow_t *flow) +lookup_valid_rule(struct ofproto *ofproto, const struct flow *flow) { struct rule *rule; - rule = rule_from_cls_rule(classifier_lookup(&ofproto->cls, flow)); + rule = rule_from_cls_rule(classifier_lookup(&ofproto->cls, flow, + CLS_INC_ALL)); /* The rule we found might not be valid, since we could be in need of * revalidation. If it is not valid, don't return it. */ @@ -2556,6 +2520,21 @@ xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port) } } +static void +flood_packets(struct ofproto *ofproto, uint16_t odp_in_port, uint32_t mask, + uint16_t *nf_output_iface, struct odp_actions *actions) +{ + struct ofport *ofport; + + HMAP_FOR_EACH (ofport, hmap_node, &ofproto->ports) { + uint16_t odp_port = ofport->odp_port; + if (odp_port != odp_in_port && !(ofport->opp.config & mask)) { + odp_actions_add(actions, ODPAT_OUTPUT)->output.port = odp_port; + } + } + *nf_output_iface = NF_OUT_FLOOD; +} + static void xlate_output_action__(struct action_xlate_ctx *ctx, uint16_t port, uint16_t max_len) @@ -2582,11 +2561,12 @@ xlate_output_action__(struct action_xlate_ctx *ctx, } break; case OFPP_FLOOD: - add_output_group_action(ctx->out, DP_GROUP_FLOOD, - &ctx->nf_output_iface); + flood_packets(ctx->ofproto, ctx->flow.in_port, OFPPC_NO_FLOOD, + &ctx->nf_output_iface, ctx->out); break; case OFPP_ALL: - add_output_group_action(ctx->out, DP_GROUP_ALL, &ctx->nf_output_iface); + flood_packets(ctx->ofproto, ctx->flow.in_port, 0, + &ctx->nf_output_iface, ctx->out); break; case OFPP_CONTROLLER: add_controller_action(ctx->out, max_len); @@ -2764,13 +2744,17 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, break; case OFPAT_SET_VLAN_VID: - oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_VID); - 
ctx->flow.dl_vlan = oa->vlan_vid.vlan_vid = ia->vlan_vid.vlan_vid; + oa = odp_actions_add(ctx->out, ODPAT_SET_DL_TCI); + oa->dl_tci.tci = ia->vlan_vid.vlan_vid; + oa->dl_tci.tci |= htons(ctx->flow.dl_vlan_pcp << VLAN_PCP_SHIFT); + ctx->flow.dl_vlan = ia->vlan_vid.vlan_vid; break; case OFPAT_SET_VLAN_PCP: - oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_PCP); - ctx->flow.dl_vlan_pcp = oa->vlan_pcp.vlan_pcp = ia->vlan_pcp.vlan_pcp; + oa = odp_actions_add(ctx->out, ODPAT_SET_DL_TCI); + oa->dl_tci.tci = htons(ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT); + oa->dl_tci.tci |= ctx->flow.dl_vlan; + ctx->flow.dl_vlan_pcp = ia->vlan_pcp.vlan_pcp; break; case OFPAT_STRIP_VLAN: @@ -2837,7 +2821,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, static int xlate_actions(const union ofp_action *in, size_t n_in, - const flow_t *flow, struct ofproto *ofproto, + const struct flow *flow, struct ofproto *ofproto, const struct ofpbuf *packet, struct odp_actions *out, tag_type *tags, bool *may_set_up_flow, uint16_t *nf_output_iface) @@ -2881,18 +2865,14 @@ xlate_actions(const union ofp_action *in, size_t n_in, * error message code (composed with ofp_mkerr()) for the caller to propagate * upward. Otherwise, returns 0. * - * 'oh' is used to make log messages more informative. */ + * The log message mentions 'msg_type'. 
*/ static int -reject_slave_controller(struct ofconn *ofconn, const struct ofp_header *oh) +reject_slave_controller(struct ofconn *ofconn, const char *msg_type) { if (ofconn->type == OFCONN_PRIMARY && ofconn->role == NX_ROLE_SLAVE) { static struct vlog_rate_limit perm_rl = VLOG_RATE_LIMIT_INIT(1, 5); - char *type_name; - - type_name = ofp_message_type_to_string(oh->type); VLOG_WARN_RL(&perm_rl, "rejecting %s message from slave controller", - type_name); - free(type_name); + msg_type); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_EPERM); } else { @@ -2901,18 +2881,18 @@ reject_slave_controller(struct ofconn *ofconn, const struct ofp_header *oh) } static int -handle_packet_out(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) +handle_packet_out(struct ofconn *ofconn, struct ofp_header *oh) { + struct ofproto *p = ofconn->ofproto; struct ofp_packet_out *opo; struct ofpbuf payload, *buffer; struct odp_actions actions; + struct flow flow; int n_actions; uint16_t in_port; - flow_t flow; int error; - error = reject_slave_controller(ofconn, oh); + error = reject_slave_controller(ofconn, "OFPT_PACKET_OUT"); if (error) { return error; } @@ -2938,15 +2918,12 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn, flow_extract(&payload, 0, ofp_port_to_odp_port(ntohs(opo->in_port)), &flow); error = xlate_actions((const union ofp_action *) opo->actions, n_actions, &flow, p, &payload, &actions, NULL, NULL, NULL); - if (error) { - return error; + if (!error) { + dpif_execute(p->dpif, actions.actions, actions.n_actions, &payload); } - - dpif_execute(p->dpif, flow.in_port, actions.actions, actions.n_actions, - &payload); ofpbuf_delete(buffer); - return 0; + return error; } static void @@ -2961,31 +2938,28 @@ update_port_config(struct ofproto *p, struct ofport *port, netdev_turn_flags_on(port->netdev, NETDEV_UP, true); } } -#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD) +#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP 
| \ + OFPPC_NO_FWD | OFPPC_NO_FLOOD) if (mask & REVALIDATE_BITS) { COVERAGE_INC(ofproto_costly_flags); port->opp.config ^= mask & REVALIDATE_BITS; p->need_revalidate = true; } #undef REVALIDATE_BITS - if (mask & OFPPC_NO_FLOOD) { - port->opp.config ^= OFPPC_NO_FLOOD; - refresh_port_groups(p); - } if (mask & OFPPC_NO_PACKET_IN) { port->opp.config ^= OFPPC_NO_PACKET_IN; } } static int -handle_port_mod(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) +handle_port_mod(struct ofconn *ofconn, struct ofp_header *oh) { + struct ofproto *p = ofconn->ofproto; const struct ofp_port_mod *opm; struct ofport *port; int error; - error = reject_slave_controller(ofconn, oh); + error = reject_slave_controller(ofconn, "OFPT_PORT_MOD"); if (error) { return error; } @@ -3010,7 +2984,7 @@ handle_port_mod(struct ofproto *p, struct ofconn *ofconn, } static struct ofpbuf * -make_stats_reply(uint32_t xid, uint16_t type, size_t body_len) +make_stats_reply(ovs_be32 xid, ovs_be16 type, size_t body_len) { struct ofp_stats_reply *osr; struct ofpbuf *msg; @@ -3043,9 +3017,10 @@ append_stats_reply(size_t nbytes, struct ofconn *ofconn, struct ofpbuf **msgp) } static int -handle_desc_stats_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_stats_request *request) +handle_desc_stats_request(struct ofconn *ofconn, + struct ofp_stats_request *request) { + struct ofproto *p = ofconn->ofproto; struct ofp_desc_stats *ods; struct ofpbuf *msg; @@ -3062,56 +3037,34 @@ handle_desc_stats_request(struct ofproto *p, struct ofconn *ofconn, return 0; } -static void -count_subrules(struct cls_rule *cls_rule, void *n_subrules_) -{ - struct rule *rule = rule_from_cls_rule(cls_rule); - int *n_subrules = n_subrules_; - - if (rule->super) { - (*n_subrules)++; - } -} - static int -handle_table_stats_request(struct ofproto *p, struct ofconn *ofconn, +handle_table_stats_request(struct ofconn *ofconn, struct ofp_stats_request *request) { + struct ofproto *p = ofconn->ofproto; struct 
ofp_table_stats *ots; struct ofpbuf *msg; - struct odp_stats dpstats; - int n_exact, n_subrules, n_wild; + struct rule *rule; + int n_rules; msg = start_stats_reply(request, sizeof *ots * 2); - /* Count rules of various kinds. */ - n_subrules = 0; - classifier_for_each(&p->cls, CLS_INC_EXACT, count_subrules, &n_subrules); - n_exact = classifier_count_exact(&p->cls) - n_subrules; - n_wild = classifier_count(&p->cls) - classifier_count_exact(&p->cls); - - /* Hash table. */ - dpif_get_dp_stats(p->dpif, &dpstats); - ots = append_stats_reply(sizeof *ots, ofconn, &msg); - memset(ots, 0, sizeof *ots); - ots->table_id = TABLEID_HASH; - strcpy(ots->name, "hash"); - ots->wildcards = htonl(0); - ots->max_entries = htonl(dpstats.max_capacity); - ots->active_count = htonl(n_exact); - ots->lookup_count = htonll(dpstats.n_frags + dpstats.n_hit + - dpstats.n_missed); - ots->matched_count = htonll(dpstats.n_hit); /* XXX */ + /* Count rules other than subrules. */ + n_rules = classifier_count(&p->cls); + CLASSIFIER_FOR_EACH_EXACT_RULE (rule, cr, &p->cls) { + if (rule->super) { + n_rules--; + } + } /* Classifier table. */ ots = append_stats_reply(sizeof *ots, ofconn, &msg); memset(ots, 0, sizeof *ots); - ots->table_id = TABLEID_CLASSIFIER; strcpy(ots->name, "classifier"); - ots->wildcards = p->tun_id_from_cookie ? htonl(OVSFW_ALL) - : htonl(OFPFW_ALL); - ots->max_entries = htonl(65536); - ots->active_count = htonl(n_wild); + ots->wildcards = (ofconn->flow_format == NXFF_OPENFLOW10 + ? htonl(OFPFW_ALL) : htonl(OVSFW_ALL)); + ots->max_entries = htonl(1024 * 1024); /* An arbitrary big number. 
*/ + ots->active_count = htonl(n_rules); ots->lookup_count = htonll(0); /* XXX */ ots->matched_count = htonll(0); /* XXX */ @@ -3149,10 +3102,10 @@ append_port_stat(struct ofport *port, struct ofconn *ofconn, } static int -handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_stats_request *osr, +handle_port_stats_request(struct ofconn *ofconn, struct ofp_stats_request *osr, size_t arg_size) { + struct ofproto *p = ofconn->ofproto; struct ofp_port_stats_request *psr; struct ofp_port_stats *ops; struct ofpbuf *msg; @@ -3180,9 +3133,8 @@ handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn, } struct flow_stats_cbdata { - struct ofproto *ofproto; struct ofconn *ofconn; - uint16_t out_port; + ovs_be16 out_port; struct ofpbuf *msg; }; @@ -3216,12 +3168,12 @@ query_stats(struct ofproto *p, struct rule *rule, if (rule->cr.wc.wildcards) { size_t i = 0; LIST_FOR_EACH (subrule, list, &rule->list) { - odp_flows[i++].key = subrule->cr.flow; + odp_flow_key_from_flow(&odp_flows[i++].key, &subrule->cr.flow); packet_count += subrule->packet_count; byte_count += subrule->byte_count; } } else { - odp_flows[0].key = rule->cr.flow; + odp_flow_key_from_flow(&odp_flows[0].key, &rule->cr.flow); } /* Fetch up-to-date statistics from the datapath and add them in. 
*/ @@ -3240,6 +3192,14 @@ query_stats(struct ofproto *p, struct rule *rule, *byte_countp = byte_count; } +static void +calc_flow_duration(long long int start, ovs_be32 *sec, ovs_be32 *nsec) +{ + long long int msecs = time_msec() - start; + *sec = htonl(msecs / 1000); + *nsec = htonl((msecs % 1000) * (1000 * 1000)); +} + static void flow_stats_cb(struct cls_rule *rule_, void *cbdata_) { @@ -3248,9 +3208,6 @@ flow_stats_cb(struct cls_rule *rule_, void *cbdata_) struct ofp_flow_stats *ofs; uint64_t packet_count, byte_count; size_t act_len, len; - long long int tdiff = time_msec() - rule->created; - uint32_t sec = tdiff / 1000; - uint32_t msec = tdiff - (sec * 1000); if (rule_is_hidden(rule) || !rule_has_out_port(rule, cbdata->out_port)) { return; @@ -3259,16 +3216,15 @@ flow_stats_cb(struct cls_rule *rule_, void *cbdata_) act_len = sizeof *rule->actions * rule->n_actions; len = offsetof(struct ofp_flow_stats, actions) + act_len; - query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); + query_stats(cbdata->ofconn->ofproto, rule, &packet_count, &byte_count); ofs = append_stats_reply(len, cbdata->ofconn, &cbdata->msg); ofs->length = htons(len); - ofs->table_id = rule->cr.wc.wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH; + ofs->table_id = 0; ofs->pad = 0; flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, - cbdata->ofproto->tun_id_from_cookie, &ofs->match); - ofs->duration_sec = htonl(sec); - ofs->duration_nsec = htonl(msec * 1000000); + cbdata->ofconn->flow_format, &ofs->match); + calc_flow_duration(rule->created, &ofs->duration_sec, &ofs->duration_nsec); ofs->cookie = rule->flow_cookie; ofs->priority = htons(rule->cr.priority); ofs->idle_timeout = htons(rule->idle_timeout); @@ -3284,16 +3240,12 @@ flow_stats_cb(struct cls_rule *rule_, void *cbdata_) static int table_id_to_include(uint8_t table_id) { - return (table_id == TABLEID_HASH ? CLS_INC_EXACT - : table_id == TABLEID_CLASSIFIER ? CLS_INC_WILD - : table_id == 0xff ? 
CLS_INC_ALL - : 0); + return table_id == 0 || table_id == 0xff ? CLS_INC_ALL : 0; } static int -handle_flow_stats_request(struct ofproto *p, struct ofconn *ofconn, - const struct ofp_stats_request *osr, - size_t arg_size) +handle_flow_stats_request(struct ofconn *ofconn, + const struct ofp_stats_request *osr, size_t arg_size) { struct ofp_flow_stats_request *fsr; struct flow_stats_cbdata cbdata; @@ -3305,12 +3257,11 @@ handle_flow_stats_request(struct ofproto *p, struct ofconn *ofconn, fsr = (struct ofp_flow_stats_request *) osr->body; COVERAGE_INC(ofproto_flows_req); - cbdata.ofproto = p; cbdata.ofconn = ofconn; cbdata.out_port = fsr->out_port; cbdata.msg = start_stats_reply(osr, 1024); - cls_rule_from_match(&fsr->match, 0, false, 0, &target); - classifier_for_each_match(&p->cls, &target, + cls_rule_from_match(&fsr->match, 0, NXFF_OPENFLOW10, 0, &target); + classifier_for_each_match(&ofconn->ofproto->cls, &target, table_id_to_include(fsr->table_id), flow_stats_cb, &cbdata); queue_tx(cbdata.msg, ofconn, ofconn->reply_counter); @@ -3339,7 +3290,7 @@ flow_stats_ds_cb(struct cls_rule *rule_, void *cbdata_) query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, - cbdata->ofproto->tun_id_from_cookie, &match); + NXFF_OPENFLOW10, &match); ds_put_format(results, "duration=%llds, ", (time_msec() - rule->created) / 1000); @@ -3349,6 +3300,8 @@ flow_stats_ds_cb(struct cls_rule *rule_, void *cbdata_) ofp_print_match(results, &match, true); if (act_len > 0) { ofp_print_actions(results, &rule->actions->header, act_len); + } else { + ds_put_cstr(results, "drop"); } ds_put_cstr(results, "\n"); } @@ -3368,14 +3321,14 @@ ofproto_get_all_flows(struct ofproto *p, struct ds *results) cbdata.ofproto = p; cbdata.results = results; - cls_rule_from_match(&match, 0, false, 0, &target); + cls_rule_from_match(&match, 0, NXFF_OPENFLOW10, 0, &target); classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, flow_stats_ds_cb, 
&cbdata); } struct aggregate_stats_cbdata { struct ofproto *ofproto; - uint16_t out_port; + ovs_be16 out_port; uint64_t packet_count; uint64_t byte_count; uint32_t n_flows; @@ -3399,38 +3352,50 @@ aggregate_stats_cb(struct cls_rule *rule_, void *cbdata_) cbdata->n_flows++; } +static void +query_aggregate_stats(struct ofproto *ofproto, struct cls_rule *target, + uint16_t out_port, uint8_t table_id, + struct ofp_aggregate_stats_reply *oasr) +{ + struct aggregate_stats_cbdata cbdata; + + COVERAGE_INC(ofproto_agg_request); + cbdata.ofproto = ofproto; + cbdata.out_port = out_port; + cbdata.packet_count = 0; + cbdata.byte_count = 0; + cbdata.n_flows = 0; + classifier_for_each_match(&ofproto->cls, target, + table_id_to_include(table_id), + aggregate_stats_cb, &cbdata); + + oasr->flow_count = htonl(cbdata.n_flows); + oasr->packet_count = htonll(cbdata.packet_count); + oasr->byte_count = htonll(cbdata.byte_count); + memset(oasr->pad, 0, sizeof oasr->pad); +} + static int -handle_aggregate_stats_request(struct ofproto *p, struct ofconn *ofconn, +handle_aggregate_stats_request(struct ofconn *ofconn, const struct ofp_stats_request *osr, size_t arg_size) { - struct ofp_aggregate_stats_request *asr; + struct ofp_aggregate_stats_request *request; struct ofp_aggregate_stats_reply *reply; - struct aggregate_stats_cbdata cbdata; struct cls_rule target; struct ofpbuf *msg; - if (arg_size != sizeof *asr) { + if (arg_size != sizeof *request) { return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); } - asr = (struct ofp_aggregate_stats_request *) osr->body; + request = (struct ofp_aggregate_stats_request *) osr->body; - COVERAGE_INC(ofproto_agg_request); - cbdata.ofproto = p; - cbdata.out_port = asr->out_port; - cbdata.packet_count = 0; - cbdata.byte_count = 0; - cbdata.n_flows = 0; - cls_rule_from_match(&asr->match, 0, false, 0, &target); - classifier_for_each_match(&p->cls, &target, - table_id_to_include(asr->table_id), - aggregate_stats_cb, &cbdata); + 
cls_rule_from_match(&request->match, 0, NXFF_OPENFLOW10, 0, &target); msg = start_stats_reply(osr, sizeof *reply); reply = append_stats_reply(sizeof *reply, ofconn, &msg); - reply->flow_count = htonl(cbdata.n_flows); - reply->packet_count = htonll(cbdata.packet_count); - reply->byte_count = htonll(cbdata.byte_count); + query_aggregate_stats(ofconn->ofproto, &target, request->out_port, + request->table_id, reply); queue_tx(msg, ofconn, ofconn->reply_counter); return 0; } @@ -3484,10 +3449,11 @@ handle_queue_stats_for_port(struct ofport *port, uint32_t queue_id, } static int -handle_queue_stats_request(struct ofproto *ofproto, struct ofconn *ofconn, +handle_queue_stats_request(struct ofconn *ofconn, const struct ofp_stats_request *osr, size_t arg_size) { + struct ofproto *ofproto = ofconn->ofproto; struct ofp_queue_stats_request *qsr; struct queue_stats_cbdata cbdata; struct ofport *port; @@ -3525,8 +3491,7 @@ handle_queue_stats_request(struct ofproto *ofproto, struct ofconn *ofconn, } static int -handle_stats_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) +handle_stats_request(struct ofconn *ofconn, struct ofp_header *oh) { struct ofp_stats_request *osr; size_t arg_size; @@ -3541,22 +3506,22 @@ handle_stats_request(struct ofproto *p, struct ofconn *ofconn, switch (ntohs(osr->type)) { case OFPST_DESC: - return handle_desc_stats_request(p, ofconn, osr); + return handle_desc_stats_request(ofconn, osr); case OFPST_FLOW: - return handle_flow_stats_request(p, ofconn, osr, arg_size); + return handle_flow_stats_request(ofconn, osr, arg_size); case OFPST_AGGREGATE: - return handle_aggregate_stats_request(p, ofconn, osr, arg_size); + return handle_aggregate_stats_request(ofconn, osr, arg_size); case OFPST_TABLE: - return handle_table_stats_request(p, ofconn, osr); + return handle_table_stats_request(ofconn, osr); case OFPST_PORT: - return handle_port_stats_request(p, ofconn, osr, arg_size); + return handle_port_stats_request(ofconn, osr, arg_size); 
case OFPST_QUEUE: - return handle_queue_stats_request(p, ofconn, osr, arg_size); + return handle_queue_stats_request(ofconn, osr, arg_size); case OFPST_VENDOR: return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR); @@ -3602,28 +3567,27 @@ update_stats(struct ofproto *ofproto, struct rule *rule, * in which no matching flow already exists in the flow table. * * Adds the flow specified by 'ofm', which is followed by 'n_actions' - * ofp_actions, to 'p''s flow table. Returns 0 on success or an OpenFlow error - * code as encoded by ofp_mkerr() on failure. + * ofp_actions, to ofconn->ofproto's flow table. Returns 0 on success or an + * OpenFlow error code as encoded by ofp_mkerr() on failure. * * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, * if any. */ static int -add_flow(struct ofproto *p, struct ofconn *ofconn, - const struct ofp_flow_mod *ofm, size_t n_actions) +add_flow(struct ofconn *ofconn, const struct ofp_flow_mod *ofm, + size_t n_actions) { + struct ofproto *p = ofconn->ofproto; struct ofpbuf *packet; struct rule *rule; uint16_t in_port; int error; if (ofm->flags & htons(OFPFF_CHECK_OVERLAP)) { - flow_t flow; - uint32_t wildcards; + struct cls_rule cr; - flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie, - &flow, &wildcards); - if (classifier_rule_overlaps(&p->cls, &flow, wildcards, - ntohs(ofm->priority))) { + cls_rule_from_match(&ofm->match, ntohs(ofm->priority), + ofconn->flow_format, ofm->cookie, &cr); + if (classifier_rule_overlaps(&p->cls, &cr)) { return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_OVERLAP); } } @@ -3633,7 +3597,7 @@ add_flow(struct ofproto *p, struct ofconn *ofconn, ntohs(ofm->hard_timeout), ofm->cookie, ofm->flags & htons(OFPFF_SEND_FLOW_REM)); cls_rule_from_match(&ofm->match, ntohs(ofm->priority), - p->tun_id_from_cookie, ofm->cookie, &rule->cr); + ofconn->flow_format, ofm->cookie, &rule->cr); error = 0; if (ofm->buffer_id != htonl(UINT32_MAX)) { @@ -3649,25 +3613,23 @@ add_flow(struct ofproto 
*p, struct ofconn *ofconn, } static struct rule * -find_flow_strict(struct ofproto *p, const struct ofp_flow_mod *ofm) +find_flow_strict(struct ofconn *ofconn, const struct ofp_flow_mod *ofm) { - uint32_t wildcards; - flow_t flow; + struct ofproto *p = ofconn->ofproto; + struct cls_rule target; - flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie, - &flow, &wildcards); - return rule_from_cls_rule(classifier_find_rule_exactly( - &p->cls, &flow, wildcards, - ntohs(ofm->priority))); + cls_rule_from_match(&ofm->match, ntohs(ofm->priority), + ofconn->flow_format, ofm->cookie, &target); + return rule_from_cls_rule(classifier_find_rule_exactly(&p->cls, &target)); } static int -send_buffered_packet(struct ofproto *ofproto, struct ofconn *ofconn, +send_buffered_packet(struct ofconn *ofconn, struct rule *rule, const struct ofp_flow_mod *ofm) { struct ofpbuf *packet; uint16_t in_port; - flow_t flow; + struct flow flow; int error; if (ofm->buffer_id == htonl(UINT32_MAX)) { @@ -3681,7 +3643,7 @@ send_buffered_packet(struct ofproto *ofproto, struct ofconn *ofconn, } flow_extract(packet, 0, in_port, &flow); - rule_execute(ofproto, rule, packet, &flow); + rule_execute(ofconn->ofproto, rule, packet, &flow); return 0; } @@ -3705,30 +3667,30 @@ static void modify_flows_cb(struct cls_rule *, void *cbdata_); * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, * if any. 
*/ static int -modify_flows_loose(struct ofproto *p, struct ofconn *ofconn, +modify_flows_loose(struct ofconn *ofconn, const struct ofp_flow_mod *ofm, size_t n_actions) { struct modify_flows_cbdata cbdata; struct cls_rule target; - cbdata.ofproto = p; + cbdata.ofproto = ofconn->ofproto; cbdata.ofm = ofm; cbdata.n_actions = n_actions; cbdata.match = NULL; - cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, - &target); + cls_rule_from_match(&ofm->match, 0, ofconn->flow_format, + ofm->cookie, &target); - classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, + classifier_for_each_match(&ofconn->ofproto->cls, &target, CLS_INC_ALL, modify_flows_cb, &cbdata); if (cbdata.match) { /* This credits the packet to whichever flow happened to happened to * match last. That's weird. Maybe we should do a lookup for the * flow that actually matches the packet? Who knows. */ - send_buffered_packet(p, ofconn, cbdata.match, ofm); + send_buffered_packet(ofconn, cbdata.match, ofm); return 0; } else { - return add_flow(p, ofconn, ofm, n_actions); + return add_flow(ofconn, ofm, n_actions); } } @@ -3738,15 +3700,15 @@ modify_flows_loose(struct ofproto *p, struct ofconn *ofconn, * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, * if any. 
*/ static int -modify_flow_strict(struct ofproto *p, struct ofconn *ofconn, - struct ofp_flow_mod *ofm, size_t n_actions) +modify_flow_strict(struct ofconn *ofconn, struct ofp_flow_mod *ofm, + size_t n_actions) { - struct rule *rule = find_flow_strict(p, ofm); + struct rule *rule = find_flow_strict(ofconn, ofm); if (rule && !rule_is_hidden(rule)) { - modify_flow(p, ofm, n_actions, rule); - return send_buffered_packet(p, ofconn, rule, ofm); + modify_flow(ofconn->ofproto, ofm, n_actions, rule); + return send_buffered_packet(ofconn, rule, ofm); } else { - return add_flow(p, ofconn, ofm, n_actions); + return add_flow(ofconn, ofm, n_actions); } } @@ -3802,36 +3764,36 @@ modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm, struct delete_flows_cbdata { struct ofproto *ofproto; - uint16_t out_port; + ovs_be16 out_port; }; static void delete_flows_cb(struct cls_rule *, void *cbdata_); -static void delete_flow(struct ofproto *, struct rule *, uint16_t out_port); +static void delete_flow(struct ofproto *, struct rule *, ovs_be16 out_port); /* Implements OFPFC_DELETE. */ static void -delete_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm) +delete_flows_loose(struct ofconn *ofconn, const struct ofp_flow_mod *ofm) { struct delete_flows_cbdata cbdata; struct cls_rule target; - cbdata.ofproto = p; + cbdata.ofproto = ofconn->ofproto; cbdata.out_port = ofm->out_port; - cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, - &target); + cls_rule_from_match(&ofm->match, 0, ofconn->flow_format, + ofm->cookie, &target); - classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, + classifier_for_each_match(&ofconn->ofproto->cls, &target, CLS_INC_ALL, delete_flows_cb, &cbdata); } /* Implements OFPFC_DELETE_STRICT. 
*/ static void -delete_flow_strict(struct ofproto *p, struct ofp_flow_mod *ofm) +delete_flow_strict(struct ofconn *ofconn, struct ofp_flow_mod *ofm) { - struct rule *rule = find_flow_strict(p, ofm); + struct rule *rule = find_flow_strict(ofconn, ofm); if (rule) { - delete_flow(p, rule, ofm->out_port); + delete_flow(ofconn->ofproto, rule, ofm->out_port); } } @@ -3854,7 +3816,7 @@ delete_flows_cb(struct cls_rule *rule_, void *cbdata_) * 'out_port' is htons(OFPP_NONE) or if 'rule' actually outputs to the * specified 'out_port'. */ static void -delete_flow(struct ofproto *p, struct rule *rule, uint16_t out_port) +delete_flow(struct ofproto *p, struct rule *rule, ovs_be16 out_port) { if (rule_is_hidden(rule)) { return; @@ -3864,19 +3826,18 @@ delete_flow(struct ofproto *p, struct rule *rule, uint16_t out_port) return; } - send_flow_removed(p, rule, time_msec(), OFPRR_DELETE); + send_flow_removed(p, rule, OFPRR_DELETE); rule_remove(p, rule); } static int -handle_flow_mod(struct ofproto *p, struct ofconn *ofconn, - struct ofp_flow_mod *ofm) +handle_flow_mod(struct ofconn *ofconn, struct ofp_flow_mod *ofm) { struct ofp_match orig_match; size_t n_actions; int error; - error = reject_slave_controller(ofconn, &ofm->header); + error = reject_slave_controller(ofconn, "OFPT_FLOW_MOD"); if (error) { return error; } @@ -3918,27 +3879,27 @@ handle_flow_mod(struct ofproto *p, struct ofconn *ofconn, } error = validate_actions((const union ofp_action *) ofm->actions, - n_actions, p->max_ports); + n_actions, ofconn->ofproto->max_ports); if (error) { return error; } switch (ntohs(ofm->command)) { case OFPFC_ADD: - return add_flow(p, ofconn, ofm, n_actions); + return add_flow(ofconn, ofm, n_actions); case OFPFC_MODIFY: - return modify_flows_loose(p, ofconn, ofm, n_actions); + return modify_flows_loose(ofconn, ofm, n_actions); case OFPFC_MODIFY_STRICT: - return modify_flow_strict(p, ofconn, ofm, n_actions); + return modify_flow_strict(ofconn, ofm, n_actions); case OFPFC_DELETE: - 
delete_flows_loose(p, ofm); + delete_flows_loose(ofconn, ofm); return 0; case OFPFC_DELETE_STRICT: - delete_flow_strict(p, ofm); + delete_flow_strict(ofconn, ofm); return 0; default: @@ -3947,7 +3908,7 @@ handle_flow_mod(struct ofproto *p, struct ofconn *ofconn, } static int -handle_tun_id_from_cookie(struct ofproto *p, struct nxt_tun_id_cookie *msg) +handle_tun_id_from_cookie(struct ofconn *ofconn, struct nxt_tun_id_cookie *msg) { int error; @@ -3956,13 +3917,12 @@ handle_tun_id_from_cookie(struct ofproto *p, struct nxt_tun_id_cookie *msg) return error; } - p->tun_id_from_cookie = !!msg->set; + ofconn->flow_format = msg->set ? NXFF_TUN_ID_FROM_COOKIE : NXFF_OPENFLOW10; return 0; } static int -handle_role_request(struct ofproto *ofproto, - struct ofconn *ofconn, struct nicira_header *msg) +handle_role_request(struct ofconn *ofconn, struct nicira_header *msg) { struct nx_role_request *nrr; struct nx_role_request *reply; @@ -3994,7 +3954,7 @@ handle_role_request(struct ofproto *ofproto, if (role == NX_ROLE_MASTER) { struct ofconn *other; - HMAP_FOR_EACH (other, hmap_node, &ofproto->controllers) { + HMAP_FOR_EACH (other, hmap_node, &ofconn->ofproto->controllers) { if (other->role == NX_ROLE_MASTER) { other->role = NX_ROLE_SLAVE; } @@ -4002,10 +3962,8 @@ handle_role_request(struct ofproto *ofproto, } ofconn->role = role; - reply = make_openflow_xid(sizeof *reply, OFPT_VENDOR, msg->header.xid, - &buf); - reply->nxh.vendor = htonl(NX_VENDOR_ID); - reply->nxh.subtype = htonl(NXT_ROLE_REPLY); + reply = make_nxmsg_xid(sizeof *reply, NXT_ROLE_REPLY, msg->header.xid, + &buf); reply->role = htonl(role); queue_tx(buf, ofconn, ofconn->reply_counter); @@ -4013,8 +3971,9 @@ handle_role_request(struct ofproto *ofproto, } static int -handle_vendor(struct ofproto *p, struct ofconn *ofconn, void *msg) +handle_vendor(struct ofconn *ofconn, void *msg) { + struct ofproto *p = ofconn->ofproto; struct ofp_vendor_header *ovh = msg; struct nicira_header *nh; @@ -4041,10 +4000,10 @@ 
handle_vendor(struct ofproto *p, struct ofconn *ofconn, void *msg) msg); case NXT_TUN_ID_FROM_COOKIE: - return handle_tun_id_from_cookie(p, msg); + return handle_tun_id_from_cookie(ofconn, msg); case NXT_ROLE_REQUEST: - return handle_role_request(p, ofconn, msg); + return handle_role_request(ofconn, msg); } return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_SUBTYPE); @@ -4064,8 +4023,7 @@ handle_barrier_request(struct ofconn *ofconn, struct ofp_header *oh) } static void -handle_openflow(struct ofconn *ofconn, struct ofproto *p, - struct ofpbuf *ofp_msg) +handle_openflow(struct ofconn *ofconn, struct ofpbuf *ofp_msg) { struct ofp_header *oh = ofp_msg->data; int error; @@ -4081,35 +4039,35 @@ handle_openflow(struct ofconn *ofconn, struct ofproto *p, break; case OFPT_FEATURES_REQUEST: - error = handle_features_request(p, ofconn, oh); + error = handle_features_request(ofconn, oh); break; case OFPT_GET_CONFIG_REQUEST: - error = handle_get_config_request(p, ofconn, oh); + error = handle_get_config_request(ofconn, oh); break; case OFPT_SET_CONFIG: - error = handle_set_config(p, ofconn, ofp_msg->data); + error = handle_set_config(ofconn, ofp_msg->data); break; case OFPT_PACKET_OUT: - error = handle_packet_out(p, ofconn, ofp_msg->data); + error = handle_packet_out(ofconn, ofp_msg->data); break; case OFPT_PORT_MOD: - error = handle_port_mod(p, ofconn, oh); + error = handle_port_mod(ofconn, oh); break; case OFPT_FLOW_MOD: - error = handle_flow_mod(p, ofconn, ofp_msg->data); + error = handle_flow_mod(ofconn, ofp_msg->data); break; case OFPT_STATS_REQUEST: - error = handle_stats_request(p, ofconn, oh); + error = handle_stats_request(ofconn, oh); break; case OFPT_VENDOR: - error = handle_vendor(p, ofconn, ofp_msg->data); + error = handle_vendor(ofconn, ofp_msg->data); break; case OFPT_BARRIER_REQUEST: @@ -4137,7 +4095,7 @@ handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet) struct odp_msg *msg = packet->data; struct rule *rule; struct ofpbuf payload; - flow_t flow; + 
struct flow flow; payload.data = msg + 1; payload.size = msg->length - sizeof *msg; @@ -4151,7 +4109,7 @@ handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet) memset(&action, 0, sizeof(action)); action.output.type = ODPAT_OUTPUT; action.output.port = ODPP_LOCAL; - dpif_execute(p->dpif, flow.in_port, &action, 1, &payload); + dpif_execute(p->dpif, &action, 1, &payload); } rule = lookup_valid_rule(p, &flow); @@ -4241,16 +4199,20 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet) struct expire_cbdata { struct ofproto *ofproto; + int dp_max_idle; }; +static int ofproto_dp_max_idle(const struct ofproto *); static void ofproto_update_used(struct ofproto *); static void rule_expire(struct cls_rule *, void *cbdata); /* This function is called periodically by ofproto_run(). Its job is to * collect updates for the flows that have been installed into the datapath, * most importantly when they last were used, and then use that information to - * expire flows that have not been used recently. */ -static void + * expire flows that have not been used recently. + * + * Returns the number of milliseconds after which it should be called again. */ +static int ofproto_expire(struct ofproto *ofproto) { struct expire_cbdata cbdata; @@ -4258,9 +4220,14 @@ ofproto_expire(struct ofproto *ofproto) /* Update 'used' for each flow in the datapath. */ ofproto_update_used(ofproto); - /* Expire idle flows. */ + /* Expire idle flows. + * + * A wildcarded flow is idle only when all of its subrules have expired due + * to becoming idle, so iterate through the exact-match flows first. 
*/ cbdata.ofproto = ofproto; - classifier_for_each(&ofproto->cls, CLS_INC_ALL, rule_expire, &cbdata); + cbdata.dp_max_idle = ofproto_dp_max_idle(ofproto); + classifier_for_each(&ofproto->cls, CLS_INC_EXACT, rule_expire, &cbdata); + classifier_for_each(&ofproto->cls, CLS_INC_WILD, rule_expire, &cbdata); /* Let the hook know that we're at a stable point: all outstanding data * in existing flows has been accounted to the account_cb. Thus, the @@ -4269,6 +4236,8 @@ ofproto_expire(struct ofproto *ofproto) if (ofproto->ofhooks->account_checkpoint_cb) { ofproto->ofhooks->account_checkpoint_cb(ofproto->aux); } + + return MIN(cbdata.dp_max_idle, 1000); } /* Update 'used' member of each flow currently installed into the datapath. */ @@ -4287,10 +4256,15 @@ ofproto_update_used(struct ofproto *p) for (i = 0; i < n_flows; i++) { struct odp_flow *f = &flows[i]; + struct cls_rule target; struct rule *rule; + struct flow flow; + + odp_flow_key_to_flow(&f->key, &flow); + cls_rule_from_flow(&flow, 0, UINT16_MAX, &target); - rule = rule_from_cls_rule( - classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX)); + rule = rule_from_cls_rule(classifier_find_rule_exactly(&p->cls, + &target)); if (rule && rule->installed) { update_time(p, rule, &f->stats); @@ -4306,6 +4280,96 @@ ofproto_update_used(struct ofproto *p) free(flows); } +/* Calculates and returns the number of milliseconds of idle time after which + * flows should expire from the datapath and we should fold their statistics + * into their parent rules in userspace. */ +static int +ofproto_dp_max_idle(const struct ofproto *ofproto) +{ + /* + * Idle time histogram. + * + * Most of the time a switch has a relatively small number of flows. When + * this is the case we might as well keep statistics for all of them in + * userspace and cache them in the kernel datapath for performance as + * well.
+ * + * As the number of flows increases, the memory required to maintain + * statistics about them in userspace and in the kernel becomes + * significant. However, with a large number of flows it is likely that + * only a few of them are "heavy hitters" that consume a large amount of + * bandwidth. At this point, only heavy hitters are worth caching in the + * kernel and maintaining in userspace; other flows we can discard. + * + * The technique used to compute the idle time is to build a histogram with + * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each flow that + * is installed in the kernel gets dropped in the appropriate bucket. + * After the histogram has been built, we compute the cutoff so that only + * the most-recently-used 1% of flows (but at least 1000 flows) are kept + * cached. At least the most-recently-used bucket of flows is kept, so + * actually an arbitrary number of flows can be kept in any given + * expiration run (though the next run will delete most of those unless + * they receive additional data). + * + * This requires a second pass through the exact-match flows, in addition + * to the pass made by ofproto_update_used(), because that function + * never looks at uninstallable flows. + */ + enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) }; + enum { N_BUCKETS = 5000 / BUCKET_WIDTH }; + int buckets[N_BUCKETS] = { 0 }; + int total, bucket; + struct rule *rule; + long long int now; + int i; + + total = classifier_count_exact(&ofproto->cls); + if (total <= 1000) { + return N_BUCKETS * BUCKET_WIDTH; + } + + /* Build histogram. */ + now = time_msec(); + CLASSIFIER_FOR_EACH_EXACT_RULE (rule, cr, &ofproto->cls) { + long long int idle = now - rule->used; + int bucket = (idle <= 0 ? 0 + : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1 + : (unsigned int) idle / BUCKET_WIDTH); + buckets[bucket]++; + } + + /* Find the first bucket whose flows should be expired.
*/ + for (bucket = 0; bucket < N_BUCKETS; bucket++) { + if (buckets[bucket]) { + int subtotal = 0; + do { + subtotal += buckets[bucket++]; + } while (bucket < N_BUCKETS && subtotal < MAX(1000, total / 100)); + break; + } + } + + if (VLOG_IS_DBG_ENABLED()) { + struct ds s; + + ds_init(&s); + ds_put_cstr(&s, "keep"); + for (i = 0; i < N_BUCKETS; i++) { + if (i == bucket) { + ds_put_cstr(&s, ", drop"); + } + if (buckets[i]) { + ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]); + } + } + VLOG_INFO("%s: %s (msec:count)", + dpif_name(ofproto->dpif), ds_cstr(&s)); + ds_destroy(&s); + } + + return bucket * BUCKET_WIDTH; +} + static void rule_active_timeout(struct ofproto *ofproto, struct rule *rule) { @@ -4321,7 +4385,7 @@ rule_active_timeout(struct ofproto *ofproto, struct rule *rule) * ofproto_update_used() zeroed TCP flags. */ memset(&odp_flow, 0, sizeof odp_flow); if (rule->installed) { - odp_flow.key = rule->cr.flow; + odp_flow_key_from_flow(&odp_flow.key, &rule->cr.flow); odp_flow.flags = ODPFF_ZERO_TCP_FLAGS; dpif_flow_get(ofproto->dpif, &odp_flow); @@ -4371,7 +4435,7 @@ rule_expire(struct cls_rule *cls_rule, void *cbdata_) if (now < expire) { /* 'rule' has not expired according to OpenFlow rules. */ if (!rule->cr.wc.wildcards) { - if (now >= rule->used + 5000) { + if (now >= rule->used + cbdata->dp_max_idle) { /* This rule is idle, so drop it to free up resources. */ if (rule->super) { /* It's not part of the OpenFlow flow table, so we can @@ -4407,7 +4471,7 @@ rule_expire(struct cls_rule *cls_rule, void *cbdata_) /* Get rid of the rule. */ if (!rule_is_hidden(rule)) { - send_flow_removed(cbdata->ofproto, rule, now, + send_flow_removed(cbdata->ofproto, rule, (now >= hard_expire ? 
OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT)); } @@ -4431,12 +4495,13 @@ revalidate_cb(struct cls_rule *sub_, void *cbdata_) static bool revalidate_rule(struct ofproto *p, struct rule *rule) { - const flow_t *flow = &rule->cr.flow; + const struct flow *flow = &rule->cr.flow; COVERAGE_INC(ofproto_revalidate_rule); if (rule->super) { struct rule *super; - super = rule_from_cls_rule(classifier_lookup_wild(&p->cls, flow)); + super = rule_from_cls_rule(classifier_lookup(&p->cls, flow, + CLS_INC_WILD)); if (!super) { rule_remove(p, rule); return false; @@ -4457,23 +4522,19 @@ revalidate_rule(struct ofproto *p, struct rule *rule) } static struct ofpbuf * -compose_flow_removed(struct ofproto *p, const struct rule *rule, - long long int now, uint8_t reason) +compose_flow_removed(struct ofconn *ofconn, const struct rule *rule, + uint8_t reason) { struct ofp_flow_removed *ofr; struct ofpbuf *buf; - long long int tdiff = now - rule->created; - uint32_t sec = tdiff / 1000; - uint32_t msec = tdiff - (sec * 1000); ofr = make_openflow(sizeof *ofr, OFPT_FLOW_REMOVED, &buf); - flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, p->tun_id_from_cookie, + flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, ofconn->flow_format, &ofr->match); ofr->cookie = rule->flow_cookie; ofr->priority = htons(rule->cr.priority); ofr->reason = reason; - ofr->duration_sec = htonl(sec); - ofr->duration_nsec = htonl(msec * 1000000); + calc_flow_duration(rule->created, &ofr->duration_sec, &ofr->duration_nsec); ofr->idle_timeout = htons(rule->idle_timeout); ofr->packet_count = htonll(rule->packet_count); ofr->byte_count = htonll(rule->byte_count); @@ -4482,37 +4543,30 @@ compose_flow_removed(struct ofproto *p, const struct rule *rule, } static void -send_flow_removed(struct ofproto *p, struct rule *rule, - long long int now, uint8_t reason) +send_flow_removed(struct ofproto *p, struct rule *rule, uint8_t reason) { struct ofconn *ofconn; - struct ofconn *prev; - struct ofpbuf *buf = NULL; if 
(!rule->send_flow_removed) { return; } - /* We limit the maximum number of queued flow expirations it by accounting - * them under the counter for replies. That works because preventing - * OpenFlow requests from being processed also prevents new flows from - * being added (and expiring). (It also prevents processing OpenFlow - * requests that would not add new flows, so it is imperfect.) */ - - prev = NULL; LIST_FOR_EACH (ofconn, node, &p->all_conns) { - if (rconn_is_connected(ofconn->rconn) - && ofconn_receives_async_msgs(ofconn)) { - if (prev) { - queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter); - } else { - buf = compose_flow_removed(p, rule, now, reason); - } - prev = ofconn; + struct ofpbuf *msg; + + if (!rconn_is_connected(ofconn->rconn) + || !ofconn_receives_async_msgs(ofconn)) { + continue; } - } - if (prev) { - queue_tx(buf, prev, prev->reply_counter); + + msg = compose_flow_removed(ofconn, rule, reason); + + /* Account flow expirations under ofconn->reply_counter, the counter + * for replies to OpenFlow requests. That works because preventing + * OpenFlow requests from being processed also prevents new flows from + * being added (and expiring). (It also prevents processing OpenFlow + * requests that would not add new flows, so it is imperfect.) 
*/ + queue_tx(msg, ofconn, ofconn->reply_counter); } } @@ -4691,7 +4745,7 @@ pick_fallback_dpid(void) } static bool -default_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, +default_normal_ofhook_cb(const struct flow *flow, const struct ofpbuf *packet, struct odp_actions *actions, tag_type *tags, uint16_t *nf_output_iface, void *ofproto_) { @@ -4722,7 +4776,8 @@ default_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags, NULL); if (out_port < 0) { - add_output_group_action(actions, DP_GROUP_FLOOD, nf_output_iface); + flood_packets(ofproto, flow->in_port, OFPPC_NO_FLOOD, + nf_output_iface, actions); } else if (out_port != flow->in_port) { odp_actions_add(actions, ODPAT_OUTPUT)->output.port = out_port; *nf_output_iface = out_port; @@ -4734,7 +4789,6 @@ default_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, } static const struct ofhooks default_ofhooks = { - NULL, default_normal_ofhook_cb, NULL, NULL