X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=ofproto%2Fofproto.c;h=dcf8683ff37ea007a41d75e1fc375c2b1e0e71e6;hb=e2bfacb6e490bb571f2fbd243c7a2d4658149fba;hp=636e5ec80d3148450337709373cb34249cc7b56b;hpb=9deba63bdebc2e0eceab8da186b79703fe694186;p=openvswitch diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 636e5ec8..dcf8683f 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2009, 2010 Nicira Networks. + * Copyright (c) 2010 Jean Tourrilhes - HP-Labs. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -204,6 +205,7 @@ struct ofconn { struct hmap_node hmap_node; /* In struct ofproto's "controllers" map. */ struct discovery *discovery; /* Controller discovery object, if enabled. */ struct status_category *ss; /* Switch status category. */ + enum ofproto_band band; /* In-band or out-of-band? */ }; /* We use OFPR_NO_MATCH and OFPR_ACTION as indexes into struct ofconn's @@ -221,6 +223,8 @@ static struct ofconn *ofconn_create(struct ofproto *, struct rconn *, static void ofconn_destroy(struct ofconn *); static void ofconn_run(struct ofconn *, struct ofproto *); static void ofconn_wait(struct ofconn *); +static bool ofconn_receives_async_msgs(const struct ofconn *); + static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn, struct rconn_packet_counter *counter); @@ -247,11 +251,16 @@ struct ofproto { /* Configuration. */ struct switch_status *switch_status; - struct in_band *in_band; struct fail_open *fail_open; struct netflow *netflow; struct ofproto_sflow *sflow; + /* In-band control. */ + struct in_band *in_band; + long long int next_in_band_update; + struct sockaddr_in *extra_in_band_remotes; + size_t n_extra_remotes; + /* Flow table. */ struct classifier cls; bool need_revalidate; @@ -466,6 +475,9 @@ update_controller(struct ofconn *ofconn, const struct ofproto_controller *c) int probe_interval; int i; + ofconn->band = (is_in_band_controller(c) + ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND); + rconn_set_max_backoff(ofconn->rconn, c->max_backoff); probe_interval = c->probe_interval ? MAX(c->probe_interval, 5) : 0; @@ -514,17 +526,68 @@ find_controller_by_target(struct ofproto *ofproto, const char *target) return NULL; } +static void +update_in_band_remotes(struct ofproto *ofproto) +{ + const struct ofconn *ofconn; + struct sockaddr_in *addrs; + size_t max_addrs, n_addrs; + bool discovery; + size_t i; + + /* Allocate enough memory for as many remotes as we could possibly have. */ + max_addrs = ofproto->n_extra_remotes + hmap_count(&ofproto->controllers); + addrs = xmalloc(max_addrs * sizeof *addrs); + n_addrs = 0; + + /* Add all the remotes. */ + discovery = false; + HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) { + struct sockaddr_in *sin = &addrs[n_addrs]; + + sin->sin_addr.s_addr = rconn_get_remote_ip(ofconn->rconn); + if (sin->sin_addr.s_addr) { + sin->sin_port = rconn_get_remote_port(ofconn->rconn); + n_addrs++; + } + if (ofconn->discovery) { + discovery = true; + } + } + for (i = 0; i < ofproto->n_extra_remotes; i++) { + addrs[n_addrs++] = ofproto->extra_in_band_remotes[i]; + } + + /* Create or update or destroy in-band. + * + * Ordinarily we only enable in-band if there's at least one remote + * address, but discovery needs the in-band rules for DHCP to be installed + * even before we know any remote addresses. */ + if (n_addrs || discovery) { + if (!ofproto->in_band) { + in_band_create(ofproto, ofproto->dpif, ofproto->switch_status, + &ofproto->in_band); + } + in_band_set_remotes(ofproto->in_band, addrs, n_addrs); + ofproto->next_in_band_update = time_msec() + 1000; + } else { + in_band_destroy(ofproto->in_band); + ofproto->in_band = NULL; + } + + /* Clean up. */ + free(addrs); +} + void ofproto_set_controllers(struct ofproto *p, const struct ofproto_controller *controllers, size_t n_controllers) { struct shash new_controllers; - struct rconn **in_band_rconns; enum ofproto_fail_mode fail_mode; struct ofconn *ofconn, *next; bool ss_exists; - size_t n_in_band; size_t i; shash_init(&new_controllers); @@ -537,8 +600,6 @@ ofproto_set_controllers(struct ofproto *p, } } - in_band_rconns = xmalloc(n_controllers * sizeof *in_band_rconns); - n_in_band = 0; fail_mode = OFPROTO_FAIL_STANDALONE; ss_exists = false; HMAP_FOR_EACH_SAFE (ofconn, next, struct ofconn, hmap_node, @@ -550,14 +611,9 @@ ofproto_set_controllers(struct ofproto *p, ofconn_destroy(ofconn); } else { update_controller(ofconn, c); - if (ofconn->ss) { ss_exists = true; } - if (is_in_band_controller(c)) { - in_band_rconns[n_in_band++] = ofconn->rconn; - } - if (c->fail == OFPROTO_FAIL_SECURE) { fail_mode = OFPROTO_FAIL_SECURE; } @@ -565,18 +621,7 @@ ofproto_set_controllers(struct ofproto *p, } shash_destroy(&new_controllers); - if (n_in_band) { - if (!p->in_band) { - in_band_create(p, p->dpif, p->switch_status, &p->in_band); - } - if (p->in_band) { - in_band_set_remotes(p->in_band, in_band_rconns, n_in_band); - } - } else { - in_band_destroy(p->in_band); - p->in_band = NULL; - } - free(in_band_rconns); + update_in_band_remotes(p); if (!hmap_is_empty(&p->controllers) && fail_mode == OFPROTO_FAIL_STANDALONE) { @@ -608,6 +653,47 @@ ofproto_set_controllers(struct ofproto *p, } } +static bool +any_extras_changed(const struct ofproto *ofproto, + const struct sockaddr_in *extras, size_t n) +{ + size_t i; + + if (n != ofproto->n_extra_remotes) { + return true; + } + + for (i = 0; i < n; i++) { + const struct sockaddr_in *old = &ofproto->extra_in_band_remotes[i]; + const struct sockaddr_in *new = &extras[i]; + + if (old->sin_addr.s_addr != new->sin_addr.s_addr || + old->sin_port != new->sin_port) { + return true; + } + } + + return false; +} + +/* Sets the 'n' TCP port addresses in 'extras' as ones to which 'ofproto''s + * in-band control should guarantee access, in the same way that in-band + * control guarantees access to OpenFlow controllers. */ +void +ofproto_set_extra_in_band_remotes(struct ofproto *ofproto, + const struct sockaddr_in *extras, size_t n) +{ + if (!any_extras_changed(ofproto, extras, n)) { + return; + } + + free(ofproto->extra_in_band_remotes); + ofproto->n_extra_remotes = n; + ofproto->extra_in_band_remotes = xmemdup(extras, n * sizeof *extras); + + update_in_band_remotes(ofproto); +} + void ofproto_set_desc(struct ofproto *p, const char *mfr_desc, const char *hw_desc, @@ -811,6 +897,7 @@ ofproto_destroy(struct ofproto *p) in_band_destroy(p->in_band); p->in_band = NULL; + free(p->extra_in_band_remotes); ofproto_flush_flows(p); classifier_destroy(&p->cls); @@ -876,25 +963,47 @@ process_port_change(struct ofproto *ofproto, int error, char *devname) } } +/* Returns a "preference level" for snooping 'ofconn'. A higher return value + * means that 'ofconn' is more interesting for monitoring than a lower return + * value. */ +static int +snoop_preference(const struct ofconn *ofconn) +{ + switch (ofconn->role) { + case NX_ROLE_MASTER: + return 3; + case NX_ROLE_OTHER: + return 2; + case NX_ROLE_SLAVE: + return 1; + default: + /* Shouldn't happen. */ + return 0; + } +} + /* One of ofproto's "snoop" pvconns has accepted a new connection on 'vconn'. * Connects this vconn to a controller. */ static void add_snooper(struct ofproto *ofproto, struct vconn *vconn) { - struct ofconn *ofconn; + struct ofconn *ofconn, *best; - /* Arbitrarily pick the first controller in the list for monitoring. We - * could do something smarter or more flexible later, if it ever proves - * useful. */ + /* Pick a controller for monitoring. */ + best = NULL; LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) { - if (ofconn->type == OFCONN_CONTROLLER) { - rconn_add_monitor(ofconn->rconn, vconn); - return; + if (ofconn->type == OFCONN_CONTROLLER + && (!best || snoop_preference(ofconn) > snoop_preference(best))) { + best = ofconn; } + } + if (best) { + rconn_add_monitor(best->rconn, vconn); + } else { + VLOG_INFO_RL(&rl, "no controller connection to snoop"); + vconn_close(vconn); } - VLOG_INFO_RL(&rl, "no controller connection to monitor"); - vconn_close(vconn); } int @@ -939,6 +1048,9 @@ ofproto_run1(struct ofproto *p) } if (p->in_band) { + if (time_msec() >= p->next_in_band_update) { + update_in_band_remotes(p); + } in_band_run(p->in_band); } @@ -1043,6 +1155,7 @@ ofproto_wait(struct ofproto *p) ofconn_wait(ofconn); } if (p->in_band) { + poll_timer_wait(p->next_in_band_update - time_msec()); in_band_wait(p->in_band); } if (p->fail_open) { @@ -1313,7 +1426,7 @@ send_port_status(struct ofproto *p, const struct ofport *ofport, struct ofp_port_status *ops; struct ofpbuf *b; - if (ofconn->role == NX_ROLE_SLAVE) { + if (!ofconn_receives_async_msgs(ofconn)) { continue; } @@ -1562,6 +1675,22 @@ ofconn_wait(struct ofconn *ofconn) COVERAGE_INC(ofproto_ofconn_stuck); } } + +/* Returns true if 'ofconn' should receive asynchronous messages. */ +static bool +ofconn_receives_async_msgs(const struct ofconn *ofconn) +{ + if (ofconn->type == OFCONN_CONTROLLER) { + /* Ordinary controllers always get asynchronous messages unless they + * have configured themselves as "slaves". */ + return ofconn->role != NX_ROLE_SLAVE; + } else { + /* Transient connections don't get asynchronous messages unless they + * have explicitly asked for them by setting a nonzero miss send + * length. */ + return ofconn->miss_send_len > 0; + } +} /* Caller is responsible for initializing the 'cr' member of the returned * rule. */ @@ -2116,7 +2245,7 @@ add_controller_action(struct odp_actions *actions, const struct ofp_action_output *oao) { union odp_action *a = odp_actions_add(actions, ODPAT_CONTROLLER); - a->controller.arg = oao->max_len ? ntohs(oao->max_len) : UINT32_MAX; + a->controller.arg = ntohs(oao->max_len); } struct action_xlate_ctx { @@ -2672,7 +2801,7 @@ handle_table_stats_request(struct ofproto *p, struct ofconn *ofconn, static void append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn, - struct ofpbuf *msg) + struct ofpbuf **msgp) { struct netdev_stats stats; struct ofp_port_stats *ops; @@ -2682,7 +2811,7 @@ append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn, * netdev_get_stats() will log errors. */ netdev_get_stats(port->netdev, &stats); - ops = append_stats_reply(sizeof *ops, ofconn, &msg); + ops = append_stats_reply(sizeof *ops, ofconn, msgp); ops->port_no = htons(odp_port_to_ofp_port(port_no)); memset(ops->pad, 0, sizeof ops->pad); ops->rx_packets = htonll(stats.rx_packets); @@ -2720,11 +2849,11 @@ handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn, port = port_array_get(&p->ports, ofp_port_to_odp_port(ntohs(psr->port_no))); if (port) { - append_port_stat(port, ntohs(psr->port_no), ofconn, msg); + append_port_stat(port, ntohs(psr->port_no), ofconn, &msg); } } else { PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { - append_port_stat(port, port_no, ofconn, msg); + append_port_stat(port, port_no, ofconn, &msg); } } @@ -3775,7 +3904,7 @@ send_flow_removed(struct ofproto *p, struct rule *rule, prev = NULL; LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn) - && ofconn->role != NX_ROLE_SLAVE) { + && ofconn_receives_async_msgs(ofconn)) { if (prev) { queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter); } else { @@ -3905,65 +4034,147 @@ update_used(struct ofproto *p) free(flows); } +/* pinsched callback for sending 'packet' on 'ofconn'. */ static void do_send_packet_in(struct ofpbuf *packet, void *ofconn_) { struct ofconn *ofconn = ofconn_; + + rconn_send_with_limit(ofconn->rconn, packet, + ofconn->packet_in_counter, 100); +} + +/* Takes 'packet', which has been converted with do_convert_to_packet_in(), and + * finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s + * packet scheduler for sending. + * + * 'max_len' specifies the maximum number of bytes of the packet to send on + * 'ofconn' (INT_MAX specifies no limit). + * + * If 'clone' is true, the caller retains ownership of 'packet'. Otherwise, + * ownership is transferred to this function. */ +static void +schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, int max_len, + bool clone) +{ struct ofproto *ofproto = ofconn->ofproto; - struct odp_msg *msg = packet->data; - struct ofpbuf payload; - struct ofpbuf *opi; + struct ofp_packet_in *opi = packet->data; + uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port)); + int send_len, trim_size; uint32_t buffer_id; - int send_len; - /* Extract packet payload from 'msg'. */ - payload.data = msg + 1; - payload.size = msg->length - sizeof *msg; + /* Get buffer. */ + if (opi->reason == OFPR_ACTION) { + buffer_id = UINT32_MAX; + } else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) { + buffer_id = pktbuf_get_null(); + } else if (!ofconn->pktbuf) { + buffer_id = UINT32_MAX; + } else { + struct ofpbuf payload; + payload.data = opi->data; + payload.size = packet->size - offsetof(struct ofp_packet_in, data); + buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port); + } + + /* Figure out how much of the packet to send. */ + send_len = ntohs(opi->total_len); + if (buffer_id != UINT32_MAX) { + send_len = MIN(send_len, ofconn->miss_send_len); + } + send_len = MIN(send_len, max_len); + + /* Adjust packet length and clone if necessary. */ + trim_size = offsetof(struct ofp_packet_in, data) + send_len; + if (clone) { + packet = ofpbuf_clone_data(packet->data, trim_size); + opi = packet->data; + } else { + packet->size = trim_size; + } - /* Construct packet-in message. */ - send_len = INT_MAX; + /* Update packet headers. */ + opi->buffer_id = htonl(buffer_id); + update_openflow_length(packet); + + /* Hand over to packet scheduler. It might immediately call into + * do_send_packet_in() or it might buffer it for a while (until a later + * call to pinsched_run()). */ + pinsched_send(ofconn->schedulers[opi->reason], in_port, + packet, do_send_packet_in, ofconn); +} + +/* Replace struct odp_msg header in 'packet' by equivalent struct + * ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as + * returned by dpif_recv()). + * + * The conversion is not complete: the caller still needs to trim any unneeded + * payload off the end of the buffer, set the length in the OpenFlow header, + * and set buffer_id. Those require us to know the controller settings and so + * must be done on a per-controller basis. + * + * Returns the maximum number of bytes of the packet that should be sent to + * the controller (INT_MAX if no limit). */ +static int +do_convert_to_packet_in(struct ofpbuf *packet) +{ + struct odp_msg *msg = packet->data; + struct ofp_packet_in *opi; + uint8_t reason; + uint16_t total_len; + uint16_t in_port; + int max_len; + + /* Extract relevant header fields */ if (msg->type == _ODPL_ACTION_NR) { - buffer_id = UINT32_MAX; + reason = OFPR_ACTION; + max_len = msg->arg; } else { - if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) { - buffer_id = pktbuf_get_null(); - } else { - buffer_id = pktbuf_save(ofconn->pktbuf, &payload, msg->port); - } - if (buffer_id != UINT32_MAX) { - send_len = ofconn->miss_send_len; - } + reason = OFPR_NO_MATCH; + max_len = INT_MAX; } - opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port), - msg->type, &payload, send_len); + total_len = msg->length - sizeof *msg; + in_port = odp_port_to_ofp_port(msg->port); - /* Send. */ - rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100); + /* Repurpose packet buffer by overwriting header. */ + ofpbuf_pull(packet, sizeof(struct odp_msg)); + opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data)); + opi->header.version = OFP_VERSION; + opi->header.type = OFPT_PACKET_IN; + opi->total_len = htons(total_len); + opi->in_port = htons(in_port); + opi->reason = reason; - ofpbuf_delete(packet); + return max_len; } +/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or + * _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller + * as necessary according to their individual configurations. + * + * 'packet' must have sufficient headroom to convert it into a struct + * ofp_packet_in (e.g. as returned by dpif_recv()). + * + * Takes ownership of 'packet'. */ static void send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet) { - struct odp_msg *msg = packet->data; struct ofconn *ofconn, *prev; + int max_len; - assert(msg->type == _ODPL_MISS_NR || msg->type == _ODPL_ACTION_NR); + max_len = do_convert_to_packet_in(packet); prev = NULL; LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) { - if (ofconn->role != NX_ROLE_SLAVE) { + if (ofconn_receives_async_msgs(ofconn)) { if (prev) { - pinsched_send(prev->schedulers[msg->type], msg->port, - ofpbuf_clone(packet), do_send_packet_in, prev); + schedule_packet_in(prev, packet, max_len, true); } prev = ofconn; } } if (prev) { - pinsched_send(prev->schedulers[msg->type], msg->port, - packet, do_send_packet_in, prev); + schedule_packet_in(prev, packet, max_len, false); } else { ofpbuf_delete(packet); }