From d356d7cdd0349c6d047bca84db634f16db2498f1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 5 Jan 2009 16:23:32 -0800 Subject: [PATCH] vswitchd: Implement spanning tree protocol. --- vswitchd/bridge.c | 390 +++++++++++++++++++++++++++++++++------ vswitchd/vswitchd.conf.5 | 69 +++++++ 2 files changed, 404 insertions(+), 55 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index b92a5fce..b8533b92 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -63,6 +63,7 @@ #include "rconn.h" #include "socket-util.h" #include "stats.h" +#include "stp.h" #include "svec.h" #include "timeval.h" #include "util.h" @@ -79,6 +80,8 @@ struct iface { char *name; /* Host network device name. */ int dp_ifidx; /* Index within kernel datapath. */ + uint8_t mac[ETH_ADDR_LEN]; /* Ethernet address (all zeros if unknown). */ + tag_type tag; /* Tag associated with this interface. */ bool enabled; /* May be chosen for flows? */ long long delay_expires; /* Time after which 'enabled' may change. */ @@ -135,6 +138,10 @@ struct port { mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */ bool is_mirror_output_port; /* Does port mirroring send frames here? */ + + /* Spanning tree info. */ + enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ + tag_type stp_state_tag; /* Tag for STP state change. */ }; #define DP_MAX_PORTS 255 @@ -184,6 +191,10 @@ struct bridge { /* Port mirroring. */ struct mirror *mirrors[MAX_MIRRORS]; + + /* Spanning tree. */ + struct stp *stp; + long long int stp_last_tick; }; /* List of all bridges. 
*/ @@ -228,6 +239,13 @@ static void mirror_reconfigure(struct bridge *); static void mirror_reconfigure_one(struct mirror *); static bool vlan_is_mirrored(const struct mirror *, int vlan); +static void brstp_reconfigure(struct bridge *); +static void brstp_adjust_timers(struct bridge *); +static void brstp_run(struct bridge *); +static void brstp_wait(struct bridge *); +static void brstp_receive(struct bridge *, const struct flow *, + const struct ofpbuf *); + static void iface_create(struct port *, const char *name); static void iface_destroy(struct iface *); static struct iface *iface_lookup(const struct bridge *, const char *name); @@ -368,6 +386,10 @@ bridge_reconfigure(void) } } } + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + brstp_reconfigure(br); + } } void @@ -403,6 +425,7 @@ bridge_wait(void) stats_mgr_wait(br->stats_mgr); flowstats_wait(br); bond_wait(br); + brstp_wait(br); if (!tag_set_is_empty(&br->revalidate_set)) { poll_immediate_wake(); } @@ -783,6 +806,7 @@ bridge_run_one(struct bridge *br) } flowstats_run(br); bond_run(br); + brstp_run(br); /* Start or restart secchan if necessary. 
*/ run_secchan(br); @@ -1045,59 +1069,38 @@ static void flow_from_match(struct flow *, const struct ofp_match *); static void bridge_process_msg(struct bridge *br, struct ofpbuf *msg) { - struct processor { - uint8_t type; - packet_handler_func *handler; - }; - static const struct processor processors[] = { - { - OFPT_ECHO_REQUEST, - process_echo_request - }, - { - OFPT_PACKET_IN, - process_packet_in - }, - { - OFPT_FLOW_EXPIRED, - process_flow_expired - }, - { - OFPT_STATS_REPLY, - process_stats_reply - }, - { - OFPT_ERROR, - process_error_msg - }, - { - OFPT_FEATURES_REPLY, - process_features_reply, - }, - { - OFPT_PORT_STATUS, - process_port_status - }, - }; - const size_t n_processors = ARRAY_SIZE(processors); - const struct processor *p; - struct ofp_header *oh; - - oh = msg->data; - for (p = processors; p < &processors[n_processors]; p++) { - if (oh->type == p->type) { - if (p->handler) { - (p->handler)(br, msg->data); - } - return; + struct ofp_header *oh = msg->data; + switch (oh->type) { + case OFPT_ECHO_REQUEST: + process_echo_request(br, msg->data); + break; + case OFPT_PACKET_IN: + process_packet_in(br, msg->data); + break; + case OFPT_FLOW_EXPIRED: + process_flow_expired(br, msg->data); + break; + case OFPT_STATS_REPLY: + process_stats_reply(br, msg->data); + break; + case OFPT_ERROR: + process_error_msg(br, msg->data); + break; + case OFPT_FEATURES_REPLY: + process_features_reply(br, msg->data); + break; + case OFPT_PORT_STATUS: + process_port_status(br, msg->data); + break; + default: + if (VLOG_IS_DBG_ENABLED()) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + char *p = ofp_to_string(msg->data, msg->size, 2); + VLOG_DBG_RL(&rl, "bridge %s: OpenFlow packet ignored: %s", + br->name, p); + free(p); } - } - if (VLOG_IS_DBG_ENABLED()) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); - char *p = ofp_to_string(msg->data, msg->size, 2); - VLOG_DBG_RL(&rl, "bridge %s: OpenFlow packet ignored: %s", - br->name, p); - 
free(p); + break; } } @@ -1291,6 +1294,18 @@ set_dst(struct ft_dst *p, const struct flow *flow, const struct port *in_port, const struct port *out_port, tag_type *tags) { + /* STP handling. + * + * XXX This uses too many tags: any broadcast flow will get one tag per + * destination port, and thus a broadcast on a switch of any size is likely + * to have all tag bits set. We should figure out a way to be smarter. + * + * This is OK when STP is disabled, because stp_state_tag is 0 then. */ + *tags |= out_port->stp_state_tag; + if (!(out_port->stp_state & (STP_DISABLED | STP_FORWARDING))) { + return false; + } + p->vlan = (out_port->vlan >= 0 ? OFP_VLAN_NONE : in_port->vlan >= 0 ? in_port->vlan : ntohs(flow->dl_vlan)); @@ -1413,6 +1428,7 @@ compose_dsts(const struct bridge *br, const struct flow *flow, uint16_t vlan, struct ft_dst *dst = dsts; size_t i; + *tags |= in_port->stp_state_tag; if (out_port == FLOOD_PORT) { /* XXX use OFPP_FLOOD if no vlans or bonding. */ for (i = 0; i < br->n_ports; i++) { @@ -1642,6 +1658,21 @@ process_flow(struct bridge *br, const struct flow *flow, } } + /* Drop frames for ports that STP wants entirely killed (both for + * forwarding and for learning). Later, after we do learning, we'll drop + * the frames that STP doesn't want to do learning on. */ + if (in_port->stp_state & (STP_LISTENING | STP_BLOCKING)) { + printf("drop on %s: %s (%x)\n", + in_port->name, stp_state_name(in_port->stp_state), + in_port->stp_state_tag); + goto done; + } + + /* Drop frames for reserved multicast addresses. */ + if (eth_addr_is_reserved(flow->dl_dst)) { + goto done; + } + /* Drop frames on ports reserved for mirroring. */ if (in_port->is_mirror_output_port) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -1712,8 +1743,9 @@ process_flow(struct bridge *br, const struct flow *flow, } } - /* Don't send packets out their input ports. */ - if (in_port == out_port) { + /* Don't send packets out their input ports. 
Don't forward frames that STP + * wants us to discard. */ + if (in_port == out_port || in_port->stp_state == STP_LEARNING) { out_port = NULL; } @@ -1774,6 +1806,12 @@ process_packet_in(struct bridge *br, void *opi_) } } + if (flow.dl_type == htons(OFP_DL_TYPE_NOT_ETH_TYPE) + && eth_addr_equals(flow.dl_dst, stp_eth_addr)) { + brstp_receive(br, &flow, &buf); + return; + } + process_flow(br, &flow, &pkt); } @@ -1846,6 +1884,9 @@ phy_port_changed(struct bridge *br, enum ofp_port_reason reason, (reason != OFPPR_DELETE && !(opp->state & htonl(OFPPS_LINK_DOWN)))); } + if (reason != OFPPR_DELETE) { + memcpy(iface->mac, opp->hw_addr, ETH_ADDR_LEN); + } } static void @@ -2244,6 +2285,8 @@ port_create(struct bridge *br, const char *name) port->trunks = NULL; port->name = xstrdup(name); port->active_iface = -1; + port->stp_state = STP_DISABLED; + port->stp_state_tag = 0; if (br->n_ports >= br->allocated_ports) { br->ports = x2nrealloc(br->ports, &br->allocated_ports, @@ -2808,4 +2851,241 @@ mirror_reconfigure_one(struct mirror *m) svec_destroy(&dst_ports); free(pfx); } + +/* Spanning tree protocol. 
*/ + +static void brstp_update_port_state(struct port *); + +static void +brstp_send_bpdu(struct ofpbuf *pkt, int port_no, void *br_) +{ + struct bridge *br = br_; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + struct iface *iface = iface_from_dp_ifidx(br, port_no); + if (!iface) { + VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", + br->name, port_no); + } else if (eth_addr_is_zero(iface->mac)) { + VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d with unknown MAC", + br->name, port_no); + } else { + struct eth_header *eth = pkt->l2; + memcpy(eth->eth_src, iface->mac, ETH_ADDR_LEN); + queue_tx(br, make_unbuffered_packet_out(pkt, OFPP_NONE, port_no)); + } + ofpbuf_delete(pkt); +} + +static void +brstp_reconfigure(struct bridge *br) +{ + size_t i; + + if (!cfg_get_bool(0, "stp.%s.enabled", br->name)) { + if (br->stp) { + stp_destroy(br->stp); + br->stp = NULL; + + bridge_flush(br); + } + } else { + uint64_t bridge_address, bridge_id; + int bridge_priority; + + bridge_address = cfg_get_mac(0, "stp.%s.address", br->name); + if (!bridge_address) { + if (br->stp) { + bridge_address = (stp_get_bridge_id(br->stp) + & ((UINT64_C(1) << 48) - 1)); + } else { + uint8_t mac[ETH_ADDR_LEN]; + eth_addr_random(mac); + bridge_address = eth_addr_to_uint64(mac); + } + } + + if (cfg_is_valid(CFG_INT | CFG_REQUIRED, "stp.%s.priority", + br->name)) { + bridge_priority = cfg_get_int(0, "stp.%s.priority", br->name); + } else { + bridge_priority = STP_DEFAULT_BRIDGE_PRIORITY; + } + + bridge_id = bridge_address | ((uint64_t) bridge_priority << 48); + if (!br->stp) { + br->stp = stp_create(br->name, bridge_id, brstp_send_bpdu, br); + br->stp_last_tick = time_msec(); + bridge_flush(br); + } else { + if (bridge_id != stp_get_bridge_id(br->stp)) { + stp_set_bridge_id(br->stp, bridge_id); + bridge_flush(br); + } + } + + for (i = 0; i < br->n_ports; i++) { + struct port *p = br->ports[i]; + int dp_ifidx; + struct stp_port *sp; + int path_cost, priority; + bool 
enable; + if (!p->n_ifaces) { + continue; + } + dp_ifidx = p->ifaces[0]->dp_ifidx; + if (dp_ifidx < 0 || dp_ifidx >= STP_MAX_PORTS) { + continue; + } + + sp = stp_get_port(br->stp, dp_ifidx); + enable = (!cfg_is_valid(CFG_BOOL | CFG_REQUIRED, + "stp.%s.port.%s.enabled", + br->name, p->name) + || cfg_get_bool(0, "stp.%s.port.%s.enabled", + br->name, p->name)); + if (p->is_mirror_output_port) { + enable = false; + } + if (enable != (stp_port_get_state(sp) != STP_DISABLED)) { + bridge_flush(br); /* Might not be necessary. */ + if (enable) { + stp_port_enable(sp); + } else { + stp_port_disable(sp); + } + } + + path_cost = cfg_get_int(0, "stp.%s.port.%s.path-cost", + br->name, p->name); + stp_port_set_path_cost(sp, path_cost ? path_cost : 19 /* XXX */); + + priority = (cfg_is_valid(CFG_INT | CFG_REQUIRED, + "stp.%s.port.%s.priority", + br->name, p->name) + ? cfg_get_int(0, "stp.%s.port.%s.priority", + br->name, p->name) + : STP_DEFAULT_PORT_PRIORITY); + stp_port_set_priority(sp, priority); + } + + brstp_adjust_timers(br); + } + for (i = 0; i < br->n_ports; i++) { + brstp_update_port_state(br->ports[i]); + } +} + +static void +brstp_update_port_state(struct port *p) +{ + struct bridge *br = p->bridge; + enum stp_state state; + + /* Figure out new state. */ + state = STP_DISABLED; + if (br->stp && p->n_ifaces > 0) { + int dp_ifidx = p->ifaces[0]->dp_ifidx; + if (dp_ifidx >= 0 && dp_ifidx < STP_MAX_PORTS) { + state = stp_port_get_state(stp_get_port(br->stp, dp_ifidx)); + } + } + + /* Update state. 
*/ + if (p->stp_state != state) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + VLOG_WARN_RL(&rl, "port %s: STP state changed from %s to %s", + p->name, stp_state_name(p->stp_state), + stp_state_name(state)); + if (p->stp_state == STP_DISABLED) { + bridge_flush(br); + } else { + tag_set_add(&p->bridge->revalidate_set, p->stp_state_tag); + printf("invalidate %x\n", p->stp_state_tag); + } + p->stp_state = state; + p->stp_state_tag = (p->stp_state == STP_DISABLED ? 0 + : tag_create_random()); + } +} + +static void +brstp_adjust_timers(struct bridge *br) +{ + int hello_time = cfg_get_int(0, "stp.%s.hello-time", br->name); + int max_age = cfg_get_int(0, "stp.%s.max-age", br->name); + int forward_delay = cfg_get_int(0, "stp.%s.forward-delay", br->name); + + stp_set_hello_time(br->stp, hello_time ? hello_time : 2000); + stp_set_max_age(br->stp, max_age ? max_age : 20000); + stp_set_forward_delay(br->stp, forward_delay ? forward_delay : 15000); +} + +static void +brstp_run(struct bridge *br) +{ + if (br->stp) { + long long int now = time_msec(); + long long int elapsed = now - br->stp_last_tick; + struct stp_port *sp; + + if (elapsed > 0) { + stp_tick(br->stp, MIN(INT_MAX, elapsed)); + br->stp_last_tick = now; + } + while (stp_get_changed_port(br->stp, &sp)) { + struct port *p = port_from_dp_ifidx(br, stp_port_no(sp)); + if (p) { + brstp_update_port_state(p); + } + } + } +} + +static void +brstp_wait(struct bridge *br) +{ + if (br->stp) { + poll_timer_wait(1000); + } +} + +static void +brstp_receive(struct bridge *br, const struct flow *flow, + const struct ofpbuf *pkt) +{ + struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + struct ofpbuf payload = *pkt; + struct eth_header *eth; + struct llc_header *llc; + struct stp_port *sp; + int in_ifidx; + + /* Find the interface and port structure for the received packet. 
*/ + in_ifidx = ntohs(flow->in_port); + if (in_ifidx >= STP_MAX_PORTS) { + return; + } + sp = stp_get_port(br->stp, in_ifidx); + if (stp_port_get_state(sp) == STP_DISABLED) { + return; + } + + /* Check LLC DSAP. (Caller already verified dl_type and dl_dst.) */ + eth = payload.data; + llc = ofpbuf_at_assert(&payload, sizeof(struct eth_header), sizeof *llc); + if (llc->llc_dsap != STP_LLC_DSAP) { + VLOG_DBG_RL(&rl, "bad DSAP 0x%02"PRIx8" received on STP multicast " + "address", llc->llc_dsap); + return; + } + + /* Trim off padding on payload. */ + if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) { + payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN; + } + if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) { + struct stp_port *p = stp_get_port(br->stp, ntohs(flow->in_port)); + stp_received_bpdu(p, payload.data, payload.size); + } +} diff --git a/vswitchd/vswitchd.conf.5 b/vswitchd/vswitchd.conf.5 index 121cb545..fba38056 100644 --- a/vswitchd/vswitchd.conf.5 +++ b/vswitchd/vswitchd.conf.5 @@ -267,6 +267,75 @@ on \fBeth1\fR or \fBeth2\fR to \fBeth3\fR, setting their VLAN tags to output.port = eth3 .fi +.SS "IEEE 802.1D-1998 Spanning Tree Support" +.PP +\fBvswitchd\fR supports IEEE 802.1D-1998 Spanning Tree Protocol (STP), +which detects and prevents loops in switch topology. By default, STP +is disabled. To turn it on for a given \fIbridge\fR, set +\fBstp.\fIbridge\fB.enabled\fR to \fBtrue\fR. +.PP +By default, \fBvswitchd\fR chooses a random bridge address each time +STP is enabled for the switch. To use a specific bridge address +(which is recommended), set \fBstp.\fIbridge\fB.address\fR to a MAC +address in the format +\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR, where each +\fIx\fR is a hex digit. (A common choice of bridge address is the MAC +address of one of the bridge ports.) +.PP +Bridge priority allows a network administrator to influence the +construction of the spanning tree. The default bridge priority is +32768. 
It may be overridden by setting +\fBstp.\fIbridge\fB.priority\fR to a number between 0 and 65535. +Lower numbers correspond to higher priorities. +.PP +\fBvswitchd\fR does not implement IEEE 802.1D-2004 Rapid Spanning Tree +Protocol (RSTP). +.PP +.I "STP Port Configuration" +.PP +Some STP features may be configured on a port-by-port basis. +.PP +To disable STP on an individual \fIport\fR within \fIbridge\fR, set +\fBstp.\fIbridge\fB.port.\fIport\fB.enabled\fR to \fBfalse\fR. STP is +never enabled on a port that is used as an output port for port +mirroring. +.PP +The "path cost" of a port reflects how expensive it is (generally, in +time) to send data out a particular port. \fBvswitchd\fR uses a cost +of 19, which is generally appropriate for 100 Mb/s ports, as the default +path cost for STP ports. Set +\fBstp.\fIbridge\fB.port.\fIport\fB.path-cost\fR to a number between 1 +and 65535 to override this default. (Future versions of +\fBvswitchd\fR will choose a default path cost based on the port's +current data rate.) +.PP +Port priority allows a network administrator to influence the +construction of the spanning tree. The default port priority is 128. +It may be overridden by setting +\fBstp.\fIbridge\fB.port.\fIport\fB.priority\fR to a number between 0 and +255. Lower numbers correspond to higher priorities. +.PP +.I "Example" +.PP +The following syntax enables STP on bridge \fBmybr\fR that consists of +network devices \fBeth0\fR, \fBeth1\fR, and \fBeth2\fR. The bridge +address is set to 00:02:e3:0f:80:a4 and \fBeth2\fR's port priority is +set to 64: +.PP +.RS +.nf + +[bridge "mybr"] + port = eth0 + port = eth1 + port = eth2 + +[stp "mybr"] + enabled = true + address = 00:02:e3:0f:80:a4 + port.eth2.priority = 64 +.fi +.RE .SS "OpenFlow controller connectivity" By default, \fBvswitchd\fR performs all configured bridging and switching locally. It can also be configured to connect a given -- 2.30.2