From fadf2608a76a169e5d560e01df3232fd9517b591 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 31 Mar 2008 09:58:13 -0700 Subject: [PATCH] Merge datapath.[ch] with forward.[ch]. --- switch/Makefile.am | 2 - switch/datapath.c | 576 +++++++++++++++++++++++++++++++++++++++++++-- switch/datapath.h | 50 +--- switch/forward.c | 566 -------------------------------------------- switch/forward.h | 70 ------ switch/switch.c | 3 - 6 files changed, 562 insertions(+), 705 deletions(-) delete mode 100644 switch/forward.c delete mode 100644 switch/forward.h diff --git a/switch/Makefile.am b/switch/Makefile.am index 727d1bea..24c23891 100644 --- a/switch/Makefile.am +++ b/switch/Makefile.am @@ -11,8 +11,6 @@ switch_SOURCES = \ crc32.h \ datapath.c \ datapath.h \ - forward.c \ - forward.h \ netdev.c \ netdev.h \ switch.c \ diff --git a/switch/datapath.c b/switch/datapath.c index e98ffbe7..34914dfd 100644 --- a/switch/datapath.c +++ b/switch/datapath.c @@ -41,7 +41,6 @@ #include "chain.h" #include "controller.h" #include "flow.h" -#include "forward.h" #include "netdev.h" #include "packets.h" #include "poll-loop.h" @@ -53,8 +52,83 @@ #define BRIDGE_PORT_NO_FLOOD 0x00000001 +/* Capabilities supported by this implementation. */ +#define OFP_SUPPORTED_CAPABILITIES (OFPC_MULTI_PHY_TX) + +/* Actions supported by this implementation. */ +#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \ + | (1 << OFPAT_SET_DL_VLAN) \ + | (1 << OFPAT_SET_DL_SRC) \ + | (1 << OFPAT_SET_DL_DST) \ + | (1 << OFPAT_SET_NW_SRC) \ + | (1 << OFPAT_SET_NW_DST) \ + | (1 << OFPAT_SET_TP_SRC) \ + | (1 << OFPAT_SET_TP_DST) ) + +struct sw_port { + uint32_t flags; + struct datapath *dp; + struct netdev *netdev; + struct list node; /* Element in datapath.ports. */ +}; + +struct datapath { + struct controller_connection *cc; + + time_t last_timeout; + + /* Unique identifier for this datapath */ + uint64_t id; + + struct sw_chain *chain; /* Forwarding rules. */ + + /* Flags from the control hello message */ + uint16_t hello_flags; + + /* Maximum number of bytes that should be sent for flow misses */ + uint16_t miss_send_len; + + /* Switch ports. */ + struct sw_port ports[OFPP_MAX]; + struct list port_list; /* List of ports, for flooding. */ +}; + +void dp_output_port(struct datapath *, struct buffer *, + int in_port, int out_port); +void dp_send_hello(struct datapath *); +void dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp); +void dp_output_control(struct datapath *, struct buffer *, int in_port, + size_t max_len, int reason); static void send_port_status(struct sw_port *p, uint8_t status); static void del_switch_port(struct sw_port *p); +static void execute_actions(struct datapath *, struct buffer *, + int in_port, const struct sw_flow_key *, + const struct ofp_action *, int n_actions); +static void modify_vlan(struct buffer *buffer, const struct sw_flow_key *key, + const struct ofp_action *a); +static void modify_nh(struct buffer *buffer, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a); +static void modify_th(struct buffer *buffer, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a); + +/* Buffers are identified to userspace by a 31-bit opaque ID. We divide the ID + * into a buffer number (low bits) and a cookie (high bits). The buffer number + * is an index into an array of buffers. The cookie distinguishes between + * different packets that have occupied a single buffer. Thus, the more + * buffers we have, the lower-quality the cookie... */ +#define PKT_BUFFER_BITS 8 +#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) +#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) + +#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) + +void fwd_port_input(struct datapath *, struct buffer *, int in_port); +int fwd_control_input(struct datapath *, const void *, size_t); + +uint32_t save_buffer(struct buffer *); +static struct buffer *retrieve_buffer(uint32_t id); +static void discard_buffer(uint32_t id); + static int port_no(struct datapath *dp, struct sw_port *p) { assert(p >= dp->ports && p < &dp->ports[ARRAY_SIZE(dp->ports)]); @@ -143,6 +217,7 @@ dp_run(struct datapath *dp) time_t now = time(0); struct sw_port *p, *n; struct buffer *buffer = NULL; + int i; if (now != dp->last_timeout) { chain_timeout(dp->chain, dp); @@ -216,25 +291,20 @@ static int flood(struct datapath *dp, struct buffer *buffer, int in_port) { struct sw_port *p; - struct sw_port *prev_port; + int prev_port; - prev_port = NULL; + prev_port = -1; LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) { if (port_no(dp, p) == in_port || p->flags & BRIDGE_PORT_NO_FLOOD) { continue; } - if (prev_port) { - struct buffer *clone = buffer_clone(buffer); - if (!clone) { - buffer_delete(buffer); - return -ENOMEM; - } - dp_output_port(dp, clone, in_port, port_no(dp, prev_port)); + if (prev_port != -1) { + dp_output_port(dp, buffer_clone(buffer), in_port, prev_port); } - prev_port = p; + prev_port = port_no(dp, p); } - if (prev_port) - dp_output_port(dp, buffer, in_port, port_no(dp, prev_port)); + if (prev_port != -1) + dp_output_port(dp, buffer, in_port, prev_port); else buffer_delete(buffer); @@ -268,8 +338,7 @@ dp_output_port(struct datapath *dp, struct buffer *buffer, if (out_port == OFPP_FLOOD) { flood(dp, buffer, in_port); } else if (out_port == OFPP_CONTROLLER) { - dp_output_control(dp, buffer, in_port, fwd_save_buffer(buffer), 0, - OFPR_ACTION); + dp_output_control(dp, buffer, in_port, 0, OFPR_ACTION); } else { output_packet(dp, buffer, out_port); } @@ -282,11 +351,13 @@ dp_output_port(struct datapath *dp, struct buffer *buffer, * to be sent; a value of 0 indicates the entire packet should be sent. */ void dp_output_control(struct datapath *dp, struct buffer *buffer, int in_port, - uint32_t buffer_id, size_t max_len, int reason) + size_t max_len, int reason) { struct ofp_packet_in *opi; size_t total_len; + uint32_t buffer_id; + buffer_id = save_buffer(buffer); total_len = buffer->size; if (buffer_id != UINT32_MAX && max_len > buffer->size) { buffer->size = max_len; @@ -399,3 +470,476 @@ dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow) ofe->byte_count = htonll(flow->byte_count); controller_send(dp->cc, buffer); } + +/* 'buffer' was received on 'in_port', a physical switch port between 0 and + * OFPP_MAX. Process it according to 'chain'. */ +void fwd_port_input(struct datapath *dp, struct buffer *buffer, int in_port) +{ + struct sw_flow_key key; + struct sw_flow *flow; + + key.wildcards = 0; + flow_extract(buffer, in_port, &key.flow); + flow = chain_lookup(dp->chain, &key); + if (flow != NULL) { + flow_used(flow, buffer); + execute_actions(dp, buffer, in_port, &key, + flow->actions, flow->n_actions); + } else { + dp_output_control(dp, buffer, in_port, dp->miss_send_len, + OFPR_NO_MATCH); + } +} + +static void +do_output(struct datapath *dp, struct buffer *buffer, int in_port, + size_t max_len, int out_port) +{ + if (out_port != OFPP_CONTROLLER) { + dp_output_port(dp, buffer, in_port, out_port); + } else { + dp_output_control(dp, buffer, in_port, max_len, OFPR_ACTION); + } +} + +static void +execute_actions(struct datapath *dp, struct buffer *buffer, + int in_port, const struct sw_flow_key *key, + const struct ofp_action *actions, int n_actions) +{ + /* Every output action needs a separate clone of 'buffer', but the common + * case is just a single output action, so that doing a clone and then + * freeing the original buffer is wasteful. So the following code is + * slightly obscure just to avoid that. */ + int prev_port; + size_t max_len=0; /* Initialze to make compiler happy */ + uint16_t eth_proto; + int i; + + prev_port = -1; + eth_proto = ntohs(key->flow.dl_type); + + for (i = 0; i < n_actions; i++) { + const struct ofp_action *a = &actions[i]; + struct eth_header *eh = buffer->l2; + + if (prev_port != -1) { + do_output(dp, buffer_clone(buffer), in_port, max_len, prev_port); + prev_port = -1; + } + + switch (ntohs(a->type)) { + case OFPAT_OUTPUT: + prev_port = ntohs(a->arg.output.port); + max_len = ntohs(a->arg.output.max_len); + break; + + case OFPAT_SET_DL_VLAN: + modify_vlan(buffer, key, a); + break; + + case OFPAT_SET_DL_SRC: + memcpy(eh->eth_src, a->arg.dl_addr, sizeof eh->eth_src); + break; + + case OFPAT_SET_DL_DST: + memcpy(eh->eth_dst, a->arg.dl_addr, sizeof eh->eth_dst); + break; + + case OFPAT_SET_NW_SRC: + case OFPAT_SET_NW_DST: + modify_nh(buffer, eth_proto, key->flow.nw_proto, a); + break; + + case OFPAT_SET_TP_SRC: + case OFPAT_SET_TP_DST: + modify_th(buffer, eth_proto, key->flow.nw_proto, a); + break; + + default: + NOT_REACHED(); + } + } + if (prev_port != -1) + do_output(dp, buffer, in_port, max_len, prev_port); + else + buffer_delete(buffer); +} + +/* Returns the new checksum for a packet in which the checksum field previously + * contained 'old_csum' and in which a field that contained 'old_u16' was + * changed to contain 'new_u16'. */ +static uint16_t +recalc_csum16(uint16_t old_csum, uint16_t old_u16, uint16_t new_u16) +{ + /* Ones-complement arithmetic is endian-independent, so this code does not + * use htons() or ntohs(). + * + * See RFC 1624 for formula and explanation. */ + uint16_t hc_complement = ~old_csum; + uint16_t m_complement = ~old_u16; + uint16_t m_prime = new_u16; + uint32_t sum = hc_complement + m_complement + m_prime; + uint16_t hc_prime_complement = sum + (sum >> 16); + return ~hc_prime_complement; +} + +/* Returns the new checksum for a packet in which the checksum field previously + * contained 'old_csum' and in which a field that contained 'old_u32' was + * changed to contain 'new_u32'. */ +static uint16_t +recalc_csum32(uint16_t old_csum, uint32_t old_u32, uint32_t new_u32) +{ + return recalc_csum16(recalc_csum16(old_csum, old_u32, new_u32), + old_u32 >> 16, new_u32 >> 16); +} + +static void modify_nh(struct buffer *buffer, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_TYPE_IP) { + struct ip_header *nh = buffer->l3; + uint32_t new, *field; + + new = a->arg.nw_addr; + field = a->type == OFPAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; + if (nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = buffer->l4; + th->tcp_csum = recalc_csum32(th->tcp_csum, *field, new); + } else if (nw_proto == IP_TYPE_UDP) { + struct udp_header *th = buffer->l4; + if (th->udp_csum) { + th->udp_csum = recalc_csum32(th->udp_csum, *field, new); + if (!th->udp_csum) { + th->udp_csum = 0xffff; + } + } + } + nh->ip_csum = recalc_csum32(nh->ip_csum, *field, new); + *field = new; + } +} + +static void modify_th(struct buffer *buffer, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_TYPE_IP) { + uint16_t new, *field; + + new = a->arg.tp; + + if (nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = buffer->l4; + field = a->type == OFPAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst; + th->tcp_csum = recalc_csum16(th->tcp_csum, *field, new); + *field = new; + } else if (nw_proto == IP_TYPE_UDP) { + struct udp_header *th = buffer->l4; + field = a->type == OFPAT_SET_TP_SRC ? &th->udp_src : &th->udp_dst; + th->udp_csum = recalc_csum16(th->udp_csum, *field, new); + *field = new; + } + } +} + +static void +modify_vlan(struct buffer *buffer, + const struct sw_flow_key *key, const struct ofp_action *a) +{ + uint16_t new_id = a->arg.vlan_id; + struct vlan_eth_header *veh; + + if (new_id != OFP_VLAN_NONE) { + if (key->flow.dl_vlan != htons(OFP_VLAN_NONE)) { + /* Modify vlan id, but maintain other TCI values */ + veh = buffer->l2; + veh->veth_tci &= ~htons(VLAN_VID); + veh->veth_tci |= htons(new_id); + } else { + /* Insert new vlan id. */ + struct eth_header *eh = buffer->l2; + struct vlan_eth_header tmp; + memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); + memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); + tmp.veth_type = htons(ETH_TYPE_VLAN); + tmp.veth_tci = new_id; + tmp.veth_next_type = eh->eth_type; + + veh = buffer_push_uninit(buffer, VLAN_HEADER_LEN); + memcpy(veh, &tmp, sizeof tmp); + buffer->l2 -= VLAN_HEADER_LEN; + } + } else { + /* Remove an existing vlan header if it exists */ + veh = buffer->l2; + if (veh->veth_type == htons(ETH_TYPE_VLAN)) { + struct eth_header tmp; + + memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); + memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); + tmp.eth_type = veh->veth_next_type; + + buffer->size -= VLAN_HEADER_LEN; + buffer->data += VLAN_HEADER_LEN; + buffer->l2 += VLAN_HEADER_LEN; + memcpy(buffer->data, &tmp, sizeof tmp); + } + } +} + +static int +recv_control_hello(struct datapath *dp, const void *msg) +{ + const struct ofp_control_hello *och = msg; + + printf("control_hello(version=%d)\n", ntohl(och->version)); + + if (ntohs(och->miss_send_len) != OFP_MISS_SEND_LEN_UNCHANGED) { + dp->miss_send_len = ntohs(och->miss_send_len); + } + + dp->hello_flags = ntohs(och->flags); + + dp_send_hello(dp); + + return 0; +} + +static int +recv_packet_out(struct datapath *dp, const void *msg) +{ + const struct ofp_packet_out *opo = msg; + + if (ntohl(opo->buffer_id) == (uint32_t) -1) { + /* FIXME: can we avoid copying data here? */ + int data_len = ntohs(opo->header.length) - sizeof *opo; + struct buffer *buffer = buffer_new(data_len); + buffer_put(buffer, opo->u.data, data_len); + dp_output_port(dp, buffer, + ntohs(opo->in_port), ntohs(opo->out_port)); + } else { + struct sw_flow_key key; + struct buffer *buffer; + int n_acts; + + buffer = retrieve_buffer(ntohl(opo->buffer_id)); + if (!buffer) { + return -ESRCH; + } + + n_acts = (ntohs(opo->header.length) - sizeof *opo) + / sizeof *opo->u.actions; + flow_extract(buffer, ntohs(opo->in_port), &key.flow); + execute_actions(dp, buffer, ntohs(opo->in_port), + &key, opo->u.actions, n_acts); + } + return 0; +} + +static int +recv_port_mod(struct datapath *dp, const void *msg) +{ + const struct ofp_port_mod *opm = msg; + + dp_update_port_flags(dp, &opm->desc); + + return 0; +} + +static int +add_flow(struct datapath *dp, const struct ofp_flow_mod *ofm) +{ + int error = -ENOMEM; + int n_acts; + struct sw_flow *flow; + + + /* Check number of actions. */ + n_acts = (ntohs(ofm->header.length) - sizeof *ofm) / sizeof *ofm->actions; + if (n_acts > MAX_ACTIONS) { + error = -E2BIG; + goto error; + } + + /* Allocate memory. */ + flow = flow_alloc(n_acts); + if (flow == NULL) + goto error; + + /* Fill out flow. */ + flow_extract_match(&flow->key, &ofm->match); + flow->group_id = ntohl(ofm->group_id); + flow->max_idle = ntohs(ofm->max_idle); + flow->timeout = time(0) + flow->max_idle; /* FIXME */ + flow->n_actions = n_acts; + flow->created = time(0); /* FIXME */ + flow->byte_count = 0; + flow->packet_count = 0; + memcpy(flow->actions, ofm->actions, n_acts * sizeof *flow->actions); + + /* Act. */ + error = chain_insert(dp->chain, flow); + if (error) { + goto error_free_flow; + } + error = 0; + if (ntohl(ofm->buffer_id) != UINT32_MAX) { + struct buffer *buffer = retrieve_buffer(ntohl(ofm->buffer_id)); + if (buffer) { + struct sw_flow_key key; + uint16_t in_port = ntohs(ofm->match.in_port); + flow_used(flow, buffer); + flow_extract(buffer, in_port, &key.flow); + execute_actions(dp, buffer, in_port, &key, ofm->actions, n_acts); + } else { + error = -ESRCH; + } + } + return error; + +error_free_flow: + flow_free(flow); +error: + if (ntohl(ofm->buffer_id) != (uint32_t) -1) + discard_buffer(ntohl(ofm->buffer_id)); + return error; +} + +static int +recv_flow(struct datapath *dp, const void *msg) +{ + const struct ofp_flow_mod *ofm = msg; + uint16_t command = ntohs(ofm->command); + + if (command == OFPFC_ADD) { + return add_flow(dp, ofm); + } else if (command == OFPFC_DELETE) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(dp->chain, &key, 0) ? 0 : -ESRCH; + } else if (command == OFPFC_DELETE_STRICT) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(dp->chain, &key, 1) ? 0 : -ESRCH; + } else { + return -ENODEV; + } +} + +/* 'msg', which is 'length' bytes long, was received from the control path. + * Apply it to 'chain'. */ +int +fwd_control_input(struct datapath *dp, const void *msg, size_t length) +{ + + struct openflow_packet { + size_t min_size; + int (*handler)(struct datapath *, const void *); + }; + + static const struct openflow_packet packets[] = { + [OFPT_CONTROL_HELLO] = { + sizeof (struct ofp_control_hello), + recv_control_hello, + }, + [OFPT_PACKET_OUT] = { + sizeof (struct ofp_packet_out), + recv_packet_out, + }, + [OFPT_FLOW_MOD] = { + sizeof (struct ofp_flow_mod), + recv_flow, + }, + [OFPT_PORT_MOD] = { + sizeof (struct ofp_port_mod), + recv_port_mod, + }, + }; + + const struct openflow_packet *pkt; + struct ofp_header *oh; + + if (length < sizeof(struct ofp_header)) + return -EINVAL; + + oh = (struct ofp_header *) msg; + if (oh->version != 1 || oh->type >= ARRAY_SIZE(packets) + || ntohs(oh->length) > length) + return -EINVAL; + + pkt = &packets[oh->type]; + if (!pkt->handler) + return -ENOSYS; + if (length < pkt->min_size) + return -EFAULT; + + return pkt->handler(dp, msg); +} + +/* Packet buffering. */ + +#define OVERWRITE_SECS 1 + +struct packet_buffer { + struct buffer *buffer; + uint32_t cookie; + time_t timeout; +}; + +static struct packet_buffer buffers[N_PKT_BUFFERS]; +static unsigned int buffer_idx; + +uint32_t save_buffer(struct buffer *buffer) +{ + struct packet_buffer *p; + uint32_t id; + + buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; + p = &buffers[buffer_idx]; + if (p->buffer) { + /* Don't buffer packet if existing entry is less than + * OVERWRITE_SECS old. */ + if (time(0) < p->timeout) { /* FIXME */ + return -1; + } else { + buffer_delete(p->buffer); + } + } + /* Don't use maximum cookie value since the all-bits-1 id is + * special. */ + if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) + p->cookie = 0; + p->buffer = buffer_clone(buffer); /* FIXME */ + p->timeout = time(0) + OVERWRITE_SECS; /* FIXME */ + id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); + + return id; +} + +static struct buffer *retrieve_buffer(uint32_t id) +{ + struct buffer *buffer = NULL; + struct packet_buffer *p; + + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + buffer = p->buffer; + p->buffer = NULL; + } else { + printf("cookie mismatch: %x != %x\n", + id >> PKT_BUFFER_BITS, p->cookie); + } + + return buffer; +} + +static void discard_buffer(uint32_t id) +{ + struct packet_buffer *p; + + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + buffer_delete(p->buffer); + p->buffer = NULL; + } +} diff --git a/switch/datapath.h b/switch/datapath.h index 09824897..1c52e435 100644 --- a/switch/datapath.h +++ b/switch/datapath.h @@ -42,60 +42,14 @@ #include "buffer.h" #include "list.h" -#define NL_FLOWS_PER_MESSAGE 100 - -/* Capabilities supported by this implementation. */ -#define OFP_SUPPORTED_CAPABILITIES (OFPC_MULTI_PHY_TX) - -/* Actions supported by this implementation. */ -#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \ - | (1 << OFPAT_SET_DL_VLAN) \ - | (1 << OFPAT_SET_DL_SRC) \ - | (1 << OFPAT_SET_DL_DST) \ - | (1 << OFPAT_SET_NW_SRC) \ - | (1 << OFPAT_SET_NW_DST) \ - | (1 << OFPAT_SET_TP_SRC) \ - | (1 << OFPAT_SET_TP_DST) ) - -struct sw_port { - uint32_t flags; - struct datapath *dp; - struct netdev *netdev; - struct list node; /* Element in datapath.ports. */ -}; - -struct datapath { - struct controller_connection *cc; - - time_t last_timeout; - - /* Unique identifier for this datapath */ - uint64_t id; - - struct sw_chain *chain; /* Forwarding rules. */ - - /* Flags from the control hello message */ - uint16_t hello_flags; - - /* Maximum number of bytes that should be sent for flow misses */ - uint16_t miss_send_len; - - /* Switch ports. */ - struct sw_port ports[OFPP_MAX]; - struct list port_list; /* List of ports, for flooding. */ -}; +struct datapath; +struct controller_connection; int dp_new(struct datapath **, uint64_t dpid, struct controller_connection *); int dp_add_port(struct datapath *, const char *netdev); void dp_run(struct datapath *); void dp_wait(struct datapath *); -void dp_output_port(struct datapath *, struct buffer *, - int in_port, int out_port); -void dp_output_control(struct datapath *, struct buffer *, int in_port, - uint32_t buffer_id, size_t max_len, int reason); -void dp_send_hello(struct datapath *); void dp_send_flow_expired(struct datapath *, struct sw_flow *); -void dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp); #endif /* datapath.h */ diff --git a/switch/forward.c b/switch/forward.c deleted file mode 100644 index 908b4dfa..00000000 --- a/switch/forward.c +++ /dev/null @@ -1,566 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#include "forward.h" -#include -#include -#include -#include -#include -#include "controller.h" -#include "datapath.h" -#include "chain.h" -#include "flow.h" -#include "packets.h" - -static void execute_actions(struct datapath *, struct buffer *, - int in_port, const struct sw_flow_key *, - const struct ofp_action *, int n_actions); - -static struct buffer *retrieve_buffer(uint32_t id); -static void discard_buffer(uint32_t id); - -void -fwd_run(struct datapath *dp) -{ - int i; - - for (i = 0; i < 50; i++) { - struct buffer *buffer = controller_recv(dp->cc); - if (!buffer) { - break; - } - fwd_control_input(dp, buffer->data, buffer->size); - buffer_delete(buffer); - } -} - -void -fwd_run_wait(struct datapath *dp) -{ - controller_recv_wait(dp->cc); -} - -/* 'buffer' was received on 'in_port', a physical switch port between 0 and - * OFPP_MAX. Process it according to 'chain'. */ -void fwd_port_input(struct datapath *dp, struct buffer *buffer, int in_port) -{ - struct sw_flow_key key; - struct sw_flow *flow; - - key.wildcards = 0; - flow_extract(buffer, in_port, &key.flow); - flow = chain_lookup(dp->chain, &key); - if (flow != NULL) { - flow_used(flow, buffer); - execute_actions(dp, buffer, in_port, &key, - flow->actions, flow->n_actions); - } else { - dp_output_control(dp, buffer, in_port, fwd_save_buffer(buffer), - dp->miss_send_len, OFPR_NO_MATCH); - } -} - -static void -do_output(struct datapath *dp, struct buffer *buffer, int in_port, - size_t max_len, int out_port) -{ - if (out_port != OFPP_CONTROLLER) { - dp_output_port(dp, buffer, in_port, out_port); - } else { - dp_output_control(dp, buffer, in_port, fwd_save_buffer(buffer), - max_len, OFPR_ACTION); - } -} - -static void execute_actions(struct datapath *dp, struct buffer *buffer, - int in_port, const struct sw_flow_key *key, - const struct ofp_action *actions, int n_actions) -{ - /* Every output action needs a separate clone of 'buffer', but the common - * case is just a single output action, so that doing a clone and then - * freeing the original buffer is wasteful. So the following code is - * slightly obscure just to avoid that. */ - int prev_port; - size_t max_len=0; /* Initialze to make compiler happy */ - uint16_t eth_proto; - int i; - - prev_port = -1; - eth_proto = ntohs(key->flow.dl_type); - - for (i = 0; i < n_actions; i++) { - const struct ofp_action *a = &actions[i]; - - if (prev_port != -1) { - do_output(dp, buffer_clone(buffer), in_port, max_len, prev_port); - prev_port = -1; - } - - if (a->type == ntohs(OFPAT_OUTPUT)) { - prev_port = ntohs(a->arg.output.port); - max_len = ntohs(a->arg.output.max_len); - } else { - buffer = execute_setter(buffer, eth_proto, key, a); - } - } - if (prev_port != -1) - do_output(dp, buffer, in_port, max_len, prev_port); - else - buffer_delete(buffer); -} - -/* Returns the new checksum for a packet in which the checksum field previously - * contained 'old_csum' and in which a field that contained 'old_u16' was - * changed to contain 'new_u16'. */ -static uint16_t -recalc_csum16(uint16_t old_csum, uint16_t old_u16, uint16_t new_u16) -{ - /* Ones-complement arithmetic is endian-independent, so this code does not - * use htons() or ntohs(). - * - * See RFC 1624 for formula and explanation. */ - uint16_t hc_complement = ~old_csum; - uint16_t m_complement = ~old_u16; - uint16_t m_prime = new_u16; - uint32_t sum = hc_complement + m_complement + m_prime; - uint16_t hc_prime_complement = sum + (sum >> 16); - return ~hc_prime_complement; -} - -/* Returns the new checksum for a packet in which the checksum field previously - * contained 'old_csum' and in which a field that contained 'old_u32' was - * changed to contain 'new_u32'. */ -static uint16_t -recalc_csum32(uint16_t old_csum, uint32_t old_u32, uint32_t new_u32) -{ - return recalc_csum16(recalc_csum16(old_csum, old_u32, new_u32), - old_u32 >> 16, new_u32 >> 16); -} - -static void modify_nh(struct buffer *buffer, uint16_t eth_proto, - uint8_t nw_proto, const struct ofp_action *a) -{ - if (eth_proto == ETH_TYPE_IP) { - struct ip_header *nh = buffer->l3; - uint32_t new, *field; - - new = a->arg.nw_addr; - field = a->type == OFPAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; - if (nw_proto == IP_TYPE_TCP) { - struct tcp_header *th = buffer->l4; - th->tcp_csum = recalc_csum32(th->tcp_csum, *field, new); - } else if (nw_proto == IP_TYPE_UDP) { - struct udp_header *th = buffer->l4; - if (th->udp_csum) { - th->udp_csum = recalc_csum32(th->udp_csum, *field, new); - if (!th->udp_csum) { - th->udp_csum = 0xffff; - } - } - } - nh->ip_csum = recalc_csum32(nh->ip_csum, *field, new); - *field = new; - } -} - -static void modify_th(struct buffer *buffer, uint16_t eth_proto, - uint8_t nw_proto, const struct ofp_action *a) -{ - if (eth_proto == ETH_TYPE_IP) { - uint16_t new, *field; - - new = a->arg.tp; - - if (nw_proto == IP_TYPE_TCP) { - struct tcp_header *th = buffer->l4; - field = a->type == OFPAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst; - th->tcp_csum = recalc_csum16(th->tcp_csum, *field, new); - *field = new; - } else if (nw_proto == IP_TYPE_UDP) { - struct udp_header *th = buffer->l4; - field = a->type == OFPAT_SET_TP_SRC ? &th->udp_src : &th->udp_dst; - th->udp_csum = recalc_csum16(th->udp_csum, *field, new); - *field = new; - } - } -} - -static struct buffer * -modify_vlan(struct buffer *buffer, - const struct sw_flow_key *key, const struct ofp_action *a) -{ - uint16_t new_id = a->arg.vlan_id; - struct vlan_eth_header *veh; - - if (new_id != OFP_VLAN_NONE) { - if (key->flow.dl_vlan != htons(OFP_VLAN_NONE)) { - /* Modify vlan id, but maintain other TCI values */ - veh = buffer->l2; - veh->veth_tci &= ~htons(VLAN_VID); - veh->veth_tci |= htons(new_id); - } else { - /* Insert new vlan id. */ - struct eth_header *eh = buffer->l2; - struct vlan_eth_header tmp; - memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); - memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); - tmp.veth_type = htons(ETH_TYPE_VLAN); - tmp.veth_tci = new_id; - tmp.veth_next_type = eh->eth_type; - - veh = buffer_push_uninit(buffer, VLAN_HEADER_LEN); - memcpy(veh, &tmp, sizeof tmp); - buffer->l2 -= VLAN_HEADER_LEN; - } - } else { - /* Remove an existing vlan header if it exists */ - veh = buffer->l2; - if (veh->veth_type == htons(ETH_TYPE_VLAN)) { - struct eth_header tmp; - - memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); - memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); - tmp.eth_type = veh->veth_next_type; - - buffer->size -= VLAN_HEADER_LEN; - buffer->data += VLAN_HEADER_LEN; - buffer->l2 += VLAN_HEADER_LEN; - memcpy(buffer->data, &tmp, sizeof tmp); - } - } - - return buffer; -} - -struct buffer *execute_setter(struct buffer *buffer, uint16_t eth_proto, - const struct sw_flow_key *key, const struct ofp_action *a) -{ - switch (a->type) { - case OFPAT_SET_DL_VLAN: - buffer = modify_vlan(buffer, key, a); - break; - - case OFPAT_SET_DL_SRC: { - struct eth_header *eh = buffer->l2; - memcpy(eh->eth_src, a->arg.dl_addr, sizeof eh->eth_src); - break; - } - case OFPAT_SET_DL_DST: { - struct eth_header *eh = buffer->l2; - memcpy(eh->eth_dst, a->arg.dl_addr, sizeof eh->eth_dst); - break; - } - - case OFPAT_SET_NW_SRC: - case OFPAT_SET_NW_DST: - modify_nh(buffer, eth_proto, key->flow.nw_proto, a); - break; - - case OFPAT_SET_TP_SRC: - case OFPAT_SET_TP_DST: - modify_th(buffer, eth_proto, key->flow.nw_proto, a); - break; - - default: - NOT_REACHED(); - } - - return buffer; -} - -static int -recv_control_hello(struct datapath *dp, const void *msg) -{ - const struct ofp_control_hello *och = msg; - - printf("control_hello(version=%d)\n", ntohl(och->version)); - - if (ntohs(och->miss_send_len) != OFP_MISS_SEND_LEN_UNCHANGED) { - dp->miss_send_len = ntohs(och->miss_send_len); - } - - dp->hello_flags = ntohs(och->flags); - - dp_send_hello(dp); - - return 0; -} - -static int -recv_packet_out(struct datapath *dp, const void *msg) -{ - const struct ofp_packet_out *opo = msg; - - if (ntohl(opo->buffer_id) == (uint32_t) -1) { - /* FIXME: can we avoid copying data here? */ - int data_len = ntohs(opo->header.length) - sizeof *opo; - struct buffer *buffer = buffer_new(data_len); - buffer_put(buffer, opo->u.data, data_len); - dp_output_port(dp, buffer, - ntohs(opo->in_port), ntohs(opo->out_port)); - } else { - struct sw_flow_key key; - struct buffer *buffer; - int n_acts; - - buffer = retrieve_buffer(ntohl(opo->buffer_id)); - if (!buffer) { - return -ESRCH; - } - - n_acts = (ntohs(opo->header.length) - sizeof *opo) - / sizeof *opo->u.actions; - flow_extract(buffer, ntohs(opo->in_port), &key.flow); - execute_actions(dp, buffer, ntohs(opo->in_port), - &key, opo->u.actions, n_acts); - } - return 0; -} - -static int -recv_port_mod(struct datapath *dp, const void *msg) -{ - const struct ofp_port_mod *opm = msg; - - dp_update_port_flags(dp, &opm->desc); - - return 0; -} - -static int -add_flow(struct datapath *dp, const struct ofp_flow_mod *ofm) -{ - int error = -ENOMEM; - int n_acts; - struct sw_flow *flow; - - - /* Check number of actions. */ - n_acts = (ntohs(ofm->header.length) - sizeof *ofm) / sizeof *ofm->actions; - if (n_acts > MAX_ACTIONS) { - error = -E2BIG; - goto error; - } - - /* Allocate memory. */ - flow = flow_alloc(n_acts); - if (flow == NULL) - goto error; - - /* Fill out flow. */ - flow_extract_match(&flow->key, &ofm->match); - flow->group_id = ntohl(ofm->group_id); - flow->max_idle = ntohs(ofm->max_idle); - flow->timeout = time(0) + flow->max_idle; /* FIXME */ - flow->n_actions = n_acts; - flow->created = time(0); /* FIXME */ - flow->byte_count = 0; - flow->packet_count = 0; - memcpy(flow->actions, ofm->actions, n_acts * sizeof *flow->actions); - - /* Act. */ - error = chain_insert(dp->chain, flow); - if (error) { - goto error_free_flow; - } - error = 0; - if (ntohl(ofm->buffer_id) != UINT32_MAX) { - struct buffer *buffer = retrieve_buffer(ntohl(ofm->buffer_id)); - if (buffer) { - struct sw_flow_key key; - uint16_t in_port = ntohs(ofm->match.in_port); - flow_used(flow, buffer); - flow_extract(buffer, in_port, &key.flow); - execute_actions(dp, buffer, in_port, - &key, ofm->actions, n_acts); - } else { - error = -ESRCH; - } - } - return error; - -error_free_flow: - flow_free(flow); -error: - if (ntohl(ofm->buffer_id) != (uint32_t) -1) - discard_buffer(ntohl(ofm->buffer_id)); - return error; -} - -static int -recv_flow(struct datapath *dp, const void *msg) -{ - const struct ofp_flow_mod *ofm = msg; - uint16_t command = ntohs(ofm->command); - - if (command == OFPFC_ADD) { - return add_flow(dp, ofm); - } else if (command == OFPFC_DELETE) { - struct sw_flow_key key; - flow_extract_match(&key, &ofm->match); - return chain_delete(dp->chain, &key, 0) ? 0 : -ESRCH; - } else if (command == OFPFC_DELETE_STRICT) { - struct sw_flow_key key; - flow_extract_match(&key, &ofm->match); - return chain_delete(dp->chain, &key, 1) ? 0 : -ESRCH; - } else { - return -ENODEV; - } -} - -/* 'msg', which is 'length' bytes long, was received from the control path. - * Apply it to 'chain'. */ -int -fwd_control_input(struct datapath *dp, const void *msg, size_t length) -{ - - struct openflow_packet { - size_t min_size; - int (*handler)(struct datapath *, const void *); - }; - - static const struct openflow_packet packets[] = { - [OFPT_CONTROL_HELLO] = { - sizeof (struct ofp_control_hello), - recv_control_hello, - }, - [OFPT_PACKET_OUT] = { - sizeof (struct ofp_packet_out), - recv_packet_out, - }, - [OFPT_FLOW_MOD] = { - sizeof (struct ofp_flow_mod), - recv_flow, - }, - [OFPT_PORT_MOD] = { - sizeof (struct ofp_port_mod), - recv_port_mod, - }, - }; - - const struct openflow_packet *pkt; - struct ofp_header *oh; - - if (length < sizeof(struct ofp_header)) - return -EINVAL; - - oh = (struct ofp_header *) msg; - if (oh->version != 1 || oh->type >= ARRAY_SIZE(packets) - || ntohs(oh->length) > length) - return -EINVAL; - - pkt = &packets[oh->type]; - if (!pkt->handler) - return -ENOSYS; - if (length < pkt->min_size) - return -EFAULT; - - return pkt->handler(dp, msg); -} - -/* Packet buffering. */ - -#define OVERWRITE_SECS 1 - -struct packet_buffer { - struct buffer *buffer; - uint32_t cookie; - time_t timeout; -}; - -static struct packet_buffer buffers[N_PKT_BUFFERS]; -static unsigned int buffer_idx; - -uint32_t fwd_save_buffer(struct buffer *buffer) -{ - struct packet_buffer *p; - uint32_t id; - - buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; - p = &buffers[buffer_idx]; - if (p->buffer) { - /* Don't buffer packet if existing entry is less than - * OVERWRITE_SECS old. */ - if (time(0) < p->timeout) { /* FIXME */ - return -1; - } else { - buffer_delete(p->buffer); - } - } - /* Don't use maximum cookie value since the all-bits-1 id is - * special. */ - if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) - p->cookie = 0; - p->buffer = buffer_clone(buffer); /* FIXME */ - p->timeout = time(0) + OVERWRITE_SECS; /* FIXME */ - id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); - - return id; -} - -static struct buffer *retrieve_buffer(uint32_t id) -{ - struct buffer *buffer = NULL; - struct packet_buffer *p; - - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - buffer = p->buffer; - p->buffer = NULL; - } else { - printf("cookie mismatch: %x != %x\n", - id >> PKT_BUFFER_BITS, p->cookie); - } - - return buffer; -} - -static void discard_buffer(uint32_t id) -{ - struct packet_buffer *p; - - p = &buffers[id & PKT_BUFFER_MASK]; - if (p->cookie == id >> PKT_BUFFER_BITS) { - buffer_delete(p->buffer); - p->buffer = NULL; - } -} - -void fwd_exit(void) -{ - int i; - - for (i = 0; i < N_PKT_BUFFERS; i++) - buffer_delete(buffers[i].buffer); -} diff --git a/switch/forward.h b/switch/forward.h deleted file mode 100644 index cffbafc9..00000000 --- a/switch/forward.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef FORWARD_H -#define FORWARD_H 1 - -#include -#include - -struct buffer; -struct datapath; -struct ofp_action; -struct sw_flow_key; - -/* Buffers are identified to userspace by a 31-bit opaque ID. We divide the ID - * into a buffer number (low bits) and a cookie (high bits). The buffer number - * is an index into an array of buffers. The cookie distinguishes between - * different packets that have occupied a single buffer. Thus, the more - * buffers we have, the lower-quality the cookie... */ -#define PKT_BUFFER_BITS 8 -#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) -#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) - -#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) - -void fwd_run(struct datapath *); -void fwd_run_wait(struct datapath *); - -void fwd_port_input(struct datapath *, struct buffer *, int in_port); -int fwd_control_input(struct datapath *, const void *, size_t); - -uint32_t fwd_save_buffer(struct buffer *); - -void fwd_exit(void); - -struct buffer *execute_setter(struct buffer *, uint16_t, - const struct sw_flow_key *, - const struct ofp_action *); - -#endif /* forward.h */ diff --git a/switch/switch.c b/switch/switch.c index b36d9486..324084d9 100644 --- a/switch/switch.c +++ b/switch/switch.c @@ -41,7 +41,6 @@ #include "controller.h" #include "datapath.h" #include "fault.h" -#include "forward.h" #include "openflow.h" #include "poll-loop.h" #include "queue.h" @@ -94,11 +93,9 @@ main(int argc, char *argv[]) for (;;) { dp_run(dp); - fwd_run(dp); controller_run(cc); dp_wait(dp); - fwd_run_wait(dp); controller_run_wait(cc); poll_block(); } -- 2.30.2