From: Justin Pettit Date: Sat, 11 Oct 2008 07:30:02 +0000 (-0700) Subject: Add support for Source-NAT to Linux 2.6 datapaths. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=94f287969dff66aa4963dc24cc8a03a50ba3f532;p=openvswitch Add support for Source-NAT to Linux 2.6 datapaths. To enable SNAT, run configure with the "--enable-snat" flag. This has only been tested with the 2.6.23 kernel...more diverse testing will follow. Documentation and a cleaner build setup will also be in a future check-in. --- diff --git a/datapath/Modules.mk b/datapath/Modules.mk index ab5cf87f..054d352f 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -11,6 +11,8 @@ openflow_sources = \ flow.c \ forward.c \ nx_act.c \ + nx_act_snat.c \ + nx_msg.c \ table-hash.c \ table-linear.c @@ -22,6 +24,9 @@ openflow_headers = \ dp_dev.h \ flow.h \ forward.h \ + nx_act.h \ + nx_act_snat.h \ + nx_msg.h \ snap.h \ table.h diff --git a/datapath/datapath.c b/datapath/datapath.c index d2fcfa97..885593f6 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include "openflow-netlink.h" #include "datapath.h" +#include "nx_act_snat.h" #include "table.h" #include "chain.h" #include "dp_dev.h" @@ -387,6 +389,10 @@ int add_switch_port(struct datapath *dp, struct net_device *dev) /* Delete 'p' from switch. */ int dp_del_switch_port(struct net_bridge_port *p) { +#ifdef SUPPORT_SNAT + unsigned long flags; +#endif + /* First drop references to device. */ cancel_work_sync(&p->port_task); rtnl_lock(); @@ -400,6 +406,13 @@ int dp_del_switch_port(struct net_bridge_port *p) /* Then wait until no one is still using it, and destroy it. */ synchronize_rcu(); +#ifdef SUPPORT_SNAT + /* Free any SNAT configuration on the port. 
*/ + spin_lock_irqsave(&p->lock, flags); + snat_free_conf(p); + spin_unlock_irqrestore(&p->lock, flags); +#endif + /* Notify the ctlpath that this port no longer exists */ dp_send_port_status(p, OFPPR_DELETE); @@ -441,6 +454,16 @@ static int dp_maint_func(void *data) struct datapath *dp = (struct datapath *) data; while (!kthread_should_stop()) { +#ifdef SUPPORT_SNAT + struct net_bridge_port *p; + + /* Expire old SNAT entries */ + rcu_read_lock(); + list_for_each_entry_rcu (p, &dp->port_list, node) + snat_maint(p); + rcu_read_unlock(); +#endif + /* Timeout old entries */ chain_timeout(dp->chain); msleep_interruptible(MAINT_SLEEP_MSECS); @@ -452,6 +475,14 @@ static int dp_maint_func(void *data) static void do_port_input(struct net_bridge_port *p, struct sk_buff *skb) { +#ifdef SUPPORT_SNAT + /* Check if this packet needs early SNAT processing. */ + if (snat_pre_route(skb)) { + kfree_skb(skb); + return; + } +#endif + /* Push the Ethernet header back on. */ skb_push(skb, ETH_HLEN); fwd_port_input(p->dp->chain, skb, p); @@ -545,7 +576,8 @@ void dp_set_origin(struct datapath *dp, uint16_t in_port, skb->dev = NULL; } -static int xmit_skb(struct sk_buff *skb) +int +dp_xmit_skb(struct sk_buff *skb) { int len = skb->len; if (packet_length(skb) > skb->dev->mtu) { @@ -576,7 +608,7 @@ int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port, kfree_skb(skb); return -ESRCH; } - return xmit_skb(skb); + return dp_xmit_skb(skb); case OFPP_TABLE: { int retval = run_flow_through_tables(dp->chain, skb, @@ -617,7 +649,7 @@ int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port, return 0; } skb->dev = p->dev; - return xmit_skb(skb); + return dp_xmit_skb(skb); } default: diff --git a/datapath/datapath.h b/datapath/datapath.h index 10a42c08..bf5d4d68 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -79,13 +79,15 @@ struct net_bridge_port { struct work_struct port_task; struct datapath *dp; struct net_device *dev; - struct list_head node; /* 
Element in datapath.ports. */ + struct snat_conf *snat; /* Only set if SNAT is configured for this port. */ + struct list_head node; /* Element in datapath.ports. */ }; extern struct mutex dp_mutex; extern struct notifier_block dp_device_notifier; int dp_del_switch_port(struct net_bridge_port *); +int dp_xmit_skb(struct sk_buff *skb); int dp_output_port(struct datapath *, struct sk_buff *, int out_port, int ignore_no_fwd); int dp_output_control(struct datapath *, struct sk_buff *, uint32_t, diff --git a/datapath/dp_act.c b/datapath/dp_act.c index 3401c4a6..02e1346d 100644 --- a/datapath/dp_act.c +++ b/datapath/dp_act.c @@ -19,8 +19,6 @@ #include "nicira-ext.h" #include "nx_act.h" -static int make_writable(struct sk_buff **); - static uint16_t validate_output(struct datapath *dp, const struct sw_flow_key *key, @@ -341,7 +339,7 @@ validate_vendor(struct datapath *dp, const struct sw_flow_key *key, switch(ntohl(avh->vendor)) { case NX_VENDOR_ID: - ret = nx_validate_act(dp, key, avh, len); + ret = nx_validate_act(dp, key, (struct nx_action_header *)avh, len); break; default: @@ -426,7 +424,7 @@ execute_vendor(struct sk_buff *skb, const struct sw_flow_key *key, switch(ntohl(avh->vendor)) { case NX_VENDOR_ID: - skb = nx_execute_act(skb, key, avh); + skb = nx_execute_act(skb, key, (struct nx_action_header *)avh); break; default: @@ -501,7 +499,7 @@ void execute_actions(struct datapath *dp, struct sk_buff *skb, /* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to * the copy. * Returns 1 if successful, 0 on failure. */ -static int +int make_writable(struct sk_buff **pskb) { /* Based on skb_make_writable() in net/netfilter/core.c. 
*/
diff --git a/datapath/dp_act.h b/datapath/dp_act.h
index a93d20fe..d601eca0 100644
--- a/datapath/dp_act.h
+++ b/datapath/dp_act.h
@@ -10,5 +10,6 @@ uint16_t validate_actions(struct datapath *, const struct sw_flow_key *,
 void execute_actions(struct datapath *, struct sk_buff *,
 		struct sw_flow_key *, const struct ofp_action_header *,
 		size_t action_len, int ignore_no_fwd);
+int make_writable(struct sk_buff **pskb);
 
 #endif /* dp_act.h */
diff --git a/datapath/forward.c b/datapath/forward.c
index 48bec7ca..6a7fcb74 100644
--- a/datapath/forward.c
+++ b/datapath/forward.c
@@ -12,7 +12,9 @@
 #include
 #include "forward.h"
 #include "datapath.h"
+#include "nicira-ext.h"
 #include "dp_act.h"
+#include "nx_msg.h"
 #include "chain.h"
 #include "flow.h"
 
@@ -333,6 +335,25 @@ recv_flow(struct sw_chain *chain, const struct sender *sender, const void *msg)
 	}
 }
 
+/* Dispatches the OFPT_VENDOR message 'msg' by vendor ID.  Nicira messages
+ * are handed to nx_recv_msg(); any other vendor is answered with an
+ * OFPBRC_BAD_VENDOR error.
+ *
+ * Returns the result of the vendor handler, or -EINVAL for an unknown
+ * vendor. */
+static int
+recv_vendor(struct sw_chain *chain, const struct sender *sender,
+		const void *msg)
+{
+	const struct ofp_vendor_header *ovh = msg;
+
+	switch(ntohl(ovh->vendor))
+	{
+	case NX_VENDOR_ID:
+		return nx_recv_msg(chain, sender, msg);
+	default:
+		if (net_ratelimit())
+			printk("Unknown vendor: %#x\n", ntohl(ovh->vendor));
+		/* The header length is in network byte order; the error
+		 * message wants the number of bytes in host order. */
+		dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
+				OFPBRC_BAD_VENDOR, msg, ntohs(ovh->header.length));
+		return -EINVAL;
+	}
+}
+
 /* 'msg', which is 'length' bytes long, was received across Netlink from
  * 'sender'. Apply it to 'chain'. 
*/ int @@ -351,6 +372,18 @@ fwd_control_input(struct sw_chain *chain, const struct sender *sender, sizeof (struct ofp_header), recv_hello, }, + [OFPT_ECHO_REQUEST] = { + sizeof (struct ofp_header), + recv_echo_request, + }, + [OFPT_ECHO_REPLY] = { + sizeof (struct ofp_header), + recv_echo_reply, + }, + [OFPT_VENDOR] = { + sizeof (struct ofp_vendor_header), + recv_vendor, + }, [OFPT_FEATURES_REQUEST] = { sizeof (struct ofp_header), recv_features_request, @@ -374,15 +407,7 @@ fwd_control_input(struct sw_chain *chain, const struct sender *sender, [OFPT_PORT_MOD] = { sizeof (struct ofp_port_mod), recv_port_mod, - }, - [OFPT_ECHO_REQUEST] = { - sizeof (struct ofp_header), - recv_echo_request, - }, - [OFPT_ECHO_REPLY] = { - sizeof (struct ofp_header), - recv_echo_reply, - }, + } }; struct ofp_header *oh; @@ -399,8 +424,12 @@ fwd_control_input(struct sw_chain *chain, const struct sender *sender, OFPBRC_BAD_VERSION, msg, length); return -EINVAL; } - if (ntohs(oh->length) > length) + if (ntohs(oh->length) != length) { + if (net_ratelimit()) + printk("received message length wrong: %d/%d\n", + ntohs(oh->length), length); return -EINVAL; + } if (oh->type < ARRAY_SIZE(packets)) { const struct openflow_packet *pkt = &packets[oh->type]; @@ -528,4 +557,3 @@ void fwd_exit(void) { fwd_discard_all(); } - diff --git a/datapath/forward.h b/datapath/forward.h index 35457f8a..f69a8f07 100644 --- a/datapath/forward.h +++ b/datapath/forward.h @@ -30,7 +30,6 @@ int fwd_control_input(struct sw_chain *, const struct sender *, uint32_t fwd_save_skb(struct sk_buff *skb); void fwd_discard_all(void); - void fwd_exit(void); #endif /* forward.h */ diff --git a/datapath/linux-2.4/.gitignore b/datapath/linux-2.4/.gitignore index 03289061..ad178b13 100644 --- a/datapath/linux-2.4/.gitignore +++ b/datapath/linux-2.4/.gitignore @@ -20,6 +20,8 @@ /kthread.c /netlink.c /nx_act.c +/nx_act_snat.c +/nx_msg.c /random32.c /rcupdate.c /sched.c diff --git a/datapath/linux-2.6/.gitignore 
b/datapath/linux-2.6/.gitignore index 4dd9ed73..8e8a3b54 100644 --- a/datapath/linux-2.6/.gitignore +++ b/datapath/linux-2.6/.gitignore @@ -14,6 +14,8 @@ /genetlink.c /hwtable_dummy.c /nx_act.c +/nx_act_snat.c +/nx_msg.c /random32.c /table-hash.c /table-linear.c diff --git a/datapath/linux-2.6/Kbuild.in b/datapath/linux-2.6/Kbuild.in index 36eb8716..0d2e0dd5 100644 --- a/datapath/linux-2.6/Kbuild.in +++ b/datapath/linux-2.6/Kbuild.in @@ -12,6 +12,7 @@ EXTRA_CFLAGS := -DVERSION=\"$(VERSION)\" EXTRA_CFLAGS += -I$(srcdir)/.. EXTRA_CFLAGS += -I$(builddir)/.. EXTRA_CFLAGS += -I$(top_srcdir)/include +EXTRA_CFLAGS += @SUPPORT_SNAT@ # These include directories have to go before -I$(KSRC)/include. # NOSTDINC_FLAGS just happens to be a variable that goes in the diff --git a/datapath/nx_act.c b/datapath/nx_act.c index 01aca734..07e03f30 100644 --- a/datapath/nx_act.c +++ b/datapath/nx_act.c @@ -1,26 +1,46 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008 The Board of Trustees of The Leland - * Stanford Junior University + * Copyright (c) 2008 Nicira Networks */ /* Functions for Nicira-extended actions. 
*/ #include "nicira-ext.h" +#include "dp_act.h" #include "nx_act.h" +#include "nx_act_snat.h" uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh, uint16_t len) + const struct nx_action_header *nah, uint16_t len) { - /* Nothing to validate yet */ + if (len < sizeof *nah) + return OFPBAC_BAD_LEN; + +#ifdef SUPPORT_SNAT + if (nah->subtype == ntohs(NXAST_SNAT)) { + struct nx_action_snat *nas = (struct nx_action_snat *)nah; + if (len != sizeof(*nas)) + return OFPBAC_BAD_LEN; + else if (ntohs(nas->port) >= OFPP_MAX) + return OFPBAC_BAD_ARGUMENT; + + return ACT_VALIDATION_OK; + } +#endif return OFPBAC_BAD_VENDOR_TYPE; } struct sk_buff * nx_execute_act(struct sk_buff *skb, const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh) + const struct nx_action_header *nah) { - /* Nothing to execute yet */ +#ifdef SUPPORT_SNAT + if (nah->subtype == ntohs(NXAST_SNAT)) { + struct nx_action_snat *nas = (struct nx_action_snat *)nah; + snat_skb(skb->dev->br_port->dp, skb, ntohs(nas->port)); + } +#endif + return skb; } diff --git a/datapath/nx_act.h b/datapath/nx_act.h index fe398823..6dda65dd 100644 --- a/datapath/nx_act.h +++ b/datapath/nx_act.h @@ -5,10 +5,10 @@ uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh, uint16_t len); + const struct nx_action_header *nah, uint16_t len); struct sk_buff *nx_execute_act(struct sk_buff *skb, const struct sw_flow_key *key, - const struct ofp_action_vendor_header *avh); + const struct nx_action_header *nah); #endif /* nx_act.h */ diff --git a/datapath/nx_act_snat.c b/datapath/nx_act_snat.c new file mode 100644 index 00000000..0bb98e02 --- /dev/null +++ b/datapath/nx_act_snat.c @@ -0,0 +1,540 @@ +#ifdef SUPPORT_SNAT +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2008 Nicira Networks
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "forward.h"
+#include "dp_act.h"
+#include "nx_act_snat.h"
+
+
+/* We need these fake structures to make netfilter happy --
+ * lots of places assume that skb->dst != NULL, which isn't
+ * all that unreasonable.
+ *
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields. */
+static struct net_device __fake_net_device = {
+	.hard_header_len = ETH_HLEN
+};
+
+/* Placeholder route attached to skb->dst before packets are handed to the
+ * netfilter hooks; its only device is __fake_net_device. */
+static struct rtable __fake_rtable = {
+	.u = {
+		.dst = {
+			.__refcnt = ATOMIC_INIT(1),
+			.dev = &__fake_net_device,
+			.path = &__fake_rtable.u.dst,
+			.metrics = {[RTAX_MTU - 1] = 1500},
+			.flags = DST_NOXFRM,
+		}
+	},
+	.rt_flags = 0,
+};
+
+/* Define ARP for IP since the Linux headers don't do it cleanly.
+ *
+ * The layout is packed and asserted to be 28 bytes.  Multi-byte fields are
+ * compared against htons()/ntohl() values by the users in this file, i.e.
+ * they hold network byte order. */
+struct ip_arphdr {
+	uint16_t ar_hrd;
+	uint16_t ar_pro;           /* Compared against ETH_P_IP. */
+	uint8_t ar_hln;            /* Compared against ETH_ALEN. */
+	uint8_t ar_pln;            /* Compared against 4. */
+	uint16_t ar_op;            /* Compared against ARPOP_REQUEST. */
+	uint8_t ar_sha[ETH_ALEN];  /* Sender hardware address. */
+	uint32_t ar_sip;           /* Sender IP address. */
+	uint8_t ar_tha[ETH_ALEN];  /* Target hardware address. */
+	uint32_t ar_tip;           /* Target IP address. */
+} __attribute__((packed));
+OFP_ASSERT(sizeof(struct ip_arphdr) == 28);
+
+
+/* Push the Ethernet header back on and transmit the packet.
+ *
+ * Returns whatever dp_xmit_skb() returns. */
+static int
+dp_xmit_skb_push(struct sk_buff *skb)
+{
+	skb_push(skb, ETH_HLEN);
+	return dp_xmit_skb(skb);
+}
+
+/* Perform maintenance related to a SNAT'd interface. Currently, this only
+ * checks whether MAC->IP bindings have expired. 
+ *
+ * Called with the RCU read lock */
+void
+snat_maint(struct net_bridge_port *p)
+{
+	struct snat_conf *sc;
+	struct snat_mapping *m, *n;
+	unsigned long flags;
+	unsigned long timeout;
+
+	spin_lock_irqsave(&p->lock, flags);
+	sc = p->snat;
+	if (!sc)
+		goto done;
+
+	/* 'mac_timeout' is in seconds; mappings are stamped in jiffies. */
+	timeout = sc->mac_timeout * HZ;
+
+	/* The _safe iterator is required because entries are unlinked and
+	 * freed while walking the list. */
+	list_for_each_entry_safe (m, n, &sc->mappings, node) {
+		if (time_after(jiffies, m->used + timeout)) {
+			list_del(&m->node);
+			kfree(m);
+		}
+	}
+
+done:
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+/* Check whether destination IP's address is in the IP->MAC mappings.
+ * If it is, then overwrite the destination MAC with the value from the
+ * cache.
+ *
+ * Returns -EINVAL if SNAT is not configured on 'p' or 'skb' could not be
+ * made writable, otherwise 0 (including when 'skb' is not IP or no mapping
+ * matches).
+ *
+ * NOTE(review): make_writable() may replace 'skb' with a copy.  Only the
+ * local pointer is updated here, so the caller keeps using its own
+ * pointer -- confirm that this is safe for every caller. */
+static int
+dnat_mac(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	struct snat_conf *sc;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ethhdr *eh;
+	struct snat_mapping *m;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	sc = p->snat;
+	if (!sc) {
+		spin_unlock_irqrestore(&p->lock, flags);
+		return -EINVAL;
+	}
+
+	if (skb->protocol != htons(ETH_P_IP)) {
+		spin_unlock_irqrestore(&p->lock, flags);
+		return 0;
+	}
+
+	list_for_each_entry (m, &sc->mappings, node) {
+		if (m->ip_addr == iph->daddr){
+			/* Found it! */
+			if (!make_writable(&skb)) {
+				if (net_ratelimit())
+					printk("make_writable failed\n");
+				spin_unlock_irqrestore(&p->lock, flags);
+				return -EINVAL;
+			}
+			/* Look the Ethernet header up only now:
+			 * make_writable() may have pointed 'skb' at a copy,
+			 * which would leave an earlier pointer stale. */
+			eh = eth_hdr(skb);
+			m->used = jiffies;
+			memcpy(eh->h_dest, m->hw_addr, ETH_ALEN);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+	return 0;
+}
+
+/* Final step of the PRE_ROUTING netfilter hook for 'skb'.  Always returns
+ * 0; a failure from dnat_mac() is deliberately ignored. */
+static int
+snat_pre_route_finish(struct sk_buff *skb)
+{
+	struct net_bridge_port *p = skb->dev->br_port;
+
+	/* If SNAT is configured for this input device, check the IP->MAC
+	 * mappings to see if we should update the destination MAC. */
+	if (p->snat)
+		dnat_mac(skb->dev->br_port, skb);
+
+	return 0;
+}
+
+/* Checks whether 'skb' is an ARP request for an SNAT'd interface. If
+ * so, it will generate a response. 
+ *
+ * Returns 0 if the packet was not handled. Otherwise, -1 is returned
+ * and the caller is responsible for freeing 'skb'. */
+static int
+handle_arp_snat(struct sk_buff *skb)
+{
+	struct net_bridge_port *p = skb->dev->br_port;
+	struct ip_arphdr *ah = (struct ip_arphdr *)arp_hdr(skb);
+	uint32_t ip_addr;
+	unsigned long flags;
+	struct snat_conf *sc;
+
+	/* Only IPv4-over-Ethernet ARP requests are answered. */
+	if ((ah->ar_op != htons(ARPOP_REQUEST))
+			|| ah->ar_hln != ETH_ALEN
+			|| ah->ar_pro != htons(ETH_P_IP)
+			|| ah->ar_pln != 4)
+		return 0;
+
+	/* The configured range is kept in host order. */
+	ip_addr = ntohl(ah->ar_tip);
+	spin_lock_irqsave(&p->lock, flags);
+	sc = p->snat;
+
+	/* We're only interested in addresses we rewrite. */
+	if (!sc || (ip_addr < sc->ip_addr_start)
+			|| (ip_addr > sc->ip_addr_end)) {
+		spin_unlock_irqrestore(&p->lock, flags);
+		return 0;
+	}
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	/* Reply on behalf of the SNAT address using the datapath device's
+	 * MAC address. */
+	arp_send(ARPOP_REPLY, ETH_P_ARP, ah->ar_sip, skb->dev, ah->ar_tip,
+			 ah->ar_sha, p->dp->netdev->dev_addr, ah->ar_sha);
+
+	return -1;
+}
+
+/* Checks whether 'skb' is a ping request for an SNAT'd interface. If
+ * so, it will generate a response.
+ *
+ * Returns 0 if the packet was not handled. Otherwise, -1 is returned
+ * and the caller is responsible for freeing 'skb'.  -1 is also returned
+ * when the reply could not be allocated, so the request is dropped in that
+ * case. */
+static int
+handle_icmp_snat(struct sk_buff *skb)
+{
+	struct net_bridge_port *p = skb->dev->br_port;
+	struct snat_conf *sc;
+	struct ethhdr *eh;
+	struct iphdr *iph = ip_hdr(skb);
+	uint32_t ip_addr;
+	struct icmphdr *icmph;
+	unsigned int datalen;
+	uint8_t tmp_eth[ETH_ALEN];
+	uint32_t tmp_ip;
+	uint32_t csum;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+
+	ip_addr = ntohl(iph->daddr);
+	spin_lock_irqsave(&p->lock, flags);
+	sc = p->snat;
+
+	/* We're only interested in addresses we rewrite. */
+	if (!sc || (ip_addr < sc->ip_addr_start)
+			|| (ip_addr > sc->ip_addr_end)) {
+		spin_unlock_irqrestore(&p->lock, flags);
+		return 0;
+	}
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	icmph = (struct icmphdr *) ((u_int32_t *)iph + iph->ihl);
+	datalen = skb->len - iph->ihl * 4;
+
+	/* Drop fragments and packets not long enough to hold the ICMP
+	 * header. */
+	if (((ntohs(iph->frag_off) & IP_OFFSET) != 0) || datalen < 4)
+		return 0;
+
+	/* We only respond to echo requests to our address. Continue
+	 * processing replies and other ICMP messages since they may be
+	 * intended for NAT'd hosts. */
+	if (icmph->type != ICMP_ECHO)
+		return 0;
+
+	/* Send an echo reply in response */
+	nskb = skb_copy(skb, GFP_ATOMIC);
+	if (!nskb) {
+		if (net_ratelimit())
+			printk("skb copy failed for icmp reply\n");
+		return -1;
+	}
+
+	eh = eth_hdr(nskb);
+	iph = ip_hdr(nskb);
+	icmph = (struct icmphdr *) ((u_int32_t *)iph + iph->ihl);
+
+	/* Swapping the two addresses leaves the IP header checksum
+	 * unchanged, so it needs no update. */
+	tmp_ip = iph->daddr;
+	iph->daddr = iph->saddr;
+	iph->saddr = tmp_ip;
+
+	memcpy(tmp_eth, eh->h_dest, ETH_ALEN);
+	memcpy(eh->h_dest, eh->h_source, ETH_ALEN);
+	memcpy(eh->h_source, tmp_eth, ETH_ALEN);
+
+	/* The type byte is covered by the ICMP checksum.  Going from
+	 * ICMP_ECHO to ICMP_ECHOREPLY lowers the first 16-bit word by
+	 * (ICMP_ECHO << 8), so add that amount back to the checksum with
+	 * one's-complement carry instead of re-summing the payload. */
+	icmph->type = ICMP_ECHOREPLY;
+	csum = (uint32_t)icmph->checksum + htons(ICMP_ECHO << 8);
+	icmph->checksum = (csum & 0xffff) + (csum >> 16);
+
+	dp_xmit_skb_push(nskb);
+
+	return -1;
+}
+
+/* Check if any SNAT maintenance needs to be done on 'skb' before it's
+ * checked against the datapath's tables. This includes DNAT
+ * modification based on prior SNAT action and responding to ARP and
+ * echo requests for the SNAT interface.
+ *
+ * Returns 0 if 'skb' should continue to be processed by the caller.
+ * Returns -1 if the packet was handled, and the caller should free
+ * 'skb'. 
+ */
+int
+snat_pre_route(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int len;
+
+	if (skb->protocol == htons(ETH_P_ARP))
+		return handle_arp_snat(skb);
+	else if (skb->protocol != htons(ETH_P_IP))
+		return 0;
+
+	/* Make sure the fixed part of the IP header is in the linear data
+	 * area before reading any of its fields. */
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto ipv4_error;
+
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 || iph->version != 4)
+		goto ipv4_error;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto ipv4_error;
+
+	/* pskb_may_pull() may have reallocated the header, so the old
+	 * pointer must not be reused. */
+	iph = ip_hdr(skb);
+
+	/* Check if we need to echo reply for this address */
+	if ((iph->protocol == IPPROTO_ICMP) && (handle_icmp_snat(skb)))
+		return -1;
+
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto ipv4_error;
+
+	len = ntohs(iph->tot_len);
+	if ((skb->len < len) || len < (iph->ihl*4))
+		goto ipv4_error;
+
+	/* Strip anything beyond the IP datagram's own length. */
+	if (pskb_trim_rcsum(skb, len))
+		goto ipv4_error;
+
+	/* Netfilter expects skb->dst to be set; see __fake_rtable. */
+	skb->dst = (struct dst_entry *)&__fake_rtable;
+	dst_hold(skb->dst);
+
+	/* NOTE(review): a non-zero NF_HOOK result makes the caller free
+	 * 'skb' -- confirm netfilter has not already consumed it in that
+	 * case. */
+	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+			snat_pre_route_finish);
+
+ipv4_error:
+	return -1;
+}
+
+
+/* Continuation of the FORWARD hook: runs 'skb' through POST_ROUTING and,
+ * from there, pushes the Ethernet header back on and transmits it.
+ * Always returns 0. */
+static int
+snat_skb_finish(struct sk_buff *skb)
+{
+	NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+			dp_xmit_skb_push);
+
+	return 0;
+}
+
+/* Update the MAC->IP mappings for the private side of the SNAT'd
+ * interface.
+ *
+ * An existing entry for the packet's source IP is refreshed (and its MAC
+ * replaced if it changed); otherwise a new entry is added.  If no memory is
+ * available the mapping is simply not recorded. */
+static void
+update_mapping(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct snat_conf *sc;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ethhdr *eh = eth_hdr(skb);
+	struct snat_mapping *m;
+
+	spin_lock_irqsave(&p->lock, flags);
+	sc = p->snat;
+	if (!sc)
+		goto done;
+
+	list_for_each_entry (m, &sc->mappings, node) {
+		if (m->ip_addr == iph->saddr){
+			if (memcmp(m->hw_addr, eh->h_source, ETH_ALEN)) {
+				memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
+			}
+			m->used = jiffies;
+			goto done;
+		}
+	}
+
+	/* GFP_ATOMIC because the port spinlock is held; the allocation can
+	 * fail and must be checked before use. */
+	m = kmalloc(sizeof *m, GFP_ATOMIC);
+	if (!m)
+		goto done;
+
+	m->ip_addr = iph->saddr;
+	memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
+	m->used = jiffies;
+
+	list_add(&m->node, &sc->mappings);
+
+done:
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+/* Perform SNAT modification on 'skb' and send out 'out_port'. 
If the
+ * port was not configured for SNAT, it will be sent through the interface
+ * unmodified. 'skb' is not consumed, so caller will need to free it.
+ *
+ * NOTE(review): the code below does not test p->snat, so the source MAC
+ * rewrite and the netfilter hooks run for every IP packet; only non-IP
+ * packets are visibly sent unmodified -- confirm the intended behaviour.
+ */
+void
+snat_skb(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct net_bridge_port *p = dp->ports[out_port];
+	struct sk_buff *nskb;
+
+	if (!p)
+		return;
+
+	/* Work on a private copy so that the caller's 'skb' is left intact. */
+	nskb = skb_copy(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	nskb->dev = p->dev;
+
+	/* We only SNAT IP, so just send it on its way if not */
+	if (skb->protocol != htons(ETH_P_IP)) {
+		dp_xmit_skb(nskb);
+		return;
+	}
+
+	/* Set the source MAC to the OF interface */
+	memcpy(eth_hdr(nskb)->h_source, dp->netdev->dev_addr, ETH_ALEN);
+
+	/* Record the sender's MAC/IP pair from the original packet, whose
+	 * source MAC has not been rewritten. */
+	update_mapping(p, skb);
+
+	/* Take the Ethernet header back off for netfilter hooks. */
+	skb_pull(nskb, ETH_HLEN);
+
+	/* snat_skb_finish() continues with POST_ROUTING and transmission. */
+	NF_HOOK(PF_INET, NF_IP_FORWARD, nskb, skb->dev, nskb->dev,
+			snat_skb_finish);
+}
+
+/* Remove SNAT configuration on port 'p'.
+ *
+ * Returns 0 on success, or -EINVAL if 'p' has no SNAT configuration.
+ *
+ * NB: The caller must hold the port's spinlock. */
+int
+snat_free_conf(struct net_bridge_port *p)
+{
+	struct snat_conf *sc = p->snat;
+
+	if (!sc)
+		return -EINVAL;
+
+	/* Free existing mapping entries */
+	while (!list_empty(&sc->mappings)) {
+		struct snat_mapping *m = list_entry(sc->mappings.next,
+				struct snat_mapping, node);
+		list_del(&m->node);
+		kfree(m);
+	}
+
+	/* Clear the pointer so later readers see the port as unconfigured. */
+	kfree(p->snat);
+	p->snat = NULL;
+
+	return 0;
+}
+
+/* Remove SNAT configuration from an interface. 
*/
+static int
+snat_del_port(struct datapath *dp, uint16_t port)
+{
+	unsigned long flags;
+	struct net_bridge_port *p = NULL;
+
+	/* 'port' comes straight from a control message, so bound it before
+	 * indexing.  OFPP_MAX is the limit nx_validate_act() applies to SNAT
+	 * ports -- NOTE(review): confirm it matches the size of dp->ports. */
+	if (port < OFPP_MAX)
+		p = dp->ports[port];
+
+	if (!p) {
+		if (net_ratelimit())
+			printk("Attempt to remove snat on non-existent port: %d\n", port);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&p->lock, flags);
+	if (snat_free_conf(p)) {
+		/* SNAT not configured on this port */
+		spin_unlock_irqrestore(&p->lock, flags);
+		if (net_ratelimit())
+			printk("Attempt to remove snat on non-snat port: %d\n", port);
+		return -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return 0;
+}
+
+/* Add SNAT configuration to an interface.
+ *
+ * 'ip_addr_start' and 'ip_addr_end' are in host byte order.  A
+ * 'mac_timeout' of 0 selects MAC_TIMEOUT_DEFAULT.
+ *
+ * Returns 0 on success, -EINVAL if 'port' does not exist, or -ENOMEM if
+ * the configuration could not be allocated. */
+static int
+snat_add_port(struct datapath *dp, uint16_t port,
+		uint32_t ip_addr_start, uint32_t ip_addr_end,
+		uint16_t mac_timeout)
+{
+	unsigned long flags;
+	struct net_bridge_port *p = NULL;
+	struct snat_conf *sc;
+
+
+	if (mac_timeout == 0)
+		mac_timeout = MAC_TIMEOUT_DEFAULT;
+
+	/* Bound the controller-supplied port before indexing; see
+	 * snat_del_port(). */
+	if (port < OFPP_MAX)
+		p = dp->ports[port];
+
+	if (!p) {
+		if (net_ratelimit())
+			printk("Attempt to add snat on non-existent port: %d\n", port);
+		return -EINVAL;
+	}
+
+	/* If SNAT is already configured on the port, check whether the same
+	 * IP addresses are used. If so, just update the mac timeout
+	 * configuration. Otherwise, drop all SNAT configuration and
+	 * reconfigure it. */
+	spin_lock_irqsave(&p->lock, flags);
+	if (p->snat) {
+		/* Both ends of the range must be compared.  An assignment
+		 * here would overwrite the stored end address and treat any
+		 * non-zero end as a match. */
+		if ((p->snat->ip_addr_start == ip_addr_start)
+				&& (p->snat->ip_addr_end == ip_addr_end)) {
+			p->snat->mac_timeout = mac_timeout;
+			spin_unlock_irqrestore(&p->lock, flags);
+			return 0;
+		}
+
+		/* Free the existing configuration and mappings. */
+		snat_free_conf(p);
+	}
+
+	/* GFP_ATOMIC because the port spinlock is held. */
+	sc = kzalloc(sizeof *sc, GFP_ATOMIC);
+	if (!sc) {
+		spin_unlock_irqrestore(&p->lock, flags);
+		return -ENOMEM;
+	}
+
+	sc->ip_addr_start = ip_addr_start;
+	sc->ip_addr_end = ip_addr_end;
+	sc->mac_timeout = mac_timeout;
+	INIT_LIST_HEAD(&sc->mappings);
+
+	p->snat = sc;
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return 0;
+}
+
+/* Handle a SNAT configuration message. 
+ *
+ * 'nac' carries an array of struct nx_snat_config entries; the number of
+ * entries is derived from the length in the message header.  Every entry
+ * is applied, even if an earlier one failed.
+ *
+ * Returns 0 if no problems are found. Otherwise, a negative errno (that of
+ * the last entry that failed). */
+int
+snat_mod_config(struct datapath *dp, const struct nx_act_config *nac)
+{
+	int n_entries = (ntohs(nac->header.header.length) - sizeof *nac)
+			/ sizeof (struct nx_snat_config);
+	int ret = 0;
+	int i;
+
+	for (i=0; i<n_entries; i++) {
+		const struct nx_snat_config *sc = &nac->snat[i];
+		uint16_t port = ntohs(sc->port);
+		int r = 0;
+
+		/* Anything other than NXSC_ADD is treated as a delete. */
+		if (sc->command == NXSC_ADD)
+			r = snat_add_port(dp, port,
+					ntohl(sc->ip_addr_start), ntohl(sc->ip_addr_end),
+					ntohs(sc->mac_timeout));
+		else
+			r = snat_del_port(dp, port);
+
+		if (r)
+			ret = r;
+	}
+
+	return ret;
+}
+#endif
diff --git a/datapath/nx_act_snat.h b/datapath/nx_act_snat.h
new file mode 100644
index 00000000..1e549d8a
--- /dev/null
+++ b/datapath/nx_act_snat.h
@@ -0,0 +1,38 @@
+#ifdef SUPPORT_SNAT
+#ifndef ACT_SNAT_H
+#define ACT_SNAT_H
+
+#include
+#include
+#include
+
+#include "nicira-ext.h"
+#include "datapath.h"
+
+/* Cache of IP->MAC mappings on the side hidden by the SNAT */
+struct snat_mapping {
+	struct list_head node;
+	uint32_t ip_addr;            /* Stored in network-order */
+	uint8_t hw_addr[ETH_ALEN];
+	unsigned long used;          /* Last used time (in jiffies). */
+
+	struct rcu_head rcu;
+};
+
+struct snat_conf {
+	uint32_t ip_addr_start;      /* Stored in host-order */
+	uint32_t ip_addr_end;        /* Stored in host-order */
+	uint16_t mac_timeout;        /* In seconds. */
+	struct list_head mappings;   /* List of snat_mapping entries */
+};
+
+#define MAC_TIMEOUT_DEFAULT 120
+
+int snat_pre_route(struct sk_buff *skb);
+void snat_skb(struct datapath *dp, struct sk_buff *skb, int out_port);
+void snat_maint(struct net_bridge_port *p);
+int snat_mod_config(struct datapath *, const struct nx_act_config *);
+int snat_free_conf(struct net_bridge_port *p);
+
+#endif
+#endif
diff --git a/datapath/nx_msg.c b/datapath/nx_msg.c
new file mode 100644
index 00000000..19bdefde
--- /dev/null
+++ b/datapath/nx_msg.c
@@ -0,0 +1,41 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2008 Nicira Networks
+ */
+
+#include "chain.h"
+#include "datapath.h"
+#include "nicira-ext.h"
+#include "nx_act_snat.h"
+#include "nx_msg.h"
+
+
+/* Handles the Nicira vendor message 'msg' received from 'sender'.
+ *
+ * With SUPPORT_SNAT, NXT_ACT_SET_CONFIG messages for the SNAT action are
+ * passed to snat_mod_config().  Unknown subtypes are answered with an
+ * OFPBRC_BAD_SUBTYPE error.
+ *
+ * Returns the result of snat_mod_config() for a handled message, otherwise
+ * -EINVAL. */
+int
+nx_recv_msg(struct sw_chain *chain, const struct sender *sender,
+		const void *msg)
+{
+	const struct nicira_header *nh = msg;
+
+	switch (ntohl(nh->subtype)) {
+#ifdef SUPPORT_SNAT
+	case NXT_ACT_SET_CONFIG: {
+		const struct nx_act_config *nac = msg;
+		/* The message must at least hold the fixed part of the
+		 * configuration. */
+		if (ntohs(nh->header.length) < sizeof(*nac))
+			return -EINVAL;
+
+		if (nac->type == htons(NXAST_SNAT))
+			return snat_mod_config(chain->dp, nac);
+		else
+			return -EINVAL;
+		break;
+	}
+#endif
+
+	default:
+		/* The header length is in network byte order; the error
+		 * message wants the number of bytes in host order. */
+		dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
+				  OFPBRC_BAD_SUBTYPE, msg, ntohs(nh->header.length));
+		return -EINVAL;
+	}
+
+	return -EINVAL;
+}
diff --git a/datapath/nx_msg.h b/datapath/nx_msg.h
new file mode 100644
index 00000000..1eb1bb39
--- /dev/null
+++ b/datapath/nx_msg.h
@@ -0,0 +1,7 @@
+#ifndef NX_MSG_H
+#define NX_MSG_H 1
+
+int nx_recv_msg(struct sw_chain *chain, const struct sender *sender,
+		const void *msg);
+
+#endif /* nx_msg.h */
diff --git a/include/nicira-ext.h b/include/nicira-ext.h
index 6e16fafc..4c5604e7 100644
--- a/include/nicira-ext.h
+++ b/include/nicira-ext.h
@@ -1,34 +1,6 @@
-/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
- * Junior University
- *
- * We are making the OpenFlow specification and associated documentation
- * (Software) available for public use and benefit with the expectation
- * that others will use, modify and enhance the Software and contribute
- * those enhancements back to the community. 
However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2008 Nicira Networks */ #ifndef NICIRA_EXT_H @@ -50,14 +22,89 @@ enum nicira_type { /* Switch status reply. The reply body is an ASCII string of key-value * pairs in the form "key=value\n". */ - NXT_STATUS_REPLY + NXT_STATUS_REPLY, + + /* Configure an action. Most actions do not require configuration + * beyond that supplied in the actual action call. */ + NXT_ACT_SET_CONFIG, + + /* Get configuration of action. */ + NXT_ACT_GET_CONFIG }; struct nicira_header { struct ofp_header header; - uint32_t vendor_id; /* NX_VENDOR_ID. 
*/ + uint32_t vendor; /* NX_VENDOR_ID. */ uint32_t subtype; /* One of NXT_* above. */ }; -OFP_ASSERT(sizeof(struct nicira_header) == sizeof(struct ofp_vendor) + 4); +OFP_ASSERT(sizeof(struct nicira_header) == sizeof(struct ofp_vendor_header) + 4); + + +enum nx_snat_command { + NXSC_ADD, + NXSC_DELETE +}; + +/* Configuration for source-NATing */ +struct nx_snat_config { + uint8_t command; /* One of NXSC_*. */ + uint8_t pad[3]; + uint16_t port; /* Physical switch port. */ + uint16_t mac_timeout; /* Time to cache MAC addresses of SNAT'd hosts + in seconds. 0 uses the default value. */ + + /* Range of IP addresses to impersonate. Set both values to the + * same to support a single address. */ + uint32_t ip_addr_start; + uint32_t ip_addr_end; + + /* Range of transport ports that should be used as new source port. A + * value of zero, let's the switch choose.*/ + uint16_t tcp_start; + uint16_t tcp_end; + uint16_t udp_start; + uint16_t udp_end; +}; +OFP_ASSERT(sizeof(struct nx_snat_config) == 24); + +/* Action configuration. Not all actions require separate configuration. */ +struct nx_act_config { + struct nicira_header header; + uint16_t type; /* One of OFPAT_* */ + uint8_t pad[2]; + union { + struct nx_snat_config snat[0]; + }; /* Array of action configurations. The number + is inferred from the length field in the + header. */ +}; +OFP_ASSERT(sizeof(struct nx_act_config) == 20); + + +enum nx_action_subtype { + NXAST_SNAT /* Source-NAT */ +}; + +/* Action structure for NXAST_SNAT. */ +struct nx_action_snat { + uint16_t type; /* OFPAT_VENDOR. */ + uint16_t len; /* Length is 8. */ + uint32_t vendor; /* NX_VENDOR_ID. */ + uint16_t subtype; /* NXAST_SNAT. */ + uint16_t port; /* Output port--it must be previously + configured. */ + uint8_t pad[4]; +}; +OFP_ASSERT(sizeof(struct nx_action_snat) == 16); + +/* Header for Nicira-defined actions. */ +struct nx_action_header { + uint16_t type; /* OFPAT_VENDOR. */ + uint16_t len; /* Length is 8. 
*/ + uint32_t vendor; /* NX_VENDOR_ID. */ + uint16_t subtype; /* NXAST_*. */ + uint8_t pad[6]; +}; +OFP_ASSERT(sizeof(struct nx_action_header) == 16); #endif /* nicira-ext.h */ diff --git a/include/openflow.h b/include/openflow.h index a1ca791c..c45b926c 100644 --- a/include/openflow.h +++ b/include/openflow.h @@ -572,8 +572,9 @@ enum ofp_bad_request_code { OFPBRC_BAD_VERSION, /* ofp_header.version not supported. */ OFPBRC_BAD_TYPE, /* ofp_header.type not supported. */ OFPBRC_BAD_STAT, /* ofp_stats_request.type not supported. */ - OFPBRC_BAD_VENDOR /* Vendor not supported (in ofp_vendor or + OFPBRC_BAD_VENDOR, /* Vendor not supported (in ofp_vendor or * ofp_stats_request or ofp_stats_reply). */ + OFPBRC_BAD_SUBTYPE /* Vendor subtype not supported. */ }; /* ofp_error_msg 'code' values for OFPET_BAD_ACTION. 'data' contains at least @@ -583,7 +584,8 @@ enum ofp_bad_action_code { OFPBAC_BAD_LEN, /* Length problem in actions. */ OFPBAC_BAD_VENDOR, /* Unknown vendor id specified. */ OFPBAC_BAD_VENDOR_TYPE, /* Unknown action type for vendor id. */ - OFPBAC_BAD_OUT_PORT /* Problem validating output action. */ + OFPBAC_BAD_OUT_PORT, /* Problem validating output action. */ + OFPBAC_BAD_ARGUMENT /* Bad action argument. */ }; /* OFPT_ERROR: Error message (datapath -> controller). */ @@ -746,7 +748,7 @@ struct ofp_port_stats { OFP_ASSERT(sizeof(struct ofp_port_stats) == 104); /* Vendor extension. */ -struct ofp_vendor { +struct ofp_vendor_header { struct ofp_header header; /* Type OFPT_VENDOR. */ uint32_t vendor; /* Vendor ID: * - MSB 0: low-order bytes are IEEE OUI. @@ -754,6 +756,6 @@ struct ofp_vendor { * consortium. */ /* Vendor-defined arbitrary additional data. 
*/ }; -OFP_ASSERT(sizeof(struct ofp_vendor) == 12); +OFP_ASSERT(sizeof(struct ofp_vendor_header) == 12); #endif /* openflow.h */ diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 8f3dbe9d..9dcb8b1b 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -48,6 +48,7 @@ #include "flow.h" #include "ofpbuf.h" #include "openflow.h" +#include "nicira-ext.h" #include "packets.h" #include "util.h" @@ -243,6 +244,25 @@ static void ofp_print_port_name(struct ds *string, uint16_t port) ds_put_cstr(string, name); } +static void +ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah) +{ + /* Print Nicira action 'nah'. 'subtype' is 16 bits: htons(), not htonl(). */ + if (nah->subtype == htons(NXAST_SNAT)) { + const struct nx_action_snat *nas = (const struct nx_action_snat *)nah; + uint16_t port = ntohs(nas->port); + + if (port < OFPP_MAX) { + ds_put_format(string, "nat:%"PRIu16, port); + } else { + ds_put_format(string, "nat:%"PRIu16" (invalid port)", port); + } + } else { + ds_put_format(string, "***unknown Nicira action:%d***\n", + ntohs(nah->subtype)); + } +} + static int ofp_print_action(struct ds *string, const struct ofp_action_header *ah, size_t actions_len) @@ -408,7 +428,11 @@ ofp_print_action(struct ds *string, const struct ofp_action_header *ah, ds_put_format(string, "***ofpat_vendor truncated***\n"); return -1; } - ds_put_format(string, "vendor action:0x%x", ntohl(avh->vendor)); + if (avh->vendor == htonl(NX_VENDOR_ID)) { + ofp_print_nx_action(string, (struct nx_action_header *)avh); + } else { + ds_put_format(string, "vendor action:0x%x", ntohl(avh->vendor)); + } break; } diff --git a/m4/libopenflow.m4 b/m4/libopenflow.m4 index 5c8520d2..adee69ee 100644 --- a/m4/libopenflow.m4 +++ b/m4/libopenflow.m4 @@ -89,6 +89,25 @@ AC_DEFUN([OFP_CHECK_OPENSSL], AC_DEFINE([HAVE_OPENSSL], [1], [Define to 1 if OpenSSL is installed.]) fi]) +dnl Checks for --enable-snat and defines SUPPORT_SNAT if it is specified. 
+AC_DEFUN([OFP_CHECK_SNAT], + [AC_ARG_ENABLE( + [snat], + [AC_HELP_STRING([--enable-snat], + [Enable support for source-NAT action])], + [case "${enableval}" in + (yes) snat=true ;; + (no) snat=false ;; + (*) AC_MSG_ERROR([bad value ${enableval} for --enable-snat]) ;; + esac], + [snat=false]) + AM_CONDITIONAL([SUPPORT_SNAT], [test x$snat = xtrue]) + if test x$snat = xtrue; then + AC_DEFINE([SUPPORT_SNAT], [1], [Define to 1 if SNAT is desired.]) + SUPPORT_SNAT=-DSUPPORT_SNAT + AC_SUBST([SUPPORT_SNAT]) + fi]) + dnl Checks for libraries needed by lib/fault.c. AC_DEFUN([OFP_CHECK_FAULT_LIBS], [AC_CHECK_LIB([dl], [dladdr], [FAULT_LIBS=-ldl]) @@ -106,6 +125,7 @@ AC_DEFUN([OFP_CHECK_LIBOPENFLOW], AC_REQUIRE([OFP_CHECK_NDEBUG]) AC_REQUIRE([OFP_CHECK_NETLINK]) AC_REQUIRE([OFP_CHECK_OPENSSL]) + AC_REQUIRE([OFP_CHECK_SNAT]) AC_REQUIRE([OFP_CHECK_FAULT_LIBS]) AC_REQUIRE([OFP_CHECK_SOCKET_LIBS])]) diff --git a/secchan/secchan.c b/secchan/secchan.c index 2fcd3f04..516cfde5 100644 --- a/secchan/secchan.c +++ b/secchan/secchan.c @@ -198,6 +198,8 @@ static void port_watcher_set_flags(struct port_watcher *, int port_no, uint32_t config, uint32_t c_mask, uint32_t state, uint32_t s_mask); +static struct hook snat_hook_create(struct port_watcher *pw); + static struct hook stp_hook_create(const struct settings *, struct port_watcher *, struct rconn *local, struct rconn *remote); @@ -291,6 +293,7 @@ main(int argc, char *argv[]) /* Set up hooks. */ hooks[n_hooks++] = port_watcher_create(local_rconn, remote_rconn, &pw); discovery = s.discovery ? 
discovery_init(&s, pw, switch_status) : NULL; + hooks[n_hooks++] = snat_hook_create(pw); if (s.enable_stp) { hooks[n_hooks++] = stp_hook_create(&s, pw, local_rconn, remote_rconn); } @@ -1050,6 +1053,254 @@ port_watcher_create(struct rconn *local_rconn, struct rconn *remote_rconn, port_watcher_wait_cb, pw); } +struct snat_port_conf { + struct list node; + struct nx_snat_config config; +}; + +struct snat_data { + struct port_watcher *pw; + struct list port_list; +}; + + +/* Source-NAT configuration monitor. */ +#define SNAT_CMD_LEN 1024 + +/* Commands to configure iptables. There is no programmatic interface + * to iptables from the kernel, so we're stuck making command-line calls + * in user-space. */ +#define SNAT_FLUSH_ALL_CMD "/sbin/iptables -t nat -F" +#define SNAT_FLUSH_CHAIN_CMD "/sbin/iptables -t nat -F of-snat-%s" + +#define SNAT_ADD_CHAIN_CMD "/sbin/iptables -t nat -N of-snat-%s" +#define SNAT_CONF_CHAIN_CMD "/sbin/iptables -t nat -A POSTROUTING -o %s -j of-snat-%s" + +#define SNAT_ADD_IP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT --to %s-%s" +#define SNAT_ADD_TCP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p TCP --to %s-%s:%d-%d" +#define SNAT_ADD_UDP_CMD "/sbin/iptables -t nat -A of-snat-%s -j SNAT -p UDP --to %s-%s:%d-%d" + +#define SNAT_UNSET_CHAIN_CMD "/sbin/iptables -t nat -D POSTROUTING -o %s -j of-snat-%s" +#define SNAT_DEL_CHAIN_CMD "/sbin/iptables -t nat -X of-snat-%s" + +static void +snat_add_rules(const struct nx_snat_config *sc, const uint8_t *dev_name) +{ + char command[SNAT_CMD_LEN]; + char ip_str_start[16]; + char ip_str_end[16]; + + + snprintf(ip_str_start, sizeof ip_str_start, IP_FMT, + IP_ARGS(&sc->ip_addr_start)); + snprintf(ip_str_end, sizeof ip_str_end, IP_FMT, + IP_ARGS(&sc->ip_addr_end)); + + /* We always attempt to remove existing entries, so that we know + * there's a pristine state for SNAT on the interface. We just ignore + * the results of these calls, since iptables will complain about + * any non-existent entries. 
*/ + + /* Flush the chain that does the SNAT. */ + snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name); + system(command); + + /* We always try to create a new chain. */ + snprintf(command, sizeof(command), SNAT_ADD_CHAIN_CMD, dev_name); + system(command); + + /* Disassociate any old SNAT chain from the POSTROUTING chain. */ + snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name, + dev_name); + system(command); + + /* Associate the new chain with the POSTROUTING hook. */ + snprintf(command, sizeof(command), SNAT_CONF_CHAIN_CMD, dev_name, + dev_name); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem associating chain with POSTROUTING"); + return; + } + + /* If configured, restrict TCP source port ranges. */ + if ((sc->tcp_start != 0) && (sc->tcp_end != 0)) { + snprintf(command, sizeof(command), SNAT_ADD_TCP_CMD, + dev_name, ip_str_start, ip_str_end, + ntohs(sc->tcp_start), ntohs(sc->tcp_end)); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem adding TCP rule"); + return; + } + } + + /* If configured, restrict UDP source port ranges. */ + if ((sc->udp_start != 0) && (sc->udp_end != 0)) { + snprintf(command, sizeof(command), SNAT_ADD_UDP_CMD, + dev_name, ip_str_start, ip_str_end, + ntohs(sc->udp_start), ntohs(sc->udp_end)); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem adding UDP rule"); + return; + } + } + + /* Add a rule that covers all IP traffic that would not be covered + * by the prior TCP or UDP ranges. */ + snprintf(command, sizeof(command), SNAT_ADD_IP_CMD, + dev_name, ip_str_start, ip_str_end); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem adding base rule"); + return; + } +} + +static void +snat_del_rules(const uint8_t *dev_name) +{ + char command[SNAT_CMD_LEN]; + + /* Flush the chain that does the SNAT. 
*/ + snprintf(command, sizeof(command), SNAT_FLUSH_CHAIN_CMD, dev_name); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem flushing chain for deletion"); + return; + } + + /* Disassociate the SNAT chain from the POSTROUTING chain. */ + snprintf(command, sizeof(command), SNAT_UNSET_CHAIN_CMD, dev_name, + dev_name); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem unsetting chain"); + return; + } + + /* Now we can finally delete our SNAT chain. */ + snprintf(command, sizeof(command), SNAT_DEL_CHAIN_CMD, dev_name); + if (system(command) != 0) { + VLOG_ERR("SNAT: problem deleting chain"); + return; + } +} + +/* Applies SNAT configuration 'sc' to the port it names. NXSC_ADD records the + * configuration and (re)installs the port's iptables rules; any other command + * removes the rules and forgets the port's configuration, if there is one. + * Requests whose port does not match a port-watcher entry are ignored. */ +static void +snat_config(const struct nx_snat_config *sc, struct snat_data *snat) +{ + int idx; + struct port_watcher *pw = snat->pw; + struct ofp_phy_port *pw_opp; + struct snat_port_conf *c, *spc=NULL; + uint16_t port_no; + + port_no = ntohs(sc->port); + idx = port_no_to_pw_idx(port_no); + if (idx < 0) { + return; + } + + /* 'port_no' is in host byte order; the port watcher's copy is not. */ + pw_opp = &pw->ports[idx]; + if (ntohs(pw_opp->port_no) != port_no) { + return; + } + + LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) { + if (c->config.port == sc->port) { + spc = c; + break; + } + } + + if (sc->command == NXSC_ADD) { + if (!spc) { + spc = xmalloc(sizeof *spc); + if (!spc) { + VLOG_ERR("SNAT: no memory for new entry"); + return; + } + list_push_back(&snat->port_list, &spc->node); + } + memcpy(&spc->config, sc, sizeof(spc->config)); + snat_add_rules(sc, pw_opp->name); + } else if (spc) { + snat_del_rules(pw_opp->name); + list_remove(&spc->node); + free(spc); /* Unlinked above and referenced nowhere else. */ + } +} + +static bool +snat_remote_packet_cb(struct relay *r, void *snat_) +{ + struct snat_data *snat = snat_; + struct ofpbuf *msg = r->halves[HALF_REMOTE].rxbuf; + struct nicira_header *request = msg->data; + struct nx_act_config *nac = msg->data; + int n_configs, i; + + + if (msg->size < sizeof(struct nx_act_config)) { + return false; + } + request = msg->data; + if (request->header.type != OFPT_VENDOR + || request->vendor != htonl(NX_VENDOR_ID) + || 
request->subtype != htonl(NXT_ACT_SET_CONFIG)) { + return false; + } + + /* We're only interested in attempts to configure SNAT */ + if (nac->type != htons(NXAST_SNAT)) { + return false; + } + + n_configs = (msg->size - sizeof *nac) / sizeof *nac->snat; + for (i = 0; i < n_configs; i++) { + snat_config(&nac->snat[i], snat); + } + + return false; +} + +/* Port-watcher callback: when a port with SNAT configured goes away, removes + * its iptables rules and forgets its configuration. */ +static void +snat_port_changed_cb(uint16_t port_no, + const struct ofp_phy_port *old, + const struct ofp_phy_port *new, + void *snat_) +{ + struct snat_data *snat = snat_; + struct snat_port_conf *c; + + /* We're only interested in ports that went away */ + if (new->port_no != htons(OFPP_NONE)) { + return; + } + + LIST_FOR_EACH(c, struct snat_port_conf, node, &snat->port_list) { + if (c->config.port == old->port_no) { + snat_del_rules(old->name); + list_remove(&c->node); + free(c); /* Unlinked above; returning at once keeps the walk safe. */ + return; + } + } +} + +/* Flushes the iptables NAT table, registers for port-change notifications, + * and returns a hook that handles SNAT configuration messages received on + * the remote connection. */ +static struct hook +snat_hook_create(struct port_watcher *pw) +{ + int ret; + struct snat_data *snat; + + ret = system(SNAT_FLUSH_ALL_CMD); + if (ret != 0) { + VLOG_ERR("SNAT: problem flushing tables"); + } + + snat = xcalloc(1, sizeof *snat); + snat->pw = pw; + list_init(&snat->port_list); + + port_watcher_register_callback(pw, snat_port_changed_cb, snat); + return make_hook(NULL, snat_remote_packet_cb, NULL, NULL, snat); +} + /* Spanning tree protocol. 
*/ /* Extra time, in seconds, at boot before going into fail-open, to give the @@ -1890,7 +2141,7 @@ switch_status_remote_packet_cb(struct relay *r, void *ss_) } request = msg->data; if (request->header.type != OFPT_VENDOR - || request->vendor_id != htonl(NX_VENDOR_ID) + || request->vendor != htonl(NX_VENDOR_ID) || request->subtype != htonl(NXT_STATUS_REQUEST)) { return false; } @@ -1907,7 +2158,7 @@ switch_status_remote_packet_cb(struct relay *r, void *ss_) } reply = make_openflow_xid(sizeof *reply + sr.output.length, OFPT_VENDOR, request->header.xid, &b); - reply->vendor_id = htonl(NX_VENDOR_ID); + reply->vendor = htonl(NX_VENDOR_ID); reply->subtype = htonl(NXT_STATUS_REPLY); memcpy(reply + 1, sr.output.string, sr.output.length); retval = rconn_send(rc, b, NULL); diff --git a/utilities/dpctl.c b/utilities/dpctl.c index b0d2353e..8599a167 100644 --- a/utilities/dpctl.c +++ b/utilities/dpctl.c @@ -222,6 +222,8 @@ usage(void) " dump-flows SWITCH FLOW print matching FLOWs\n" " dump-aggregate SWITCH print aggregate flow statistics\n" " dump-aggregate SWITCH FLOW print aggregate stats for FLOWs\n" + " add-snat SWITCH IFACE IP add SNAT config to IFACE\n" + " del-snat SWITCH IFACE delete SNAT config on IFACE\n" " add-flow SWITCH FLOW add flow described by FLOW\n" " add-flows SWITCH FILE add flows from FILE\n" " mod-flows SWITCH FLOW modify actions of matching FLOWs\n" @@ -451,7 +453,7 @@ do_status(const struct settings *s, int argc, char *argv[]) struct ofpbuf *b; request = make_openflow(sizeof *request, OFPT_VENDOR, &b); - request->vendor_id = htonl(NX_VENDOR_ID); + request->vendor = htonl(NX_VENDOR_ID); request->subtype = htonl(NXT_STATUS_REQUEST); if (argc > 2) { ofpbuf_put(b, argv[2], strlen(argv[2])); @@ -465,7 +467,7 @@ do_status(const struct settings *s, int argc, char *argv[]) } reply = b->data; if (reply->header.type != OFPT_VENDOR - || reply->vendor_id != ntohl(NX_VENDOR_ID) + || reply->vendor != ntohl(NX_VENDOR_ID) || reply->subtype != ntohl(NXT_STATUS_REPLY)) 
{ ofp_print(stderr, b->data, b->size, 2); ofp_fatal(0, "bad reply"); @@ -615,6 +617,22 @@ str_to_action(char *str, struct ofp_action_header *actions, ah->type = htons(OFPAT_STRIP_VLAN); } else if (!strcasecmp(act, "output")) { port = str_to_int(arg); + } else if (!strcasecmp(act, "nat")) { + struct nx_action_snat *sa = (struct nx_action_snat *)ah; + + if (len < sizeof *sa) { + ofp_fatal(0, "Insufficient room for SNAT action\n"); + } + + if (str_to_int(arg) > OFPP_MAX) { + ofp_fatal(0, "Invalid nat port: %s\n", arg); + } + + act_len = sizeof *sa; + sa->type = htons(OFPAT_VENDOR); + sa->vendor = htonl(NX_VENDOR_ID); + sa->subtype = htons(NXAST_SNAT); + sa->port = htons(str_to_int(arg)); } else if (!strcasecmp(act, "TABLE")) { port = OFPP_TABLE; } else if (!strcasecmp(act, "NORMAL")) { @@ -853,6 +871,56 @@ static void do_dump_aggregate(const struct settings *s, int argc, dump_stats_transaction(argv[1], request); } +static void do_add_snat(const struct settings *s, int argc, char *argv[]) +{ + struct vconn *vconn; + struct ofpbuf *buffer; + struct nx_act_config *nac; + size_t size; + + /* Parse and send. */ + size = sizeof *nac + sizeof nac->snat[0]; + nac = make_openflow(size, OFPT_VENDOR, &buffer); + + nac->header.vendor = htonl(NX_VENDOR_ID); + nac->header.subtype = htonl(NXT_ACT_SET_CONFIG); + + nac->type = htons(NXAST_SNAT); + nac->snat[0].command = NXSC_ADD; + nac->snat[0].port = htons(str_to_int(argv[2])); + nac->snat[0].mac_timeout = htons(0); + str_to_ip(argv[3], &nac->snat[0].ip_addr_start); + str_to_ip(argv[3], &nac->snat[0].ip_addr_end); + + open_vconn(argv[1], &vconn); + send_openflow_buffer(vconn, buffer); + vconn_close(vconn); +} + +static void do_del_snat(const struct settings *s, int argc, char *argv[]) +{ + struct vconn *vconn; + struct ofpbuf *buffer; + struct nx_act_config *nac; + size_t size; + + /* Parse and send. 
*/ + size = sizeof *nac + sizeof nac->snat[0]; + nac = make_openflow(size, OFPT_VENDOR, &buffer); + + nac->header.vendor = htonl(NX_VENDOR_ID); + nac->header.subtype = htonl(NXT_ACT_SET_CONFIG); + + nac->type = htons(NXAST_SNAT); + nac->snat[0].command = NXSC_DELETE; + nac->snat[0].port = htons(str_to_int(argv[2])); + nac->snat[0].mac_timeout = htons(0); + + open_vconn(argv[1], &vconn); + send_openflow_buffer(vconn, buffer); + vconn_close(vconn); +} + static void do_add_flow(const struct settings *s, int argc, char *argv[]) { struct vconn *vconn; @@ -1236,6 +1304,8 @@ static struct command all_commands[] = { { "dump-tables", 1, 1, do_dump_tables }, { "dump-flows", 1, 2, do_dump_flows }, { "dump-aggregate", 1, 2, do_dump_aggregate }, + { "add-snat", 3, 3, do_add_snat }, + { "del-snat", 2, 2, do_del_snat }, { "add-flow", 2, 2, do_add_flow }, { "add-flows", 2, 2, do_add_flows }, { "mod-flows", 2, 2, do_mod_flows },