1 /* ip_gre driver port to Linux 2.6.18 and greater */
3 #include <linux/version.h>
4 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5 #define HAVE_NETDEV_STATS
7 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
8 #define HAVE_NETDEV_HEADER_OPS
10 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
11 #define HAVE_NETDEV_NEEDED_HEADROOM
15 * Linux NET3: GRE over IP protocol decoder.
17 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
26 #include <linux/capability.h>
27 #include <linux/ethtool.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <asm/uaccess.h>
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
35 #include <linux/tcp.h>
36 #include <linux/udp.h>
37 #include <linux/if_arp.h>
38 #include <linux/mroute.h>
39 #include <linux/init.h>
40 #include <linux/in6.h>
41 #include <linux/inetdevice.h>
42 #include <linux/igmp.h>
43 #include <linux/netfilter_ipv4.h>
44 #include <linux/etherdevice.h>
45 #include <linux/if_ether.h>
50 #include <net/protocol.h>
53 #include <net/checksum.h>
54 #include <net/dsfield.h>
55 #include <net/inet_ecn.h>
57 #include <net/net_namespace.h>
58 #include <net/netns/generic.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
67 #include "openvswitch/gre.h"
69 #ifndef GRE_IOCTL_ONLY
70 #include <net/rtnetlink.h>
77 1. The most important issue is detecting local dead loops.
78 They would cause complete host lockup in transmit, which
79 would be "resolved" by stack overflow or, if queueing is enabled,
80 with infinite looping in net_bh.
82 We cannot track such dead loops during route installation,
83 it is infeasible task. The most general solutions would be
84 to keep skb->encapsulation counter (sort of local ttl),
85 and silently drop packet when it expires. It is the best
86 solution, but it supposes maintaining a new variable in ALL
87 skb, even if no tunneling is used.
89 Current solution: HARD_TX_LOCK lock breaks dead loops.
93 2. Networking dead loops would not kill routers, but would really
94 kill network. IP hop limit plays role of "t->recursion" in this case,
95 if we copy it from packet being encapsulated to upper header.
96 It is very good solution, but it introduces two problems:
98 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
99 do not work over tunnels.
100 - traceroute does not work. I planned to relay ICMP from tunnel,
101 so that this problem would be solved and traceroute output
102 would even more informative. This idea appeared to be wrong:
103 only Linux complies to rfc1812 now (yes, guys, Linux is the only
104 true router now :-)), all routers (at least, in neighbourhood of mine)
105 return only 8 bytes of payload. It is the end.
107 Hence, if we want that OSPF worked or traceroute said something reasonable,
108 we should search for another solution.
110 One of them is to parse packet trying to detect inner encapsulation
111 made by our node. It is difficult or even impossible, especially,
112 taking into account fragmentation. To be short, it is not a solution at all.
114 Current solution: The solution was UNEXPECTEDLY SIMPLE.
115 We force DF flag on tunnels with preconfigured hop limit,
116 that is ALL. :-) Well, it does not remove the problem completely,
117 but exponential growth of network traffic is changed to linear
118 (branches, that exceed pmtu are pruned) and tunnel mtu
119 quickly degrades to a value <68, where looping stops.
120 Yes, it is not good if there exists a router in the loop,
121 which does not force DF, even when encapsulating packets have DF set.
122 But it is not our problem! Nobody could accuse us, we made
123 all that we could make. Even if it is your gated who injected
124 fatal route to network, even if it were you who configured
125 fatal static route: you are innocent. :-)
127 XXX: Forcing the DF flag on was done only when setting up tunnels via the
128 ioctl interface and not Netlink. Since it prevents some operations
129 and isn't very transparent I removed it. It seems nobody really
130 cared about it anyways.
131 Moral: don't create loops.
133 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
134 practically identical code. It would be good to glue them
135 together, but it is not very evident, how to make them modular.
136 sit is integral part of IPv6, ipip and gre are naturally modular.
137 We could extract common parts (hash table, ioctl etc)
138 to a separate module (ip_tunnel.c).
143 #ifndef GRE_IOCTL_ONLY
144 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
145 static struct rtnl_link_ops ipgre_tap_ops __read_mostly;
147 static int ipgre_tunnel_init(struct net_device *dev);
148 static void ipgre_tunnel_setup(struct net_device *dev);
149 static void ipgre_tap_setup(struct net_device *dev);
150 static int ipgre_tunnel_bind_dev(struct net_device *dev);
154 static int ipgre_net_id __read_mostly;
156 struct ip_tunnel *tunnels[4][HASH_SIZE];
158 struct net_device *fb_tunnel_dev;
161 /* Tunnel hash table */
171 We require exact key match i.e. if a key is present in packet
172 it will match only tunnel with the same key; if it is not present,
173 it will match only keyless tunnel.
175 All keyless packets, if not matching configured keyless tunnels
176 will match fallback tunnel.
179 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
181 #define tunnels_r_l tunnels[3]
182 #define tunnels_r tunnels[2]
183 #define tunnels_l tunnels[1]
184 #define tunnels_wc tunnels[0]
186 * Locking : hash tables are protected by RCU and a spinlock
188 static DEFINE_SPINLOCK(ipgre_lock);
190 #define for_each_ip_tunnel_rcu(start) \
191 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
193 /* Given src, dst and key, find appropriate for input tunnel. */
/* Look up the receive tunnel for a GRE packet given (remote, local, key,
 * inner protocol).  Scans the four RCU-protected hash lists from most to
 * least specific (remote+local, remote, local/multicast, wildcard),
 * keeping the best-scoring candidate (lower score = exact dev_type and
 * matching link beat wildcards).  Falls back to the per-netns fallback
 * device if it is up.  Must be called under rcu_read_lock().
 * NOTE(review): some interior lines (continue/score computation) are
 * elided in this extract; comments describe only the visible logic. */
195 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
196 __be32 remote, __be32 local,
197 __be32 key, __be16 gre_proto)
199 struct net *net = dev_net(dev);
200 int link = dev->ifindex;
201 unsigned h0 = HASH(remote);
202 unsigned h1 = HASH(key);
203 struct ip_tunnel *t, *cand = NULL;
204 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
/* ETH_P_TEB means the inner payload is Ethernet (gretap device). */
205 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
206 ARPHRD_ETHER : ARPHRD_IPGRE;
/* cand_score starts above the worst possible score so any match wins. */
207 int score, cand_score = 4;
/* Pass 1: tunnels keyed by both remote and local address. */
209 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
210 if (local != t->parms.iph.saddr ||
211 remote != t->parms.iph.daddr ||
212 key != t->parms.i_key ||
213 !(t->dev->flags & IFF_UP))
216 if (t->dev->type != ARPHRD_IPGRE &&
217 t->dev->type != dev_type)
221 if (t->parms.link != link)
223 if (t->dev->type != dev_type)
228 if (score < cand_score) {
/* Pass 2: tunnels keyed by remote address only. */
234 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
235 if (remote != t->parms.iph.daddr ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
245 if (t->parms.link != link)
247 if (t->dev->type != dev_type)
252 if (score < cand_score) {
/* Pass 3: tunnels keyed by local address (or local multicast group). */
258 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
259 if ((local != t->parms.iph.saddr &&
260 (local != t->parms.iph.daddr ||
261 !ipv4_is_multicast(local))) ||
262 key != t->parms.i_key ||
263 !(t->dev->flags & IFF_UP))
266 if (t->dev->type != ARPHRD_IPGRE &&
267 t->dev->type != dev_type)
271 if (t->parms.link != link)
273 if (t->dev->type != dev_type)
278 if (score < cand_score) {
/* Pass 4: wildcard (keyless/addressless) tunnels. */
284 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
285 if (t->parms.i_key != key ||
286 !(t->dev->flags & IFF_UP))
289 if (t->dev->type != ARPHRD_IPGRE &&
290 t->dev->type != dev_type)
294 if (t->parms.link != link)
296 if (t->dev->type != dev_type)
301 if (score < cand_score) {
/* No configured tunnel matched: use the netns fallback device if up. */
310 dev = ign->fb_tunnel_dev;
311 if (dev->flags & IFF_UP)
312 return netdev_priv(dev);
/* Return the hash-table bucket head for a tunnel described by @parms.
 * Bucket list index ("prio") is chosen by which of remote/local/key are
 * set; the hash slot is derived from the key (and, in elided lines,
 * presumably from the addresses as well — confirm against full source). */
317 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
318 struct ip_tunnel_parm *parms)
320 __be32 remote = parms->iph.daddr;
321 __be32 local = parms->iph.saddr;
322 __be32 key = parms->i_key;
323 unsigned h = HASH(key);
/* A unicast remote address makes the tunnel more specific. */
328 if (remote && !ipv4_is_multicast(remote)) {
333 return &ign->tunnels[prio][h];
/* Convenience wrapper: bucket head for an existing tunnel @t. */
336 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
339 return __ipgre_bucket(ign, &t->parms);
/* Insert tunnel @t at the head of its hash bucket.  The spinlock
 * serializes writers; readers traverse under RCU, hence the
 * rcu_assign_pointer() publish. */
342 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
344 struct ip_tunnel **tp = ipgre_bucket(ign, t);
346 spin_lock_bh(&ipgre_lock);
348 rcu_assign_pointer(*tp, t);
349 spin_unlock_bh(&ipgre_lock);
/* Remove tunnel @t from its hash bucket.  Walks the singly-linked list
 * to find the pointer slot referencing @t, then unlinks it under the
 * writer spinlock (unlink statement elided in this extract). */
352 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
354 struct ip_tunnel **tp;
356 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
358 spin_lock_bh(&ipgre_lock);
360 spin_unlock_bh(&ipgre_lock);
/* Find an existing tunnel whose parameters exactly match @parms
 * (saddr, daddr, input key, link, and device @type).  Unlike the rx-path
 * lookup this is an exact configuration match used by locate/ioctl. */
366 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
367 struct ip_tunnel_parm *parms,
370 __be32 remote = parms->iph.daddr;
371 __be32 local = parms->iph.saddr;
372 __be32 key = parms->i_key;
373 int link = parms->link;
374 struct ip_tunnel *t, **tp;
375 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
377 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
378 if (local == t->parms.iph.saddr &&
379 remote == t->parms.iph.daddr &&
380 key == t->parms.i_key &&
381 link == t->parms.link &&
382 type == t->dev->type)
/* Find a tunnel matching @parms, or — when @create is set and none
 * exists — allocate, name, register, and hash-link a new net_device
 * (gretap → Ethernet-style device, otherwise plain IPGRE).
 * Error-handling paths are elided in this extract. */
388 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
389 struct ip_tunnel_parm *parms, int gretap, int create)
391 struct ip_tunnel *t, *nt;
392 struct net_device *dev;
394 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
396 t = ipgre_tunnel_find(net, parms, gretap ? ARPHRD_ETHER : ARPHRD_IPGRE);
401 strlcpy(name, parms->name, IFNAMSIZ);
/* No name supplied: let the kernel pick "gre%d". */
403 sprintf(name, "gre%%d");
405 dev = alloc_netdev(sizeof(*t), name, gretap ? ipgre_tap_setup
406 : ipgre_tunnel_setup);
410 dev_net_set(dev, net);
412 if (strchr(name, '%')) {
413 if (dev_alloc_name(dev, name) < 0)
/* gretap devices get a random MAC like any other Ethernet device. */
418 random_ether_addr(dev->dev_addr);
420 #ifndef GRE_IOCTL_ONLY
421 dev->rtnl_link_ops = gretap ? &ipgre_tap_ops : &ipgre_link_ops;
423 nt = netdev_priv(dev);
426 dev->mtu = ipgre_tunnel_bind_dev(dev);
428 if (register_netdevice(dev) < 0)
432 ipgre_tunnel_link(ign, nt);
/* netdev uninit hook: remove the tunnel from the per-netns hash table
 * when the device is being unregistered. */
440 static void ipgre_tunnel_uninit(struct net_device *dev)
442 struct net *net = dev_net(dev);
443 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
445 ipgre_tunnel_unlink(ign, netdev_priv(dev));
/* Link-layer header length charged against the tunnel MTU.  On kernels
 * with needed_headroom, hard_header_len holds only the real L2 header;
 * on older kernels (fallback branch) only gretap devices have an
 * Ethernet header to account for. */
449 static unsigned int tunnel_hard_header_len(struct net_device *dev)
451 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
452 return dev->hard_header_len;
454 return (dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0;
/* ICMP error handler for GRE: relay selected ICMP errors (unreachable,
 * TTL exceeded) to the tunnel that sent the offending packet, rate
 * limited by IPTUNNEL_ERR_TIMEO.  @skb points at the quoted inner IP
 * header; @info carries the ICMP payload (e.g. MTU for frag-needed). */
458 static void ipgre_err(struct sk_buff *skb, u32 info)
461 /* All the routers (except for Linux) return only
462 8 bytes of packet payload. It means, that precise relaying of
463 ICMP in the real Internet is absolutely infeasible.
465 Moreover, Cisco "wise men" put GRE key to the third word
466 in GRE header. It makes impossible maintaining even soft state for keyed
467 GRE tunnels with enabled checksum. Tell them "thank you".
469 Well, I wonder, rfc1812 was written by Cisco employee,
470 what the hell these idiots break standards established
474 struct iphdr *iph = (struct iphdr *)skb->data;
475 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
/* GRE flags+protocol = 4 bytes after the quoted IP header. */
476 int grehlen = (iph->ihl<<2) + 4;
477 const int type = icmp_hdr(skb)->type;
478 const int code = icmp_hdr(skb)->code;
482 if (skb_headlen(skb) < grehlen)
/* Optional GRE fields enlarge the header; version/routing unsupported. */
486 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
487 if (flags&(GRE_VERSION|GRE_ROUTING))
496 /* If only 8 bytes returned, keyed message will be dropped here */
497 if (skb_headlen(skb) < grehlen)
502 case ICMP_PARAMETERPROB:
505 case ICMP_DEST_UNREACH:
508 case ICMP_PORT_UNREACH:
509 /* Impossible event. */
511 case ICMP_FRAG_NEEDED:
512 /* Soft state for pmtu is maintained by IP core. */
515 /* All others are translated to HOST_UNREACH.
516 rfc2003 contains "deep thoughts" about NET_UNREACH,
517 I believe they are just ether pollution. --ANK
522 case ICMP_TIME_EXCEEDED:
523 if (code != ICMP_EXC_TTL)
/* Find the tunnel; the key (if present) is the last optional word. */
529 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
531 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
533 if (t == NULL || t->parms.iph.daddr == 0 ||
534 ipv4_is_multicast(t->parms.iph.daddr))
/* With inherited TTL, TTL-exceeded errors are expected; ignore them. */
537 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
540 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
544 t->err_time = jiffies;
/* Propagate ECN Congestion-Experienced from the outer IP header @iph to
 * the decapsulated inner IPv4/IPv6 packet.  pskb_may_pull() may
 * reallocate, so callers must treat prior skb data pointers as invalid. */
550 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
552 if (INET_ECN_is_ce(iph->tos)) {
553 if (skb->protocol == htons(ETH_P_IP)) {
554 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
555 + sizeof(struct iphdr) - skb->data)))
558 IP_ECN_set_ce(ip_hdr(skb));
559 } else if (skb->protocol == htons(ETH_P_IPV6)) {
560 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
561 + sizeof(struct ipv6hdr) - skb->data)))
564 IP6_ECN_set_ce(ipv6_hdr(skb));
/* Compute the outer TOS for encapsulation: combine the configured outer
 * @tos with the inner packet's ECN bits per INET_ECN_encapsulate().
 * Non-IP payloads contribute a default inner value (elided above). */
570 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
573 if (skb->protocol == htons(ETH_P_IP))
574 inner = old_iph->tos;
575 else if (skb->protocol == htons(ETH_P_IPV6))
576 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
577 return INET_ECN_encapsulate(tos, inner);
/* GRE receive handler.  Parses the GRE header (flags, optional
 * checksum/key/sequence), finds the owning tunnel, validates
 * checksum/sequencing, strips the outer headers, updates rx stats and
 * hands the inner packet back to the stack.  Unmatched packets elicit
 * ICMP port-unreachable.  Several drop/return paths are elided here. */
580 static int ipgre_rcv(struct sk_buff *skb)
588 struct ip_tunnel *tunnel;
/* 16 = outer IP (min 20 is already pulled by IP layer context) + GRE
   base + options worst case visible here; assumption — verify. */
593 if (!pskb_may_pull(skb, 16))
600 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
601 /* - Version must be 0.
602 - We do not support routing headers.
604 if (flags&(GRE_VERSION|GRE_ROUTING))
607 if (flags&GRE_CSUM) {
608 switch (skb->ip_summed) {
609 case CHECKSUM_COMPLETE:
610 csum = csum_fold(skb->csum);
616 csum = __skb_checksum_complete(skb);
617 skb->ip_summed = CHECKSUM_COMPLETE;
/* Optional fields follow the 4-byte base header in fixed order:
 * checksum, key, sequence (offset advanced in elided lines). */
622 key = *(__be32*)(h + offset);
626 seqno = ntohl(*(__be32*)(h + offset));
631 gre_proto = *(__be16 *)(h + 2);
634 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
635 iph->saddr, iph->daddr, key,
637 struct net_device_stats *stats;
638 #ifdef HAVE_NETDEV_STATS
639 stats = &tunnel->dev->stats;
641 stats = &tunnel->stat;
646 skb->protocol = gre_proto;
647 /* WCCP version 1 and 2 protocol decoding.
648 * - Change protocol to IP
649 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
651 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
652 skb->protocol = htons(ETH_P_IP);
653 if ((*(h + offset) & 0xF0) != 0x40)
657 skb->mac_header = skb->network_header;
658 __pskb_pull(skb, offset);
659 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
660 skb->pkt_type = PACKET_HOST;
661 #ifdef CONFIG_NET_IPGRE_BROADCAST
662 if (ipv4_is_multicast(iph->daddr)) {
663 /* Looped back packet, drop it! */
664 if (skb_rtable(skb)->fl.iif == 0)
667 skb->pkt_type = PACKET_BROADCAST;
/* Checksum present but non-zero (bad), or required but absent: drop. */
671 if (((flags&GRE_CSUM) && csum) ||
672 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
673 stats->rx_crc_errors++;
/* Enforce strictly increasing sequence numbers when configured. */
677 if (tunnel->parms.i_flags&GRE_SEQ) {
678 if (!(flags&GRE_SEQ) ||
679 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
680 stats->rx_fifo_errors++;
684 tunnel->i_seqno = seqno + 1;
689 /* Warning: All skb pointers will be invalidated! */
690 if (tunnel->dev->type == ARPHRD_ETHER) {
691 if (!pskb_may_pull(skb, ETH_HLEN)) {
692 stats->rx_length_errors++;
698 skb->protocol = eth_type_trans(skb, tunnel->dev);
699 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
703 stats->rx_bytes += len;
704 skb->dev = tunnel->dev;
708 skb_reset_network_header(skb);
710 /* Invalidates pointers. */
711 ipgre_ecn_decapsulate(iph, skb);
/* No tunnel claimed the packet: tell the sender per RFC 1812. */
717 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* Transmit path: route to the remote endpoint, handle PMTU for inner
 * IPv4/IPv6, ensure headroom, then prepend outer IP + GRE headers
 * (with optional seq/key/csum fields) and send.  NBMA tunnels (no
 * configured daddr) derive the destination from the skb's route or
 * IPv6 neighbour.  Error/out labels and ip_select_ident/send calls are
 * elided in this extract. */
726 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
728 struct ip_tunnel *tunnel = netdev_priv(dev);
729 struct net_device_stats *stats;
730 #ifdef HAVE_NETDEV_QUEUE_STATS
731 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
733 struct iphdr *old_iph = ip_hdr(skb);
737 struct rtable *rt; /* Route to the other host */
738 struct net_device *tdev; /* Device to other host */
739 struct iphdr *iph; /* Our new IP header */
740 unsigned int max_headroom; /* The extra header space needed */
744 u8 original_protocol;
746 #ifdef HAVE_NETDEV_STATS
749 stats = &tunnel->stat;
752 /* Validate the protocol headers before we try to use them. */
753 original_protocol = skb->protocol;
754 if (skb->protocol == htons(ETH_P_IP)) {
755 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
756 + sizeof(struct iphdr) - skb->data)))
758 } else if (skb->protocol == htons(ETH_P_IPV6)) {
759 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
760 + sizeof(struct ipv6hdr) - skb->data)))
764 if (dev->type == ARPHRD_ETHER)
765 IPCB(skb)->flags = 0;
/* ARPHRD_IPGRE with header_ops: outer header was already built by
 * ipgre_header(); read tunnel params from the skb itself. */
767 #ifdef HAVE_NETDEV_HEADER_OPS
768 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
770 if (dev->hard_header && dev->type == ARPHRD_IPGRE) {
773 tiph = (struct iphdr *)skb->data;
775 gre_hlen = tunnel->hlen;
776 tiph = &tunnel->parms.iph;
/* NBMA tunnel: no fixed destination, resolve from the packet. */
779 if ((dst = tiph->daddr) == 0) {
782 if (skb_dst(skb) == NULL) {
783 stats->tx_fifo_errors++;
787 if (skb->protocol == htons(ETH_P_IP)) {
788 rt = skb_rtable(skb);
789 if ((dst = rt->rt_gateway) == 0)
793 else if (skb->protocol == htons(ETH_P_IPV6)) {
794 struct in6_addr *addr6;
796 struct neighbour *neigh = skb_dst(skb)->neighbour;
801 addr6 = (struct in6_addr *)&neigh->primary_key;
802 addr_type = ipv6_addr_type(addr6);
804 if (addr_type == IPV6_ADDR_ANY) {
805 addr6 = &ipv6_hdr(skb)->daddr;
806 addr_type = ipv6_addr_type(addr6);
/* Only IPv4-compatible IPv6 addresses map onto an IPv4 endpoint. */
809 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
812 dst = addr6->s6_addr32[3];
/* TOS inheritance: copy the inner DSCP when configured to do so. */
822 if (skb->protocol == htons(ETH_P_IP))
824 else if (skb->protocol == htons(ETH_P_IPV6))
825 tos = ipv6_get_dsfield(ipv6_hdr(skb));
829 struct flowi fl = { .oif = tunnel->parms.link,
832 .saddr = tiph->saddr,
833 .tos = RT_TOS(tos) } },
834 .proto = IPPROTO_GRE };
835 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
836 stats->tx_carrier_errors++;
840 tdev = rt->u.dst.dev;
/* Path MTU for the tunnel = outer route MTU minus our own overhead. */
850 mtu = dst_mtu(&rt->u.dst) - tunnel_hard_header_len(dev)
853 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
856 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
858 /* XXX: Temporarily allow fragmentation since DF doesn't
859 * do the right thing with bridging. */
861 if (skb->protocol == htons(ETH_P_IP)) {
862 df |= (old_iph->frag_off&htons(IP_DF));
/* Inner DF set and packet too big: relay ICMP frag-needed. */
864 if ((old_iph->frag_off&htons(IP_DF)) &&
865 mtu < ntohs(old_iph->tot_len)) {
866 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
872 else if (skb->protocol == htons(ETH_P_IPV6)) {
873 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
/* Record the reduced MTU on host routes so IPv6 PMTU converges. */
875 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
876 if ((tunnel->parms.iph.daddr &&
877 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
878 rt6->rt6i_dst.plen == 128) {
879 rt6->rt6i_flags |= RTF_MODIFIED;
880 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
884 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
885 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
/* Rate-limited replay of a recent ICMP error back to the sender. */
892 if (tunnel->err_count > 0) {
893 if (time_before(jiffies,
894 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
897 dst_link_failure(skb);
899 tunnel->err_count = 0;
902 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
/* Reallocate when headroom is short or the skb isn't ours to write. */
904 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
905 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
906 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
909 #ifdef HAVE_NETDEV_QUEUE_STATS
918 skb_set_owner_w(new_skb, skb->sk);
921 old_iph = ip_hdr(skb);
924 skb_reset_transport_header(skb);
925 skb_push(skb, gre_hlen);
926 skb_reset_network_header(skb);
927 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
928 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
931 skb_dst_set(skb, &rt->u.dst);
934 * Push down and install the IPIP header.
939 iph->ihl = sizeof(struct iphdr) >> 2;
941 iph->protocol = IPPROTO_GRE;
942 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
943 iph->daddr = rt->rt_dst;
944 iph->saddr = rt->rt_src;
/* TTL: explicit setting wins; otherwise inherit from the inner packet
 * (or the route's hop limit for non-IP payloads). */
946 if ((iph->ttl = tiph->ttl) == 0) {
947 if (skb->protocol == htons(ETH_P_IP))
948 iph->ttl = old_iph->ttl;
950 else if (skb->protocol == htons(ETH_P_IPV6))
951 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
954 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
957 skb->protocol = original_protocol;
/* GRE base header: flags then protocol (ETH_P_TEB for gretap). */
959 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
960 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
961 htons(ETH_P_TEB) : skb->protocol;
/* Optional fields are written back-to-front from the end of hlen. */
963 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
964 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
966 if (tunnel->parms.o_flags&GRE_SEQ) {
968 *ptr = htonl(tunnel->o_seqno);
971 if (tunnel->parms.o_flags&GRE_KEY) {
972 *ptr = tunnel->parms.o_key;
975 if (tunnel->parms.o_flags&GRE_CSUM) {
977 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
987 dst_link_failure(skb);
/* Bind the tunnel to an underlying output device and derive sensible
 * MTU and headroom values.  Returns the MTU to assign (assignment by
 * caller, per ipgre_tunnel_locate).  GRE option words (csum/key/seq)
 * each add 4 bytes to @addend (increments elided in this extract). */
995 static int ipgre_tunnel_bind_dev(struct net_device *dev)
997 struct net_device *tdev = NULL;
998 struct ip_tunnel *tunnel;
1000 int hlen = LL_MAX_HEADER;
1001 int mtu = ETH_DATA_LEN;
/* Base overhead: outer IP header + 4-byte GRE base header. */
1002 int addend = sizeof(struct iphdr) + 4;
1004 tunnel = netdev_priv(dev);
1005 iph = &tunnel->parms.iph;
1007 /* Guess output device to choose reasonable mtu and needed_headroom */
1010 struct flowi fl = { .oif = tunnel->parms.link,
1012 { .daddr = iph->daddr,
1013 .saddr = iph->saddr,
1014 .tos = RT_TOS(iph->tos) } },
1015 .proto = IPPROTO_GRE };
1017 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
1018 tdev = rt->u.dst.dev;
1022 if (dev->type != ARPHRD_ETHER)
1023 dev->flags |= IFF_POINTOPOINT;
/* No route yet: fall back to the explicitly configured link device. */
1026 if (!tdev && tunnel->parms.link)
1027 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
1030 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1031 hlen = tdev->hard_header_len + tdev->needed_headroom;
1033 hlen = tdev->hard_header_len;
1037 dev->iflink = tunnel->parms.link;
1039 /* Precalculate GRE options length */
1040 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1041 if (tunnel->parms.o_flags&GRE_CSUM)
1043 if (tunnel->parms.o_flags&GRE_KEY)
1045 if (tunnel->parms.o_flags&GRE_SEQ)
1048 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1049 dev->needed_headroom = hlen + addend;
1051 dev->hard_header_len = hlen + addend;
1053 mtu -= tunnel_hard_header_len(dev) + addend;
1054 tunnel->hlen = addend;
1059 /* XXX: Set MTU to the maximum possible value. If we are bridged to a
1060 * device with a larger MTU then packets will be dropped. */
/* ioctl handler for gre/gretap devices: SIOCGETTUNNEL (get params),
 * SIOCADDTUNNEL/SIOCADDGRETAP (create), SIOCCHGTUNNEL/SIOCCHGGRETAP
 * (modify), SIOCDELTUNNEL (destroy).  Add/change/delete require
 * CAP_NET_ADMIN.  The switch/case labels and several error paths are
 * elided in this extract. */
1067 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1070 struct ip_tunnel_parm p;
1071 struct ip_tunnel *t;
1072 struct net *net = dev_net(dev);
1073 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1074 int add_tunnel, gretap;
/* GET on the fallback device looks up the tunnel named in userspace's
 * parms; GET on a real tunnel device returns its own parms. */
1079 if (dev == ign->fb_tunnel_dev) {
1080 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1084 t = ipgre_tunnel_locate(net, &p, false, 0);
1087 t = netdev_priv(dev);
1088 memcpy(&p, &t->parms, sizeof(p));
1089 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1098 if (!capable(CAP_NET_ADMIN))
1102 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/* Sanity-check user-supplied parms: must be plain IPv4/GRE, no
 * options, and no unsupported GRE version/routing flags. */
1106 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1107 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1108 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1111 add_tunnel = (cmd == SIOCADDTUNNEL || cmd == SIOCADDGRETAP);
1112 gretap = (cmd == SIOCADDGRETAP || cmd == SIOCCHGGRETAP);
/* Zero the key fields when the corresponding KEY flag is absent
 * (assignments elided). */
1114 if (!(p.i_flags&GRE_KEY))
1116 if (!(p.o_flags&GRE_KEY))
1119 t = ipgre_tunnel_locate(net, &p, gretap, add_tunnel);
1121 if (dev != ign->fb_tunnel_dev && !add_tunnel) {
/* Refuse a change that would collide with a different device. */
1123 if (t->dev != dev) {
1128 unsigned nflags = 0;
1130 t = netdev_priv(dev);
1132 if (ipv4_is_multicast(p.iph.daddr))
1133 nflags = IFF_BROADCAST;
1134 else if (p.iph.daddr)
1135 nflags = IFF_POINTOPOINT;
/* Cannot switch between p2p and broadcast mode on a live device. */
1137 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
/* Re-hash under new addresses/keys; dev_addr mirrors saddr. */
1141 ipgre_tunnel_unlink(ign, t);
1142 t->parms.iph.saddr = p.iph.saddr;
1143 t->parms.iph.daddr = p.iph.daddr;
1144 t->parms.i_key = p.i_key;
1145 t->parms.o_key = p.o_key;
1146 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1147 memcpy(dev->broadcast, &p.iph.daddr, 4);
1148 ipgre_tunnel_link(ign, t);
1149 netdev_state_change(dev);
1156 t->parms.iph.ttl = p.iph.ttl;
1157 t->parms.iph.tos = p.iph.tos;
1158 t->parms.iph.frag_off = p.iph.frag_off;
1159 if (t->parms.link != p.link) {
1160 t->parms.link = p.link;
1161 dev->mtu = ipgre_tunnel_bind_dev(dev);
1162 netdev_state_change(dev);
1165 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1168 err = (add_tunnel ? -ENOBUFS : -ENOENT);
1173 if (!capable(CAP_NET_ADMIN))
/* DELETE via the fallback device names the victim; the fallback
 * tunnel itself may never be deleted. */
1176 if (dev == ign->fb_tunnel_dev) {
1178 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1181 if ((t = ipgre_tunnel_locate(net, &p, false, 0)) == NULL)
1184 if (t == netdev_priv(ign->fb_tunnel_dev))
1188 unregister_netdevice(dev);
/* Pre-2.6.22 compat: devices without dev->stats keep stats in the
 * tunnel private area; this is the get_stats netdev hook for them. */
1200 #ifndef HAVE_NETDEV_STATS
1201 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1203 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
/* change_mtu hook: reject MTUs that would not leave room for the
 * link-layer header plus the GRE/IP encapsulation overhead
 * (0xFFF8 is the largest 8-aligned IP datagram payload). */
1207 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1209 struct ip_tunnel *tunnel = netdev_priv(dev);
1211 new_mtu > 0xFFF8 - tunnel_hard_header_len(dev) - tunnel->hlen)
1217 /* Nice toy. Unfortunately, useless in real life :-)
1218 It allows to construct virtual multiprotocol broadcast "LAN"
1219 over the Internet, provided multicast routing is tuned.
1222 I have no idea whether this bicycle was invented before me,
1223 so that I had to set ARPHRD_IPGRE to a random value.
1224 I have an impression, that Cisco could make something similar,
1225 but this feature is apparently missing in IOS<=11.2(8).
1227 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1228 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1230 ping -t 255 224.66.66.66
1232 If nobody answers, mbone does not work.
1234 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1235 ip addr add 10.66.66.<somewhat>/24 dev Universe
1236 ifconfig Universe up
1237 ifconfig Universe add fe80::<Your_real_addr>/10
1238 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1241 ftp fec0:6666:6666::193.233.7.65
/* header_ops->create (or legacy hard_header) hook for broadcast GRE:
 * pre-build the outer IP + GRE header on the skb so the device can be
 * used on a multiprotocol "LAN" (see comment block above).  @daddr and
 * @saddr, when given, override the tunnel's configured addresses. */
1246 #ifdef HAVE_NETDEV_HEADER_OPS
1247 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1248 unsigned short type,
1249 const void *daddr, const void *saddr, unsigned len)
1251 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1252 void *daddr, void *saddr, unsigned len)
1255 struct ip_tunnel *t = netdev_priv(dev);
1256 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1257 __be16 *p = (__be16*)(iph+1);
1259 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1260 p[0] = t->parms.o_flags;
1264 * Set the source hardware address.
1268 memcpy(&iph->saddr, saddr, 4);
1271 memcpy(&iph->daddr, daddr, 4);
1274 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
/* header_ops->parse hook: the "hardware address" of a GRE neighbour is
 * the 4-byte outer IPv4 source address from the received header. */
1280 #ifdef HAVE_NETDEV_HEADER_OPS
1281 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1283 static int ipgre_header_parse(struct sk_buff *skb, unsigned char *haddr)
1286 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1287 memcpy(haddr, &iph->saddr, 4);
/* header_ops table (2.6.24+ kernels only). */
1291 #ifdef HAVE_NETDEV_HEADER_OPS
1292 static const struct header_ops ipgre_header_ops = {
1293 .create = ipgre_header,
1294 .parse = ipgre_header_parse,
/* ndo_open for broadcast GRE: when the remote is a multicast group,
 * route to it, join the group on the resolved output device, and
 * remember that device's ifindex in t->mlink for ipgre_close(). */
1298 #ifdef CONFIG_NET_IPGRE_BROADCAST
1299 static int ipgre_open(struct net_device *dev)
1301 struct ip_tunnel *t = netdev_priv(dev);
1303 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1304 struct flowi fl = { .oif = t->parms.link,
1306 { .daddr = t->parms.iph.daddr,
1307 .saddr = t->parms.iph.saddr,
1308 .tos = RT_TOS(t->parms.iph.tos) } },
1309 .proto = IPPROTO_GRE };
1311 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1312 return -EADDRNOTAVAIL;
/* Note: @dev is re-pointed at the route's output device here. */
1313 dev = rt->u.dst.dev;
1315 if (__in_dev_get_rtnl(dev) == NULL)
1316 return -EADDRNOTAVAIL;
1317 t->mlink = dev->ifindex;
1318 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ndo_stop counterpart of ipgre_open(): leave the multicast group on
 * the device recorded in t->mlink. */
1323 static int ipgre_close(struct net_device *dev)
1325 struct ip_tunnel *t = netdev_priv(dev);
1327 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1328 struct in_device *in_dev;
1329 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1331 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* ethtool get_drvinfo: report driver name, OVS version, and whether
 * this device is a gre or gretap instance (via bus_info). */
1340 static void ethtool_getinfo(struct net_device *dev,
1341 struct ethtool_drvinfo *info)
1343 strcpy(info->driver, "ip_gre");
1344 strcpy(info->version, "Open vSwitch "VERSION BUILDNR);
1345 strcpy(info->bus_info, dev->type == ARPHRD_ETHER ? "gretap" : "gre");
/* ethtool ops shared by gre and gretap devices. */
1348 static struct ethtool_ops ethtool_ops = {
1349 .get_drvinfo = ethtool_getinfo,
/* net_device_ops for plain (layer-3) GRE tunnel devices, used on
 * kernels that have the ops table (otherwise the individual dev->*
 * hooks are set in ipgre_tunnel_setup()). */
1352 #ifdef HAVE_NET_DEVICE_OPS
1353 static const struct net_device_ops ipgre_netdev_ops = {
1354 .ndo_init = ipgre_tunnel_init,
1355 .ndo_uninit = ipgre_tunnel_uninit,
1356 #ifdef CONFIG_NET_IPGRE_BROADCAST
1357 .ndo_open = ipgre_open,
1358 .ndo_stop = ipgre_close,
1360 .ndo_start_xmit = ipgre_tunnel_xmit,
1361 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1362 .ndo_change_mtu = ipgre_tunnel_change_mtu,
/* alloc_netdev setup callback for plain GRE devices: wire up the ops
 * (new-style table or legacy per-field hooks), set device type, default
 * MTU/headroom for IP+GRE overhead, and mark the device NOARP and
 * netns-local. */
1366 static void ipgre_tunnel_setup(struct net_device *dev)
1368 #ifdef HAVE_NET_DEVICE_OPS
1369 dev->netdev_ops = &ipgre_netdev_ops;
1371 dev->init = ipgre_tunnel_init;
1372 dev->uninit = ipgre_tunnel_uninit;
1373 dev->hard_start_xmit = ipgre_tunnel_xmit;
1374 #ifndef HAVE_NETDEV_STATS
1375 dev->get_stats = ipgre_tunnel_get_stats;
1377 dev->do_ioctl = ipgre_tunnel_ioctl;
1378 dev->change_mtu = ipgre_tunnel_change_mtu;
1379 #endif /* HAVE_NET_DEVICE_OPS */
1380 dev->destructor = free_netdev;
1382 dev->type = ARPHRD_IPGRE;
/* sizeof(iphdr) + 4 = outer IP header + GRE base header. */
1383 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1384 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1386 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1388 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1389 dev->flags = IFF_NOARP;
1392 dev->features |= NETIF_F_NETNS_LOCAL;
1393 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1395 SET_ETHTOOL_OPS(dev, &ethtool_ops);
/* ndo_init for plain GRE devices: copy the tunnel's addresses into
 * dev_addr/broadcast and install the pre-built-header ops; multicast
 * (broadcast-mode) tunnels additionally get open/stop hooks for group
 * membership. */
1398 static int ipgre_tunnel_init(struct net_device *dev)
1400 struct ip_tunnel *tunnel;
1403 tunnel = netdev_priv(dev);
1404 iph = &tunnel->parms.iph;
1407 strcpy(tunnel->parms.name, dev->name);
/* dev_addr/broadcast mirror the outer IPv4 endpoints (4 bytes each). */
1409 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1410 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1413 #ifdef CONFIG_NET_IPGRE_BROADCAST
1414 if (ipv4_is_multicast(iph->daddr)) {
1417 dev->flags = IFF_BROADCAST;
1418 #ifdef HAVE_NETDEV_HEADER_OPS
1419 dev->header_ops = &ipgre_header_ops;
1421 dev->hard_header = ipgre_header;
1422 dev->hard_header_parse = ipgre_header_parse;
1424 #ifndef HAVE_NET_DEVICE_OPS
1425 dev->open = ipgre_open;
1426 dev->stop = ipgre_close;
1431 #ifdef HAVE_NETDEV_HEADER_OPS
1432 dev->header_ops = &ipgre_header_ops;
1434 dev->hard_header = ipgre_header;
1435 dev->hard_header_parse = ipgre_header_parse;
/* Initialize the per-netns fallback ("gre0"-style) tunnel: a keyless
 * wildcard tunnel that catches otherwise-unmatched GRE traffic.
 * Return type differs by kernel: void with net_device_ops, int as a
 * legacy dev->init hook. */
1442 #ifdef HAVE_NET_DEVICE_OPS
1443 static void ipgre_fb_tunnel_init(struct net_device *dev)
1445 static int ipgre_fb_tunnel_init(struct net_device *dev)
1448 struct ip_tunnel *tunnel = netdev_priv(dev);
1449 struct iphdr *iph = &tunnel->parms.iph;
1450 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1453 strcpy(tunnel->parms.name, dev->name);
1456 iph->protocol = IPPROTO_GRE;
1458 tunnel->hlen = sizeof(struct iphdr) + 4;
/* Hash directly into the wildcard bucket — no link needed. */
1461 ign->tunnels_wc[0] = tunnel;
1463 #ifndef HAVE_NET_DEVICE_OPS
1463 #ifndef HAVE_NET_DEVICE_OPS
/* Registration record for IP protocol number IPPROTO_GRE: receive and
 * ICMP-error handlers.  const-ness of the struct depends on kernel
 * version. */
1468 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
1469 static struct net_protocol ipgre_protocol = {
1471 static const struct net_protocol ipgre_protocol = {
1473 .handler = ipgre_rcv,
1474 .err_handler = ipgre_err,
1475 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
/* Queue every tunnel device in this netns (all 4 priority lists, all
 * hash slots) for batched unregistration on @head. */
1480 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1484 for (prio = 0; prio < 4; prio++) {
1486 for (h = 0; h < HASH_SIZE; h++) {
1487 struct ip_tunnel *t = ign->tunnels[prio][h];
1490 unregister_netdevice_queue(t->dev, head);
/* pernet init: allocate, initialize, and register this namespace's
 * fallback tunnel device; free it on registration failure. */
1497 static int ipgre_init_net(struct net *net)
1499 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1502 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), GRE_IOCTL_DEVICE,
1503 ipgre_tunnel_setup);
1504 if (!ign->fb_tunnel_dev) {
1508 dev_net_set(ign->fb_tunnel_dev, net);
/* With net_device_ops we must call the init function ourselves;
 * legacy kernels invoke dev->init during register_netdev(). */
1510 #ifdef HAVE_NET_DEVICE_OPS
1511 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1513 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1515 #ifndef GRE_IOCTL_ONLY
1516 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1519 if ((err = register_netdev(ign->fb_tunnel_dev)))
1525 free_netdev(ign->fb_tunnel_dev);
/* Per-namespace teardown: collect every tunnel device onto a local list
 * and unregister them in one batch. */
1530 static void ipgre_exit_net(struct net *net)
1532 struct ipgre_net *ign;
1535 ign = net_generic(net, ipgre_net_id);
1537 ipgre_destroy_tunnels(ign, &list);
1538 unregister_netdevice_many(&list);
/* pernet hooks: one struct ipgre_net (keyed by ipgre_net_id) is allocated
 * per network namespace. */
1542 static struct pernet_operations ipgre_net_ops = {
1543 .init = ipgre_init_net,
1544 .exit = ipgre_exit_net,
1545 .id = &ipgre_net_id,
1546 .size = sizeof(struct ipgre_net),
/* ndo_init for gretap (Ethernet-over-GRE) devices: record the device
 * name in the tunnel parameters and bind the tunnel to its route/MTU. */
1549 static int ipgre_tap_init(struct net_device *dev)
1551 struct ip_tunnel *tunnel;
1553 tunnel = netdev_priv(dev);
1556 strcpy(tunnel->parms.name, dev->name);
1558 ipgre_tunnel_bind_dev(dev);
/* net_device_ops for gretap devices.  Unlike the plain GRE ops, gretap
 * uses the generic Ethernet MAC-address handlers. */
1563 #ifdef HAVE_NET_DEVICE_OPS
1564 static const struct net_device_ops ipgre_tap_netdev_ops = {
1565 .ndo_init = ipgre_tap_init,
1566 .ndo_uninit = ipgre_tunnel_uninit,
1567 .ndo_start_xmit = ipgre_tunnel_xmit,
1568 .ndo_set_mac_address = eth_mac_addr,
1569 .ndo_validate_addr = eth_validate_addr,
1570 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1571 .ndo_change_mtu = ipgre_tunnel_change_mtu,
/* Setup callback for gretap devices: install the device callbacks
 * (via net_device_ops on newer kernels, individual function pointers on
 * older ones), pin the device to its namespace, and disable the qdisc
 * queue (tx_queue_len = 0), as tunnels do their own queuing downstream.
 *
 * Fix: the ethtool registration read "SET_ETHTOOL_OPS(dev, ðtool_ops)" —
 * an HTML-entity mojibake ("&eth" decoded to "ð") that corrupted the
 * address-of expression and cannot compile.  Restored to &ethtool_ops. */
1575 static void ipgre_tap_setup(struct net_device *dev)
1579 #ifdef HAVE_NET_DEVICE_OPS
1580 dev->netdev_ops = &ipgre_tap_netdev_ops;
1582 dev->init = ipgre_tap_init;
1583 dev->uninit = ipgre_tunnel_uninit;
1584 dev->hard_start_xmit = ipgre_tunnel_xmit;
1585 #ifndef HAVE_NETDEV_STATS
1586 dev->get_stats = ipgre_tunnel_get_stats;
1588 dev->do_ioctl = ipgre_tunnel_ioctl;
1589 dev->change_mtu = ipgre_tunnel_change_mtu;
1590 #endif /* HAVE_NET_DEVICE_OPS */
1591 dev->destructor = free_netdev;
1594 dev->features |= NETIF_F_NETNS_LOCAL;
1595 dev->tx_queue_len = 0;
1597 SET_ETHTOOL_OPS(dev, &ethtool_ops);
/* rtnl validate callback for plain GRE links: reject GRE flag words that
 * set the VERSION or ROUTING bits, which this driver does not support. */
1600 #ifndef GRE_IOCTL_ONLY
1601 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1609 if (data[IFLA_GRE_IFLAGS])
1610 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1611 if (data[IFLA_GRE_OFLAGS])
1612 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1613 if (flags & (GRE_VERSION|GRE_ROUTING))
/* rtnl validate callback for gretap links: check any supplied MAC address
 * (length and validity), inspect the remote endpoint attribute, then
 * defer the GRE-flag checks to ipgre_tunnel_validate(). */
1619 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1623 if (tb[IFLA_ADDRESS]) {
1624 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1626 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1627 return -EADDRNOTAVAIL;
1633 if (data[IFLA_GRE_REMOTE]) {
1634 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1640 return ipgre_tunnel_validate(tb, data);
/* Translate IFLA_GRE_* netlink attributes into ip_tunnel_parm.  Starts
 * from zeroed parms with protocol forced to GRE; each attribute is
 * optional.  PMTU discovery (IP_DF) defaults to on unless explicitly
 * disabled by IFLA_GRE_PMTUDISC == 0. */
1643 static void ipgre_netlink_parms(struct nlattr *data[],
1644 struct ip_tunnel_parm *parms)
1646 memset(parms, 0, sizeof(*parms));
1648 parms->iph.protocol = IPPROTO_GRE;
1653 if (data[IFLA_GRE_LINK])
1654 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1656 if (data[IFLA_GRE_IFLAGS])
1657 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1659 if (data[IFLA_GRE_OFLAGS])
1660 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1662 if (data[IFLA_GRE_IKEY])
1663 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1665 if (data[IFLA_GRE_OKEY])
1666 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1668 if (data[IFLA_GRE_LOCAL])
1669 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1671 if (data[IFLA_GRE_REMOTE])
1672 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1674 if (data[IFLA_GRE_TTL])
1675 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1677 if (data[IFLA_GRE_TOS])
1678 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1680 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1681 parms->iph.frag_off = htons(IP_DF);
/* rtnl newlink callback: parse attributes into the new tunnel's parms,
 * refuse a duplicate tunnel, give gretap devices a random MAC when none
 * was supplied, bind/register the device, and link it into the per-net
 * hash.  Signature gained src_net in 2.6.33. */
1684 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)
1685 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1686 struct nlattr *data[])
1688 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1689 struct nlattr *data[])
1692 struct ip_tunnel *nt;
1693 struct net *net = dev_net(dev);
1694 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1698 nt = netdev_priv(dev);
1699 ipgre_netlink_parms(data, &nt->parms);
1701 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1704 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1705 random_ether_addr(dev->dev_addr);
1707 mtu = ipgre_tunnel_bind_dev(dev);
1711 err = register_netdevice(dev);
1716 ipgre_tunnel_link(ign, nt);
/* rtnl changelink callback: apply new parameters to an existing tunnel.
 * The fallback device cannot be reconfigured.  Changing the endpoints or
 * i_key requires re-hashing (unlink, update, relink); a change that would
 * flip the device between broadcast and point-to-point is rejected for
 * non-Ethernet tunnels.  A link change also rebinds the device and may
 * update the MTU.  NOTE(review): error paths are elided from this view. */
1722 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1723 struct nlattr *data[])
1725 struct ip_tunnel *t, *nt;
1726 struct net *net = dev_net(dev);
1727 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1728 struct ip_tunnel_parm p;
1731 if (dev == ign->fb_tunnel_dev)
1734 nt = netdev_priv(dev);
1735 ipgre_netlink_parms(data, &p);
1737 t = ipgre_tunnel_locate(net, &p, false, 0);
1745 if (dev->type != ARPHRD_ETHER) {
1746 unsigned nflags = 0;
1748 if (ipv4_is_multicast(p.iph.daddr))
1749 nflags = IFF_BROADCAST;
1750 else if (p.iph.daddr)
1751 nflags = IFF_POINTOPOINT;
1753 if ((dev->flags ^ nflags) &
1754 (IFF_POINTOPOINT | IFF_BROADCAST))
1758 ipgre_tunnel_unlink(ign, t);
1759 t->parms.iph.saddr = p.iph.saddr;
1760 t->parms.iph.daddr = p.iph.daddr;
1761 t->parms.i_key = p.i_key;
1762 if (dev->type != ARPHRD_ETHER) {
1763 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1764 memcpy(dev->broadcast, &p.iph.daddr, 4);
1766 ipgre_tunnel_link(ign, t);
1767 netdev_state_change(dev);
1770 t->parms.o_key = p.o_key;
1771 t->parms.iph.ttl = p.iph.ttl;
1772 t->parms.iph.tos = p.iph.tos;
1773 t->parms.iph.frag_off = p.iph.frag_off;
1775 if (t->parms.link != p.link) {
1776 t->parms.link = p.link;
1777 mtu = ipgre_tunnel_bind_dev(dev);
1780 netdev_state_change(dev);
/* rtnl get_size callback: sum of nla_total_size() terms, one per
 * IFLA_GRE_* attribute emitted by ipgre_fill_info() (terms elided here). */
1786 static size_t ipgre_get_size(const struct net_device *dev)
1791 /* IFLA_GRE_IFLAGS */
1793 /* IFLA_GRE_OFLAGS */
1799 /* IFLA_GRE_LOCAL */
1801 /* IFLA_GRE_REMOTE */
1807 /* IFLA_GRE_PMTUDISC */
/* rtnl fill_info callback: dump the tunnel's parameters as IFLA_GRE_*
 * attributes.  Uses the era's NLA_PUT* macros, which jump to a
 * nla_put_failure label on overflow (label elided from this view). */
1812 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1814 struct ip_tunnel *t = netdev_priv(dev);
1815 struct ip_tunnel_parm *p = &t->parms;
1817 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1818 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1819 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1820 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1821 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1822 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1823 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1824 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1825 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1826 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
/* Netlink attribute policy for IFLA_GRE_*: address attributes are
 * validated by exact length, the rest by integer type. */
1834 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1835 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1836 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1837 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1838 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1839 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1840 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1841 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1842 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1843 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1844 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
/* rtnl_link_ops for plain (layer-3) GRE tunnels ("gre" link kind;
 * kind string elided from this view). */
1847 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1849 .maxtype = IFLA_GRE_MAX,
1850 .policy = ipgre_policy,
1851 .priv_size = sizeof(struct ip_tunnel),
1852 .setup = ipgre_tunnel_setup,
1853 .validate = ipgre_tunnel_validate,
1854 .newlink = ipgre_newlink,
1855 .changelink = ipgre_changelink,
1856 .get_size = ipgre_get_size,
1857 .fill_info = ipgre_fill_info,
/* rtnl_link_ops for Ethernet-over-GRE ("gretap") links: shares policy and
 * netlink callbacks with ipgre_link_ops but uses the tap setup/validate. */
1860 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1862 .maxtype = IFLA_GRE_MAX,
1863 .policy = ipgre_policy,
1864 .priv_size = sizeof(struct ip_tunnel),
1865 .setup = ipgre_tap_setup,
1866 .validate = ipgre_tap_validate,
1867 .newlink = ipgre_newlink,
1868 .changelink = ipgre_changelink,
1869 .get_size = ipgre_get_size,
1870 .fill_info = ipgre_fill_info,
1875 * And now the modules code and kernel interface.
/* Module init: register the GRE protocol handler, the pernet device ops,
 * and (unless GRE_IOCTL_ONLY) both rtnl link kinds.  Unwinds each step in
 * reverse on failure via the goto labels below. */
1878 static int __init ipgre_init(void)
1882 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1884 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1885 printk(KERN_INFO "ipgre init: can't add protocol\n");
1889 err = register_pernet_device(&ipgre_net_ops);
1891 goto gen_device_failed;
1893 #ifndef GRE_IOCTL_ONLY
1894 err = rtnl_link_register(&ipgre_link_ops);
1896 goto rtnl_link_failed;
1898 err = rtnl_link_register(&ipgre_tap_ops);
1900 goto tap_ops_failed;
1906 #ifndef GRE_IOCTL_ONLY
1908 rtnl_link_unregister(&ipgre_link_ops);
1910 unregister_pernet_device(&ipgre_net_ops);
1913 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
/* Module exit: tear down in reverse registration order — link ops first,
 * then the pernet device, then the protocol handler. */
1918 static void __exit ipgre_fini(void)
1920 #ifndef GRE_IOCTL_ONLY
1921 rtnl_link_unregister(&ipgre_tap_ops);
1922 rtnl_link_unregister(&ipgre_link_ops);
1924 unregister_pernet_device(&ipgre_net_ops);
1925 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1926 printk(KERN_INFO "ipgre close: can't remove protocol\n");
/* Module registration and metadata; the rtnl aliases let the kernel
 * autoload this module when a "gre"/"gretap" link is requested. */
1929 module_init(ipgre_init);
1930 module_exit(ipgre_fini);
1931 MODULE_DESCRIPTION("GRE over IPv4 tunneling driver");
1932 MODULE_LICENSE("GPL");
1933 #ifndef GRE_IOCTL_ONLY
1934 MODULE_ALIAS_RTNL_LINK("gre");
1935 MODULE_ALIAS_RTNL_LINK("gretap");