1 /* ip_gre driver port to Linux 2.6.18 and greater */
3 #include <linux/version.h>
4 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5 #define HAVE_NETDEV_STATS
7 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
8 #define HAVE_NETDEV_HEADER_OPS
10 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
11 #define HAVE_NETDEV_NEEDED_HEADROOM
15 * Linux NET3: GRE over IP protocol decoder.
17 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
26 #include <linux/capability.h>
27 #include <linux/module.h>
28 #include <linux/types.h>
29 #include <linux/kernel.h>
30 #include <asm/uaccess.h>
31 #include <linux/skbuff.h>
32 #include <linux/netdevice.h>
34 #include <linux/tcp.h>
35 #include <linux/udp.h>
36 #include <linux/if_arp.h>
37 #include <linux/mroute.h>
38 #include <linux/init.h>
39 #include <linux/in6.h>
40 #include <linux/inetdevice.h>
41 #include <linux/igmp.h>
42 #include <linux/netfilter_ipv4.h>
43 #include <linux/etherdevice.h>
44 #include <linux/if_ether.h>
49 #include <net/protocol.h>
52 #include <net/checksum.h>
53 #include <net/dsfield.h>
54 #include <net/inet_ecn.h>
56 #include <net/net_namespace.h>
57 #include <net/netns/generic.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
66 #include "openvswitch/gre.h"
68 #ifndef GRE_IOCTL_ONLY
69 #include <net/rtnetlink.h>
76 1. The most important issue is detecting local dead loops.
77 They would cause complete host lockup in transmit, which
78 would be "resolved" by stack overflow or, if queueing is enabled,
79 with infinite looping in net_bh.
81 We cannot track such dead loops during route installation,
82 it is infeasible task. The most general solutions would be
83 to keep skb->encapsulation counter (sort of local ttl),
84 and silently drop packet when it expires. It is the best
85 solution, but it supposes maintaining a new variable in ALL
86 skb, even if no tunneling is used.
88 Current solution: HARD_TX_LOCK lock breaks dead loops.
92 2. Networking dead loops would not kill routers, but would really
93 kill network. IP hop limit plays role of "t->recursion" in this case,
94 if we copy it from packet being encapsulated to upper header.
95 It is very good solution, but it introduces two problems:
97 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
98 do not work over tunnels.
99 - traceroute does not work. I planned to relay ICMP from tunnel,
100 so that this problem would be solved and traceroute output
101 would even more informative. This idea appeared to be wrong:
102 only Linux complies to rfc1812 now (yes, guys, Linux is the only
103 true router now :-)), all routers (at least, in neighbourhood of mine)
104 return only 8 bytes of payload. It is the end.
106 Hence, if we want that OSPF worked or traceroute said something reasonable,
107 we should search for another solution.
109 One of them is to parse packet trying to detect inner encapsulation
110 made by our node. It is difficult or even impossible, especially,
111 taking into account fragmentation. To be short, it is not a solution at all.
113 Current solution: The solution was UNEXPECTEDLY SIMPLE.
114 We force DF flag on tunnels with preconfigured hop limit,
115 that is ALL. :-) Well, it does not remove the problem completely,
116 but exponential growth of network traffic is changed to linear
117 (branches, that exceed pmtu are pruned) and tunnel mtu
118 quickly degrades to a value <68, where looping stops.
119 Yes, it is not good if there exists a router in the loop,
120 which does not force DF, even when encapsulating packets have DF set.
121 But it is not our problem! Nobody could accuse us, we made
122 all that we could make. Even if it is your gated who injected
123 fatal route to network, even if it were you who configured
124 fatal static route: you are innocent. :-)
128 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
129 practically identical code. It would be good to glue them
130 together, but it is not very evident, how to make them modular.
131 sit is integral part of IPv6, ipip and gre are naturally modular.
132 We could extract common parts (hash table, ioctl etc)
133 to a separate module (ip_tunnel.c).
138 #ifndef GRE_IOCTL_ONLY
139 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
140 static struct rtnl_link_ops ipgre_tap_ops __read_mostly;
142 static int ipgre_tunnel_init(struct net_device *dev);
143 static void ipgre_tunnel_setup(struct net_device *dev);
144 static void ipgre_tap_setup(struct net_device *dev);
145 static int ipgre_tunnel_bind_dev(struct net_device *dev);
149 static int ipgre_net_id;
151 struct ip_tunnel *tunnels[4][HASH_SIZE];
153 struct net_device *fb_tunnel_dev;
156 /* Tunnel hash table */
166 We require exact key match i.e. if a key is present in packet
167 it will match only tunnel with the same key; if it is not present,
168 it will match only keyless tunnel.
170 All keysless packets, if not matched configured keyless tunnels
171 will match fallback tunnel.
174 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
176 #define tunnels_r_l tunnels[3]
177 #define tunnels_r tunnels[2]
178 #define tunnels_l tunnels[1]
179 #define tunnels_wc tunnels[0]
181 static DEFINE_RWLOCK(ipgre_lock);
183 /* Given src, dst and key, find appropriate for input tunnel. */
185 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
186 __be32 remote, __be32 local,
187 __be32 key, __be16 gre_proto)
189 struct net *net = dev_net(dev);
190 int link = dev->ifindex;
191 unsigned h0 = HASH(remote);
192 unsigned h1 = HASH(key);
193 struct ip_tunnel *t, *cand = NULL;
194 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
195 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
196 ARPHRD_ETHER : ARPHRD_IPGRE;
197 int score, cand_score = 4;
199 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
200 if (local != t->parms.iph.saddr ||
201 remote != t->parms.iph.daddr ||
202 key != t->parms.i_key ||
203 !(t->dev->flags & IFF_UP))
206 if (t->dev->type != ARPHRD_IPGRE &&
207 t->dev->type != dev_type)
211 if (t->parms.link != link)
213 if (t->dev->type != dev_type)
218 if (score < cand_score) {
224 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
225 if (remote != t->parms.iph.daddr ||
226 key != t->parms.i_key ||
227 !(t->dev->flags & IFF_UP))
230 if (t->dev->type != ARPHRD_IPGRE &&
231 t->dev->type != dev_type)
235 if (t->parms.link != link)
237 if (t->dev->type != dev_type)
242 if (score < cand_score) {
248 for (t = ign->tunnels_l[h1]; t; t = t->next) {
249 if ((local != t->parms.iph.saddr &&
250 (local != t->parms.iph.daddr ||
251 !ipv4_is_multicast(local))) ||
252 key != t->parms.i_key ||
253 !(t->dev->flags & IFF_UP))
256 if (t->dev->type != ARPHRD_IPGRE &&
257 t->dev->type != dev_type)
261 if (t->parms.link != link)
263 if (t->dev->type != dev_type)
268 if (score < cand_score) {
274 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
275 if (t->parms.i_key != key ||
276 !(t->dev->flags & IFF_UP))
279 if (t->dev->type != ARPHRD_IPGRE &&
280 t->dev->type != dev_type)
284 if (t->parms.link != link)
286 if (t->dev->type != dev_type)
291 if (score < cand_score) {
300 if (ign->fb_tunnel_dev->flags & IFF_UP)
301 return netdev_priv(ign->fb_tunnel_dev);
306 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
307 struct ip_tunnel_parm *parms)
309 __be32 remote = parms->iph.daddr;
310 __be32 local = parms->iph.saddr;
311 __be32 key = parms->i_key;
312 unsigned h = HASH(key);
317 if (remote && !ipv4_is_multicast(remote)) {
322 return &ign->tunnels[prio][h];
325 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
328 return __ipgre_bucket(ign, &t->parms);
331 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
333 struct ip_tunnel **tp = ipgre_bucket(ign, t);
336 write_lock_bh(&ipgre_lock);
338 write_unlock_bh(&ipgre_lock);
341 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
343 struct ip_tunnel **tp;
345 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
347 write_lock_bh(&ipgre_lock);
349 write_unlock_bh(&ipgre_lock);
355 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
356 struct ip_tunnel_parm *parms,
359 __be32 remote = parms->iph.daddr;
360 __be32 local = parms->iph.saddr;
361 __be32 key = parms->i_key;
362 int link = parms->link;
363 struct ip_tunnel *t, **tp;
364 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
366 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
367 if (local == t->parms.iph.saddr &&
368 remote == t->parms.iph.daddr &&
369 key == t->parms.i_key &&
370 link == t->parms.link &&
371 type == t->dev->type)
377 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
378 struct ip_tunnel_parm *parms, int gretap, int create)
380 struct ip_tunnel *t, *nt;
381 struct net_device *dev;
383 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
385 t = ipgre_tunnel_find(net, parms, gretap ? ARPHRD_ETHER : ARPHRD_IPGRE);
390 strlcpy(name, parms->name, IFNAMSIZ);
392 sprintf(name, "gre%%d");
394 dev = alloc_netdev(sizeof(*t), name, gretap ? ipgre_tap_setup
395 : ipgre_tunnel_setup);
399 dev_net_set(dev, net);
401 if (strchr(name, '%')) {
402 if (dev_alloc_name(dev, name) < 0)
407 random_ether_addr(dev->dev_addr);
409 #ifndef GRE_IOCTL_ONLY
410 dev->rtnl_link_ops = gretap ? &ipgre_tap_ops : &ipgre_link_ops;
412 nt = netdev_priv(dev);
415 dev->mtu = ipgre_tunnel_bind_dev(dev);
417 if (register_netdevice(dev) < 0)
421 ipgre_tunnel_link(ign, nt);
429 static void ipgre_tunnel_uninit(struct net_device *dev)
431 struct net *net = dev_net(dev);
432 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
434 ipgre_tunnel_unlink(ign, netdev_priv(dev));
439 static void ipgre_err(struct sk_buff *skb, u32 info)
442 /* All the routers (except for Linux) return only
443 8 bytes of packet payload. It means, that precise relaying of
444 ICMP in the real Internet is absolutely infeasible.
446 Moreover, Cisco "wise men" put GRE key to the third word
447 in GRE header. It makes impossible maintaining even soft state for keyed
448 GRE tunnels with enabled checksum. Tell them "thank you".
450 Well, I wonder, rfc1812 was written by Cisco employee,
451 what the hell these idiots break standrads established
455 struct iphdr *iph = (struct iphdr *)skb->data;
456 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
457 int grehlen = (iph->ihl<<2) + 4;
458 const int type = icmp_hdr(skb)->type;
459 const int code = icmp_hdr(skb)->code;
464 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
465 if (flags&(GRE_VERSION|GRE_ROUTING))
474 /* If only 8 bytes returned, keyed message will be dropped here */
475 if (skb_headlen(skb) < grehlen)
480 case ICMP_PARAMETERPROB:
483 case ICMP_DEST_UNREACH:
486 case ICMP_PORT_UNREACH:
487 /* Impossible event. */
489 case ICMP_FRAG_NEEDED:
490 /* Soft state for pmtu is maintained by IP core. */
493 /* All others are translated to HOST_UNREACH.
494 rfc2003 contains "deep thoughts" about NET_UNREACH,
495 I believe they are just ether pollution. --ANK
500 case ICMP_TIME_EXCEEDED:
501 if (code != ICMP_EXC_TTL)
506 read_lock(&ipgre_lock);
507 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
509 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
511 if (t == NULL || t->parms.iph.daddr == 0 ||
512 ipv4_is_multicast(t->parms.iph.daddr))
515 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
518 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
522 t->err_time = jiffies;
524 read_unlock(&ipgre_lock);
528 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
530 if (INET_ECN_is_ce(iph->tos)) {
531 if (skb->protocol == htons(ETH_P_IP)) {
532 IP_ECN_set_ce(ip_hdr(skb));
533 } else if (skb->protocol == htons(ETH_P_IPV6)) {
534 IP6_ECN_set_ce(ipv6_hdr(skb));
540 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
543 if (skb->protocol == htons(ETH_P_IP))
544 inner = old_iph->tos;
545 else if (skb->protocol == htons(ETH_P_IPV6))
546 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
547 return INET_ECN_encapsulate(tos, inner);
550 static int ipgre_rcv(struct sk_buff *skb)
558 struct ip_tunnel *tunnel;
563 if (!pskb_may_pull(skb, 16))
570 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
571 /* - Version must be 0.
572 - We do not support routing headers.
574 if (flags&(GRE_VERSION|GRE_ROUTING))
577 if (flags&GRE_CSUM) {
578 switch (skb->ip_summed) {
579 case CHECKSUM_COMPLETE:
580 csum = csum_fold(skb->csum);
586 csum = __skb_checksum_complete(skb);
587 skb->ip_summed = CHECKSUM_COMPLETE;
592 key = *(__be32*)(h + offset);
596 seqno = ntohl(*(__be32*)(h + offset));
601 gre_proto = *(__be16 *)(h + 2);
603 read_lock(&ipgre_lock);
604 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
605 iph->saddr, iph->daddr, key,
607 struct net_device_stats *stats;
608 #ifdef HAVE_NETDEV_STATS
609 stats = &tunnel->dev->stats;
611 stats = &tunnel->stat;
616 skb->protocol = gre_proto;
617 /* WCCP version 1 and 2 protocol decoding.
618 * - Change protocol to IP
619 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
621 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
622 skb->protocol = htons(ETH_P_IP);
623 if ((*(h + offset) & 0xF0) != 0x40)
627 skb->mac_header = skb->network_header;
628 __pskb_pull(skb, offset);
629 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
630 skb->pkt_type = PACKET_HOST;
631 #ifdef CONFIG_NET_IPGRE_BROADCAST
632 if (ipv4_is_multicast(iph->daddr)) {
633 /* Looped back packet, drop it! */
634 if (skb_rtable(skb)->fl.iif == 0)
637 skb->pkt_type = PACKET_BROADCAST;
641 if (((flags&GRE_CSUM) && csum) ||
642 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
643 stats->rx_crc_errors++;
647 if (tunnel->parms.i_flags&GRE_SEQ) {
648 if (!(flags&GRE_SEQ) ||
649 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
650 stats->rx_fifo_errors++;
654 tunnel->i_seqno = seqno + 1;
659 /* Warning: All skb pointers will be invalidated! */
660 if (tunnel->dev->type == ARPHRD_ETHER) {
661 if (!pskb_may_pull(skb, ETH_HLEN)) {
662 stats->rx_length_errors++;
668 skb->protocol = eth_type_trans(skb, tunnel->dev);
669 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
673 stats->rx_bytes += len;
674 skb->dev = tunnel->dev;
678 skb_reset_network_header(skb);
679 ipgre_ecn_decapsulate(iph, skb);
682 /* XXX: Temporary workaround to avoid a panic when doing
683 * bridging due to multiple meanings of CHECKSUM_HW. */
684 if (skb->ip_summed == CHECKSUM_HW)
685 skb->ip_summed = CHECKSUM_NONE;
689 read_unlock(&ipgre_lock);
692 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
695 read_unlock(&ipgre_lock);
701 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
703 struct ip_tunnel *tunnel = netdev_priv(dev);
704 struct net_device_stats *stats;
705 struct iphdr *old_iph = ip_hdr(skb);
709 struct rtable *rt; /* Route to the other host */
710 struct net_device *tdev; /* Device to other host */
711 struct iphdr *iph; /* Our new IP header */
712 unsigned int max_headroom; /* The extra header space needed */
717 #ifdef HAVE_NETDEV_STATS
718 stats = &tunnel->dev->stats;
720 stats = &tunnel->stat;
723 if (dev->type == ARPHRD_ETHER)
724 IPCB(skb)->flags = 0;
726 #ifdef HAVE_NETDEV_HEADER_OPS
727 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
729 if (dev->hard_header && dev->type == ARPHRD_IPGRE) {
732 tiph = (struct iphdr *)skb->data;
734 gre_hlen = tunnel->hlen;
735 tiph = &tunnel->parms.iph;
738 if ((dst = tiph->daddr) == 0) {
741 if (skb_dst(skb) == NULL) {
742 stats->tx_fifo_errors++;
746 if (skb->protocol == htons(ETH_P_IP)) {
747 rt = skb_rtable(skb);
748 if ((dst = rt->rt_gateway) == 0)
752 else if (skb->protocol == htons(ETH_P_IPV6)) {
753 struct in6_addr *addr6;
755 struct neighbour *neigh = skb_dst(skb)->neighbour;
760 addr6 = (struct in6_addr *)&neigh->primary_key;
761 addr_type = ipv6_addr_type(addr6);
763 if (addr_type == IPV6_ADDR_ANY) {
764 addr6 = &ipv6_hdr(skb)->daddr;
765 addr_type = ipv6_addr_type(addr6);
768 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
771 dst = addr6->s6_addr32[3];
781 if (skb->protocol == htons(ETH_P_IP))
786 struct flowi fl = { .oif = tunnel->parms.link,
789 .saddr = tiph->saddr,
790 .tos = RT_TOS(tos) } },
791 .proto = IPPROTO_GRE };
792 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
793 stats->tx_carrier_errors++;
797 tdev = rt->u.dst.dev;
807 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
808 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
810 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
813 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
816 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
818 /* XXX: Temporarily allow fragmentation since DF doesn't
819 * do the right thing with bridging. */
821 if (skb->protocol == htons(ETH_P_IP)) {
822 df |= (old_iph->frag_off&htons(IP_DF));
824 if ((old_iph->frag_off&htons(IP_DF)) &&
825 mtu < ntohs(old_iph->tot_len)) {
826 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
832 else if (skb->protocol == htons(ETH_P_IPV6)) {
833 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
835 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
836 if ((tunnel->parms.iph.daddr &&
837 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
838 rt6->rt6i_dst.plen == 128) {
839 rt6->rt6i_flags |= RTF_MODIFIED;
840 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
844 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
845 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
852 if (tunnel->err_count > 0) {
853 if (time_before(jiffies,
854 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
857 dst_link_failure(skb);
859 tunnel->err_count = 0;
862 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
864 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
865 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
866 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
874 skb_set_owner_w(new_skb, skb->sk);
877 old_iph = ip_hdr(skb);
880 skb_reset_transport_header(skb);
881 skb_push(skb, gre_hlen);
882 skb_reset_network_header(skb);
883 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
884 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
887 skb_dst_set(skb, &rt->u.dst);
890 * Push down and install the IPIP header.
895 iph->ihl = sizeof(struct iphdr) >> 2;
897 iph->protocol = IPPROTO_GRE;
898 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
899 iph->daddr = rt->rt_dst;
900 iph->saddr = rt->rt_src;
902 if ((iph->ttl = tiph->ttl) == 0) {
903 if (skb->protocol == htons(ETH_P_IP))
904 iph->ttl = old_iph->ttl;
906 else if (skb->protocol == htons(ETH_P_IPV6))
907 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
910 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
913 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
914 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
915 htons(ETH_P_TEB) : skb->protocol;
917 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
918 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
920 if (tunnel->parms.o_flags&GRE_SEQ) {
922 *ptr = htonl(tunnel->o_seqno);
925 if (tunnel->parms.o_flags&GRE_KEY) {
926 *ptr = tunnel->parms.o_key;
929 if (tunnel->parms.o_flags&GRE_CSUM) {
931 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
941 dst_link_failure(skb);
949 static int ipgre_tunnel_bind_dev(struct net_device *dev)
951 struct net_device *tdev = NULL;
952 struct ip_tunnel *tunnel;
954 int hlen = LL_MAX_HEADER;
955 int mtu = ETH_DATA_LEN;
956 int addend = sizeof(struct iphdr) + 4;
958 tunnel = netdev_priv(dev);
959 iph = &tunnel->parms.iph;
961 /* Guess output device to choose reasonable mtu and needed_headroom */
964 struct flowi fl = { .oif = tunnel->parms.link,
966 { .daddr = iph->daddr,
968 .tos = RT_TOS(iph->tos) } },
969 .proto = IPPROTO_GRE };
971 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
972 tdev = rt->u.dst.dev;
976 if (dev->type != ARPHRD_ETHER)
977 dev->flags |= IFF_POINTOPOINT;
980 if (!tdev && tunnel->parms.link)
981 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
984 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
985 hlen = tdev->hard_header_len + tdev->needed_headroom;
987 hlen = tdev->hard_header_len;
991 dev->iflink = tunnel->parms.link;
993 /* Precalculate GRE options length */
994 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
995 if (tunnel->parms.o_flags&GRE_CSUM)
997 if (tunnel->parms.o_flags&GRE_KEY)
999 if (tunnel->parms.o_flags&GRE_SEQ)
1002 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1003 dev->needed_headroom = hlen + addend;
1004 mtu -= dev->hard_header_len + addend;
1006 dev->hard_header_len = hlen + addend;
1009 tunnel->hlen = addend;
1014 /* XXX: Set MTU to the maximum possible value. If we are bridged to a
1015 * device with a larger MTU then packets will be dropped. */
1022 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1025 struct ip_tunnel_parm p;
1026 struct ip_tunnel *t;
1027 struct net *net = dev_net(dev);
1028 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1029 int add_tunnel, gretap;
1034 if (dev == ign->fb_tunnel_dev) {
1035 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1039 t = ipgre_tunnel_locate(net, &p, false, 0);
1042 t = netdev_priv(dev);
1043 memcpy(&p, &t->parms, sizeof(p));
1044 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1053 if (!capable(CAP_NET_ADMIN))
1057 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1061 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1062 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1063 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1066 add_tunnel = (cmd == SIOCADDTUNNEL || cmd == SIOCADDGRETAP);
1067 gretap = (cmd == SIOCADDGRETAP || cmd == SIOCCHGGRETAP);
1070 p.iph.frag_off |= htons(IP_DF);
1072 if (!(p.i_flags&GRE_KEY))
1074 if (!(p.o_flags&GRE_KEY))
1077 t = ipgre_tunnel_locate(net, &p, gretap, add_tunnel);
1079 if (dev != ign->fb_tunnel_dev && !add_tunnel) {
1081 if (t->dev != dev) {
1086 unsigned nflags = 0;
1088 t = netdev_priv(dev);
1090 if (ipv4_is_multicast(p.iph.daddr))
1091 nflags = IFF_BROADCAST;
1092 else if (p.iph.daddr)
1093 nflags = IFF_POINTOPOINT;
1095 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1099 ipgre_tunnel_unlink(ign, t);
1100 t->parms.iph.saddr = p.iph.saddr;
1101 t->parms.iph.daddr = p.iph.daddr;
1102 t->parms.i_key = p.i_key;
1103 t->parms.o_key = p.o_key;
1104 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1105 memcpy(dev->broadcast, &p.iph.daddr, 4);
1106 ipgre_tunnel_link(ign, t);
1107 netdev_state_change(dev);
1114 t->parms.iph.ttl = p.iph.ttl;
1115 t->parms.iph.tos = p.iph.tos;
1116 t->parms.iph.frag_off = p.iph.frag_off;
1117 if (t->parms.link != p.link) {
1118 t->parms.link = p.link;
1119 dev->mtu = ipgre_tunnel_bind_dev(dev);
1120 netdev_state_change(dev);
1123 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1126 err = (add_tunnel ? -ENOBUFS : -ENOENT);
1131 if (!capable(CAP_NET_ADMIN))
1134 if (dev == ign->fb_tunnel_dev) {
1136 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1139 if ((t = ipgre_tunnel_locate(net, &p, false, 0)) == NULL)
1142 if (t == netdev_priv(ign->fb_tunnel_dev))
1146 unregister_netdevice(dev);
1158 #ifndef HAVE_NETDEV_STATS
1159 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1161 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1165 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1167 struct ip_tunnel *tunnel = netdev_priv(dev);
1169 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1170 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1172 new_mtu > 0xFFF8 - tunnel->hlen)
1179 /* Nice toy. Unfortunately, useless in real life :-)
1180 It allows to construct virtual multiprotocol broadcast "LAN"
1181 over the Internet, provided multicast routing is tuned.
1184 I have no idea was this bicycle invented before me,
1185 so that I had to set ARPHRD_IPGRE to a random value.
1186 I have an impression, that Cisco could make something similar,
1187 but this feature is apparently missing in IOS<=11.2(8).
1189 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1190 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1192 ping -t 255 224.66.66.66
1194 If nobody answers, mbone does not work.
1196 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1197 ip addr add 10.66.66.<somewhat>/24 dev Universe
1198 ifconfig Universe up
1199 ifconfig Universe add fe80::<Your_real_addr>/10
1200 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1203 ftp fec0:6666:6666::193.233.7.65
1208 #ifdef HAVE_NETDEV_HEADER_OPS
1209 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1210 unsigned short type,
1211 const void *daddr, const void *saddr, unsigned len)
1213 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1214 void *daddr, void *saddr, unsigned len)
1217 struct ip_tunnel *t = netdev_priv(dev);
1218 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1219 __be16 *p = (__be16*)(iph+1);
1221 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1222 p[0] = t->parms.o_flags;
1226 * Set the source hardware address.
1230 memcpy(&iph->saddr, saddr, 4);
1233 memcpy(&iph->daddr, daddr, 4);
1236 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1242 #ifdef HAVE_NETDEV_HEADER_OPS
1243 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1245 static int ipgre_header_parse(struct sk_buff *skb, unsigned char *haddr)
1248 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1249 memcpy(haddr, &iph->saddr, 4);
1253 #ifdef HAVE_NETDEV_HEADER_OPS
1254 static const struct header_ops ipgre_header_ops = {
1255 .create = ipgre_header,
1256 .parse = ipgre_header_parse,
1260 #ifdef CONFIG_NET_IPGRE_BROADCAST
1261 static int ipgre_open(struct net_device *dev)
1263 struct ip_tunnel *t = netdev_priv(dev);
1265 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1266 struct flowi fl = { .oif = t->parms.link,
1268 { .daddr = t->parms.iph.daddr,
1269 .saddr = t->parms.iph.saddr,
1270 .tos = RT_TOS(t->parms.iph.tos) } },
1271 .proto = IPPROTO_GRE };
1273 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1274 return -EADDRNOTAVAIL;
1275 dev = rt->u.dst.dev;
1277 if (__in_dev_get_rtnl(dev) == NULL)
1278 return -EADDRNOTAVAIL;
1279 t->mlink = dev->ifindex;
1280 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1285 static int ipgre_close(struct net_device *dev)
1287 struct ip_tunnel *t = netdev_priv(dev);
1289 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1290 struct in_device *in_dev;
1291 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1293 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1302 #ifdef HAVE_NET_DEVICE_OPS
1303 static const struct net_device_ops ipgre_netdev_ops = {
1304 .ndo_init = ipgre_tunnel_init,
1305 .ndo_uninit = ipgre_tunnel_uninit,
1306 #ifdef CONFIG_NET_IPGRE_BROADCAST
1307 .ndo_open = ipgre_open,
1308 .ndo_stop = ipgre_close,
1310 .ndo_start_xmit = ipgre_tunnel_xmit,
1311 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1312 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1316 static void ipgre_tunnel_setup(struct net_device *dev)
1318 #ifdef HAVE_NET_DEVICE_OPS
1319 dev->netdev_ops = &ipgre_netdev_ops;
1321 dev->init = ipgre_tunnel_init;
1322 dev->uninit = ipgre_tunnel_uninit;
1323 dev->hard_start_xmit = ipgre_tunnel_xmit;
1324 #ifndef HAVE_NETDEV_STATS
1325 dev->get_stats = ipgre_tunnel_get_stats;
1327 dev->do_ioctl = ipgre_tunnel_ioctl;
1328 dev->change_mtu = ipgre_tunnel_change_mtu;
1329 #endif /* HAVE_NET_DEVICE_OPS */
1330 dev->destructor = free_netdev;
1332 dev->type = ARPHRD_IPGRE;
1333 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1334 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1336 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1338 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1339 dev->flags = IFF_NOARP;
1342 dev->features |= NETIF_F_NETNS_LOCAL;
1343 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1346 static int ipgre_tunnel_init(struct net_device *dev)
1348 struct ip_tunnel *tunnel;
1351 tunnel = netdev_priv(dev);
1352 iph = &tunnel->parms.iph;
1355 strcpy(tunnel->parms.name, dev->name);
1357 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1358 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1361 #ifdef CONFIG_NET_IPGRE_BROADCAST
1362 if (ipv4_is_multicast(iph->daddr)) {
1365 dev->flags = IFF_BROADCAST;
1366 #ifdef HAVE_NETDEV_HEADER_OPS
1367 dev->header_ops = &ipgre_header_ops;
1369 dev->hard_header = ipgre_header;
1370 dev->hard_header_parse = ipgre_header_parse;
1372 #ifndef HAVE_NET_DEVICE_OPS
1373 dev->open = ipgre_open;
1374 dev->stop = ipgre_close;
1379 #ifdef HAVE_NETDEV_HEADER_OPS
1380 dev->header_ops = &ipgre_header_ops;
1382 dev->hard_header = ipgre_header;
1383 dev->hard_header_parse = ipgre_header_parse;
1390 #ifdef HAVE_NET_DEVICE_OPS
1391 static void ipgre_fb_tunnel_init(struct net_device *dev)
1393 static int ipgre_fb_tunnel_init(struct net_device *dev)
1396 struct ip_tunnel *tunnel = netdev_priv(dev);
1397 struct iphdr *iph = &tunnel->parms.iph;
1398 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1401 strcpy(tunnel->parms.name, dev->name);
1404 iph->protocol = IPPROTO_GRE;
1406 tunnel->hlen = sizeof(struct iphdr) + 4;
1409 ign->tunnels_wc[0] = tunnel;
1411 #ifndef HAVE_NET_DEVICE_OPS
1416 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
1417 static struct net_protocol ipgre_protocol = {
1419 static const struct net_protocol ipgre_protocol = {
1421 .handler = ipgre_rcv,
1422 .err_handler = ipgre_err,
1423 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
1428 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1432 for (prio = 0; prio < 4; prio++) {
1434 for (h = 0; h < HASH_SIZE; h++) {
1435 struct ip_tunnel *t;
1436 while ((t = ign->tunnels[prio][h]) != NULL)
1437 unregister_netdevice(t->dev);
1442 static int ipgre_init_net(struct net *net)
1445 struct ipgre_net *ign;
1448 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1452 err = net_assign_generic(net, ipgre_net_id, ign);
1456 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), GRE_IOCTL_DEVICE,
1457 ipgre_tunnel_setup);
1458 if (!ign->fb_tunnel_dev) {
1462 dev_net_set(ign->fb_tunnel_dev, net);
1464 #ifdef HAVE_NET_DEVICE_OPS
1465 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1467 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1469 #ifndef GRE_IOCTL_ONLY
1470 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1473 if ((err = register_netdev(ign->fb_tunnel_dev)))
1479 free_netdev(ign->fb_tunnel_dev);
1488 static void ipgre_exit_net(struct net *net)
1490 struct ipgre_net *ign;
1492 ign = net_generic(net, ipgre_net_id);
1494 ipgre_destroy_tunnels(ign);
1499 static struct pernet_operations ipgre_net_ops = {
1500 .init = ipgre_init_net,
1501 .exit = ipgre_exit_net,
1504 static int ipgre_tap_init(struct net_device *dev)
1506 struct ip_tunnel *tunnel;
1508 tunnel = netdev_priv(dev);
1511 strcpy(tunnel->parms.name, dev->name);
1513 ipgre_tunnel_bind_dev(dev);
1518 #ifdef HAVE_NET_DEVICE_OPS
1519 static const struct net_device_ops ipgre_tap_netdev_ops = {
1520 .ndo_init = ipgre_tap_init,
1521 .ndo_uninit = ipgre_tunnel_uninit,
1522 .ndo_start_xmit = ipgre_tunnel_xmit,
1523 .ndo_set_mac_address = eth_mac_addr,
1524 .ndo_validate_addr = eth_validate_addr,
1525 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1526 .ndo_change_mtu = ipgre_tunnel_change_mtu,
/*
 * ipgre_tap_setup - setup callback for gretap devices (used by
 * rtnl_link_ops below).  Installs either the net_device_ops table or,
 * on older kernels, the individual function pointers; devices are
 * confined to their namespace via NETIF_F_NETNS_LOCAL.
 */
1530 static void ipgre_tap_setup(struct net_device *dev)
1534 #ifdef HAVE_NET_DEVICE_OPS
1535 dev->netdev_ops = &ipgre_tap_netdev_ops;
/* Legacy (pre-net_device_ops) kernels: set hooks individually. */
1537 dev->init = ipgre_tap_init;
1538 dev->uninit = ipgre_tunnel_uninit;
1539 dev->hard_start_xmit = ipgre_tunnel_xmit;
1540 #ifndef HAVE_NETDEV_STATS
1541 dev->get_stats = ipgre_tunnel_get_stats;
1543 dev->do_ioctl = ipgre_tunnel_ioctl;
1544 dev->change_mtu = ipgre_tunnel_change_mtu;
1545 #endif /* HAVE_NET_DEVICE_OPS */
/* Device memory is released when the last netdev reference drops. */
1546 dev->destructor = free_netdev;
/* Tunnel devices must not be moved between network namespaces. */
1549 dev->features |= NETIF_F_NETNS_LOCAL;
1552 #ifndef GRE_IOCTL_ONLY
/*
 * ipgre_tunnel_validate - rtnl_link_ops validate hook for plain GRE.
 *
 * Rejects configurations that set the GRE version or routing bits in
 * either direction's flags — this driver only implements GRE version 0
 * without source routing.  Return values are in the elided lines
 * (presumably 0 / -EINVAL — confirm).
 */
1553 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1561 if (data[IFLA_GRE_IFLAGS])
1562 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1563 if (data[IFLA_GRE_OFLAGS])
1564 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1565 if (flags & (GRE_VERSION|GRE_ROUTING))
/*
 * ipgre_tap_validate - rtnl_link_ops validate hook for gretap devices.
 *
 * Checks that any supplied MAC address is well-formed, inspects the
 * remote endpoint address (validation of it sits in the elided lines),
 * and then delegates the GRE flag checks to ipgre_tunnel_validate().
 */
1571 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1575 if (tb[IFLA_ADDRESS]) {
1576 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1578 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1579 return -EADDRNOTAVAIL;
1585 if (data[IFLA_GRE_REMOTE]) {
/* IPv4 destination address: 4 bytes copied out of the attribute. */
1586 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1592 return ipgre_tunnel_validate(tb, data);
/*
 * ipgre_netlink_parms - translate IFLA_GRE_* netlink attributes into an
 * ip_tunnel_parm structure.
 *
 * @data:  netlink attribute array (may contain NULL entries; each
 *         attribute is optional and only applied when present)
 * @parms: output, fully zeroed first so absent attributes read as 0
 *
 * Path MTU discovery defaults to ON: DF is set unless the caller
 * explicitly passes IFLA_GRE_PMTUDISC = 0.
 */
1595 static void ipgre_netlink_parms(struct nlattr *data[],
1596 struct ip_tunnel_parm *parms)
1598 memset(parms, 0, sizeof(*parms));
1600 parms->iph.protocol = IPPROTO_GRE;
1605 if (data[IFLA_GRE_LINK])
1606 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1608 if (data[IFLA_GRE_IFLAGS])
1609 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1611 if (data[IFLA_GRE_OFLAGS])
1612 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1614 if (data[IFLA_GRE_IKEY])
1615 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1617 if (data[IFLA_GRE_OKEY])
1618 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1620 if (data[IFLA_GRE_LOCAL])
1621 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1623 if (data[IFLA_GRE_REMOTE])
1624 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1626 if (data[IFLA_GRE_TTL])
1627 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1629 if (data[IFLA_GRE_TOS])
1630 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
/* PMTU discovery is on by default; attribute value 0 disables it. */
1632 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1633 parms->iph.frag_off = htons(IP_DF);
/*
 * ipgre_newlink - rtnl_link_ops newlink hook: create a new GRE/gretap
 * device from netlink attributes.
 *
 * Parses parameters into the private ip_tunnel area, refuses duplicates
 * (a tunnel with the same parameters already existing), assigns a
 * random MAC for tap devices when none was supplied, binds the tunnel
 * to the underlying device, registers the netdev and links the tunnel
 * into the per-netns hash.  Error handling/return statements are in the
 * elided lines.
 */
1636 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1637 struct nlattr *data[])
1639 struct ip_tunnel *nt;
1640 struct net *net = dev_net(dev);
1641 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1645 nt = netdev_priv(dev);
1646 ipgre_netlink_parms(data, &nt->parms);
/* Duplicate check: same-parameter tunnel of the same type exists. */
1648 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
/* gretap with no explicit MAC gets a random locally-administered one. */
1651 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1652 random_ether_addr(dev->dev_addr);
1654 mtu = ipgre_tunnel_bind_dev(dev);
1658 err = register_netdevice(dev);
1663 ipgre_tunnel_link(ign, nt);
/*
 * ipgre_changelink - rtnl_link_ops changelink hook: modify an existing
 * GRE/gretap device.
 *
 * The fallback device cannot be reconfigured.  For non-Ethernet (plain
 * GRE) devices a change of endpoint addresses that would flip the
 * device between point-to-point and broadcast mode is rejected, since
 * that changes link-layer semantics under the device's users.  Changing
 * saddr/daddr/i_key requires re-hashing, hence the unlink/relink pair.
 */
1669 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1670 struct nlattr *data[])
1672 struct ip_tunnel *t, *nt;
1673 struct net *net = dev_net(dev);
1674 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1675 struct ip_tunnel_parm p;
/* The namespace's fallback tunnel is fixed; refuse changes to it. */
1678 if (dev == ign->fb_tunnel_dev)
1681 nt = netdev_priv(dev);
1682 ipgre_netlink_parms(data, &p);
/* Look up (without creating) a tunnel matching the new parameters;
 * handling of a clash with a different device is in the elided lines. */
1684 t = ipgre_tunnel_locate(net, &p, false, 0);
1692 if (dev->type != ARPHRD_ETHER) {
1693 unsigned nflags = 0;
1695 if (ipv4_is_multicast(p.iph.daddr))
1696 nflags = IFF_BROADCAST;
1697 else if (p.iph.daddr)
1698 nflags = IFF_POINTOPOINT;
/* Reject if the new addresses would change the device's link mode. */
1700 if ((dev->flags ^ nflags) &
1701 (IFF_POINTOPOINT | IFF_BROADCAST))
/* saddr/daddr/i_key feed the tunnel hash, so unlink, update, relink. */
1705 ipgre_tunnel_unlink(ign, t);
1706 t->parms.iph.saddr = p.iph.saddr;
1707 t->parms.iph.daddr = p.iph.daddr;
1708 t->parms.i_key = p.i_key;
1709 if (dev->type != ARPHRD_ETHER) {
/* Plain GRE exposes the endpoints as dev_addr/broadcast (4 bytes). */
1710 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1711 memcpy(dev->broadcast, &p.iph.daddr, 4);
1713 ipgre_tunnel_link(ign, t);
1714 netdev_state_change(dev);
/* Remaining parameters do not affect hashing; update in place. */
1717 t->parms.o_key = p.o_key;
1718 t->parms.iph.ttl = p.iph.ttl;
1719 t->parms.iph.tos = p.iph.tos;
1720 t->parms.iph.frag_off = p.iph.frag_off;
/* A new underlying link may change the usable MTU. */
1722 if (t->parms.link != p.link) {
1723 t->parms.link = p.link;
1724 mtu = ipgre_tunnel_bind_dev(dev);
1727 netdev_state_change(dev);
/*
 * ipgre_get_size - rtnl_link_ops get_size hook: upper bound on the
 * netlink attribute space needed by ipgre_fill_info().  The per-
 * attribute nla_total_size() terms are in the elided lines; only some
 * of the labeling comments are visible here.
 */
1733 static size_t ipgre_get_size(const struct net_device *dev)
1738 /* IFLA_GRE_IFLAGS */
1740 /* IFLA_GRE_OFLAGS */
1746 /* IFLA_GRE_LOCAL */
1748 /* IFLA_GRE_REMOTE */
1754 /* IFLA_GRE_PMTUDISC */
/*
 * ipgre_fill_info - rtnl_link_ops fill_info hook: dump the tunnel's
 * current parameters as IFLA_GRE_* attributes.  The NLA_PUT_* macros
 * jump to an nla_put_failure label on overflow (label and returns are
 * in the elided lines).  PMTUDISC is reported as a boolean derived from
 * the DF bit, mirroring ipgre_netlink_parms().
 */
1759 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1761 struct ip_tunnel *t = netdev_priv(dev);
1762 struct ip_tunnel_parm *p = &t->parms;
1764 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1765 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1766 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1767 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1768 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1769 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1770 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1771 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1772 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1773 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
/* Netlink attribute validation policy for IFLA_GRE_*.  LOCAL/REMOTE use
 * a length check (4 bytes, matching the iphdr address fields) rather
 * than a type, since they carry raw big-endian IPv4 addresses. */
1781 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1782 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1783 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1784 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1785 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1786 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1787 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1788 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1789 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1790 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1791 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
/* rtnl_link_ops for plain GRE ("gre") devices; the .kind string is in
 * an elided line.  Registered from ipgre_init(). */
1794 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1796 .maxtype = IFLA_GRE_MAX,
1797 .policy = ipgre_policy,
1798 .priv_size = sizeof(struct ip_tunnel),
1799 .setup = ipgre_tunnel_setup,
1800 .validate = ipgre_tunnel_validate,
1801 .newlink = ipgre_newlink,
1802 .changelink = ipgre_changelink,
1803 .get_size = ipgre_get_size,
1804 .fill_info = ipgre_fill_info,
/* rtnl_link_ops for Ethernet-over-GRE ("gretap") devices; differs from
 * ipgre_link_ops only in the setup/validate hooks (and the elided
 * .kind string). */
1807 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1809 .maxtype = IFLA_GRE_MAX,
1810 .policy = ipgre_policy,
1811 .priv_size = sizeof(struct ip_tunnel),
1812 .setup = ipgre_tap_setup,
1813 .validate = ipgre_tap_validate,
1814 .newlink = ipgre_newlink,
1815 .changelink = ipgre_changelink,
1816 .get_size = ipgre_get_size,
1817 .fill_info = ipgre_fill_info,
1822 * And now the modules code and kernel interface.
/*
 * ipgre_init - module entry point.
 *
 * Registration order: GRE protocol handler, then per-netns ops, then
 * (unless built ioctl-only) the two rtnl_link_ops.  The error labels
 * below unwind in exactly the reverse order, so a failure at any step
 * leaves nothing registered.
 */
1825 static int __init ipgre_init(void)
1829 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
/* Claim IPPROTO_GRE; fails if another GRE handler is already loaded. */
1831 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1832 printk(KERN_INFO "ipgre init: can't add protocol\n");
1836 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1838 goto gen_device_failed;
1840 #ifndef GRE_IOCTL_ONLY
1841 err = rtnl_link_register(&ipgre_link_ops);
1843 goto rtnl_link_failed;
1845 err = rtnl_link_register(&ipgre_tap_ops);
1847 goto tap_ops_failed;
/* Error unwinding (reverse of registration order). */
1853 #ifndef GRE_IOCTL_ONLY
1855 rtnl_link_unregister(&ipgre_link_ops);
1857 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1860 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
/*
 * ipgre_fini - module exit: unregister everything in the reverse of
 * the order ipgre_init() registered it.  inet_del_protocol() failure
 * is only logged — at module-exit time there is nothing else to do.
 */
1865 static void __exit ipgre_fini(void)
1867 #ifndef GRE_IOCTL_ONLY
1868 rtnl_link_unregister(&ipgre_tap_ops);
1869 rtnl_link_unregister(&ipgre_link_ops);
1871 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1872 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1873 printk(KERN_INFO "ipgre close: can't remove protocol\n");
/* Module metadata and entry/exit hookup.  The RTNL link aliases let the
 * kernel autoload this module when "ip link add type gre|gretap" is
 * used (only meaningful when rtnetlink support is compiled in). */
1876 module_init(ipgre_init);
1877 module_exit(ipgre_fini);
1878 MODULE_DESCRIPTION("GRE over IPv4 tunneling driver");
1879 MODULE_LICENSE("GPL");
1880 #ifndef GRE_IOCTL_ONLY
1881 MODULE_ALIAS_RTNL_LINK("gre");
1882 MODULE_ALIAS_RTNL_LINK("gretap");