Commit 35a9a271 authored by Ville Nuorvala, committed by David S. Miller

[IPV6]: Remove sockets from ip6_tunnel.c

parent 71b1bb03
......@@ -25,6 +25,8 @@ struct ip6_tnl {
int recursion; /* depth of hard_start_xmit recursion */
struct ip6_tnl_parm parms; /* tunnel configuration parameters */
struct flowi fl; /* flowi template for xmit */
struct dst_entry *dst_cache; /* cached dst */
u32 dst_cookie;
};
/* Tunnel encapsulation limit destination sub-option */
......
......@@ -23,7 +23,6 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/if.h>
#include <linux/in.h>
......@@ -37,12 +36,12 @@
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <net/ip.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
......@@ -63,22 +62,6 @@ MODULE_LICENSE("GPL");
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
/* socket(s) used by ip6ip6_tnl_xmit() for resending packets */
static struct socket *__ip6_socket[NR_CPUS];
#define ip6_socket __ip6_socket[smp_processor_id()]
static void ip6_xmit_lock(void)
{
local_bh_disable();
if (unlikely(!spin_trylock(&ip6_socket->sk->sk_lock.slock)))
BUG();
}
static void ip6_xmit_unlock(void)
{
spin_unlock_bh(&ip6_socket->sk->sk_lock.slock);
}
#define HASH_SIZE 32
#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
......@@ -101,6 +84,33 @@ static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
/* lock for the tunnel lists */
static rwlock_t ip6ip6_lock = RW_LOCK_UNLOCKED;
/**
 * ip6_tnl_dst_check - validate the tunnel's cached dst entry
 * @t: tunnel whose cache to check
 *
 * Return:
 *   the cached dst entry if it is still valid, otherwise %NULL
 *   (in which case the stale entry is removed from the cache)
 **/
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
	struct dst_entry *dst = t->dst_cache;

	if (dst && dst->obsolete &&
	    dst->ops->check(dst, t->dst_cookie) == NULL) {
		t->dst_cache = NULL;
		/* the cache holds a reference (taken in ip6_tnl_dst_store);
		 * drop it here or the stale entry is leaked */
		dst_release(dst);
		return NULL;
	}

	return dst;
}
/**
 * ip6_tnl_dst_reset - drop the tunnel's cached dst entry
 * @t: tunnel whose cache to clear
 **/
static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
	struct dst_entry *old = t->dst_cache;

	t->dst_cache = NULL;
	dst_release(old);
}
/**
 * ip6_tnl_dst_store - cache a dst entry in the tunnel
 * @t: tunnel to store the entry in
 * @dst: dst entry to cache (ownership of the reference passes to @t)
 **/
static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	/* remember the FIB node serial number so ip6_tnl_dst_check()
	 * can later tell whether this entry went stale */
	if (rt->rt6i_node)
		t->dst_cookie = rt->rt6i_node->fn_sernum;
	else
		t->dst_cookie = 0;

	dst_release(t->dst_cache);
	t->dst_cache = dst;
}
/**
* ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
......@@ -294,13 +304,16 @@ ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
/**
 * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
 * @dev: the device to be destroyed
 *
 * Description:
 *   Unlinks the tunnel from the tunnel list (or clears the fallback
 *   slot for the fallback device), drops the cached dst entry and
 *   releases the device reference.
 **/
static void
ip6ip6_tnl_dev_uninit(struct net_device *dev)
{
	struct ip6_tnl *t = dev->priv;

	if (dev == ip6ip6_fb_tnl_dev) {
		write_lock_bh(&ip6ip6_lock);
		tnls_wc[0] = NULL;
		write_unlock_bh(&ip6ip6_lock);
	} else {
		/* the rendered text called ip6ip6_tnl_unlink() twice
		 * (diff residue); a single unlink is correct */
		ip6ip6_tnl_unlink(t);
	}
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}
......@@ -522,112 +535,34 @@ int ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
return 0;
}
/**
* txopt_len - get necessary size for new &struct ipv6_txoptions
* @orig_opt: old options
*
* Return:
* Size of old one plus size of tunnel encapsulation limit option
**/
static inline int
txopt_len(struct ipv6_txoptions *orig_opt)
{
int len = sizeof (*orig_opt) + 8;
if (orig_opt && orig_opt->dst0opt)
len += ipv6_optlen(orig_opt->dst0opt);
return len;
}
/**
 * create_tel - create tunnel encapsulation limit destination sub-option
 * @encap_limit: tunnel encapsulation limit value
 *
 * Description:
 *   Allocates a fresh &struct ipv6_txoptions holding a single
 *   destination options header that carries the tunnel encapsulation
 *   limit TLV, padded with a PadN option to an 8-byte boundary.
 *
 * Return:
 *   Pointer to the new options block, or %NULL on allocation failure.
 *   The caller owns the returned memory and must kfree() it.
 **/
static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
{
	struct ipv6_tlv_tnl_enc_lim *tel;
	struct ipv6_txoptions *opt;
	__u8 *raw;

	int opt_len = sizeof(*opt) + 8;

	if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
		return NULL;
	}
	memset(opt, 0, opt_len);
	opt->tot_len = opt_len;
	/* destination options header lives right after the struct */
	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
	opt->opt_nflen = 8;

	tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
	tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
	tel->length = 1;
	tel->encap_limit = encap_limit;

	raw = (__u8 *) opt->dst0opt;
	raw[5] = IPV6_TLV_PADN;
	raw[6] = 1;	/* PadN data length: pads the option to 8 bytes */

	return opt;
}
/* Fragment-copy callback: copy @len bytes starting at @offset from the
 * source buffer @from into @to.  Always succeeds. */
static int
ip6ip6_getfrag(void *from, char *to, int offset, int len, int odd,
	       struct sk_buff *skb)
{
	const char *src = (const char *) from + offset;

	memcpy(to, src, len);
	return 0;
}
/**
* ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
......@@ -655,7 +590,7 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
*
* Description:
* Build new header and do some sanity checks on the packet before sending
* it to ip6_build_xmit().
* it.
*
* Return:
* 0
......@@ -666,18 +601,17 @@ int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
struct net_device_stats *stats = &t->stat;
struct ipv6hdr *ipv6h = skb->nh.ipv6h;
struct ipv6_txoptions *orig_opt = NULL;
struct ipv6_txoptions *opt = NULL;
int encap_limit = -1;
__u16 offset;
struct flowi fl;
struct ip6_flowlabel *fl_lbl = NULL;
int err = 0;
struct dst_entry *dst;
int link_failure = 0;
struct sock *sk = ip6_socket->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *tdev;
int mtu;
int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
int err;
int pkt_len;
if (t->recursion++) {
stats->collisions++;
......@@ -700,58 +634,39 @@ int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
encap_limit = t->parms.encap_limit;
}
ip6_xmit_lock();
memcpy(&fl, &t->fl, sizeof (fl));
proto = fl.proto;
if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
if (fl.fl6_flowlabel) {
fl_lbl = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (fl_lbl)
orig_opt = fl_lbl->opt;
}
if (encap_limit >= 0) {
if (!(opt = merge_options(sk, encap_limit, orig_opt))) {
goto tx_err_free_fl_lbl;
}
} else {
opt = orig_opt;
}
dst = __sk_dst_check(sk, np->dst_cookie);
if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
goto tx_err;
if (dst) {
if (np->daddr_cache == NULL ||
ipv6_addr_cmp(&fl.fl6_dst, np->daddr_cache) ||
(fl.oif && fl.oif != dst->dev->ifindex)) {
dst = NULL;
}
}
if (dst == NULL) {
dst = ip6_route_output(sk, &fl);
if (dst->error) {
stats->tx_carrier_errors++;
link_failure = 1;
goto tx_err_dst_release;
}
/* local routing loop */
if (dst->dev == dev) {
stats->collisions++;
if (net_ratelimit())
printk(KERN_WARNING
"%s: Local routing loop detected!\n",
t->parms.name);
goto tx_err_dst_release;
}
ipv6_addr_copy(&np->daddr, &fl.fl6_dst);
ipv6_addr_copy(&np->saddr, &fl.fl6_src);
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
else
dst = ip6_route_output(NULL, &fl);
if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
goto tx_err_link_failure;
tdev = dst->dev;
if (tdev == dev) {
stats->collisions++;
if (net_ratelimit())
printk(KERN_WARNING
"%s: Local routing loop detected!\n",
t->parms.name);
goto tx_err_dst_release;
}
mtu = dst_pmtu(dst) - sizeof (*ipv6h);
if (opt) {
mtu -= (opt->opt_nflen + opt->opt_flen);
max_headroom += 8;
mtu -= 8;
}
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
......@@ -764,41 +679,71 @@ int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
goto tx_err_dst_release;
}
err = ip6_append_data(sk, ip6ip6_getfrag, skb->nh.raw, skb->len, 0,
t->parms.hop_limit, opt, &fl,
(struct rt6_info *)dst, MSG_DONTWAIT);
skb->h.raw = skb->nh.raw;
if (err) {
ip6_flush_pending_frames(sk);
} else {
err = ip6_push_pending_frames(sk);
err = (err < 0 ? err : 0);
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom += LL_RESERVED_SPACE(tdev);
if (skb_headroom(skb) < max_headroom ||
skb_cloned(skb) || skb_shared(skb)) {
struct sk_buff *new_skb;
if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
goto tx_err_dst_release;
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
kfree_skb(skb);
skb = new_skb;
}
if (!err) {
stats->tx_bytes += skb->len;
dst_release(skb->dst);
skb->dst = dst_clone(dst);
if (opt)
ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
ipv6h = skb->nh.ipv6h;
*(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
#endif
pkt_len = skb->len;
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
stats->tx_bytes += pkt_len;
stats->tx_packets++;
} else {
stats->tx_errors++;
stats->tx_aborted_errors++;
}
if (opt && opt != orig_opt)
sock_kfree_s(sk, opt, opt->tot_len);
ip6_tnl_dst_store(t, dst);
if (opt)
kfree(opt);
fl6_sock_release(fl_lbl);
ip6_dst_store(sk, dst, &np->daddr);
ip6_xmit_unlock();
kfree_skb(skb);
t->recursion--;
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
if (opt && opt != orig_opt)
sock_kfree_s(sk, opt, opt->tot_len);
tx_err_free_fl_lbl:
fl6_sock_release(fl_lbl);
ip6_xmit_unlock();
if (link_failure)
dst_link_failure(skb);
if (opt)
kfree(opt);
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
......@@ -850,13 +795,12 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct ip6_tnl_parm *p = &t->parms;
struct flowi *fl;
struct flowi *fl = &t->fl;
memcpy(&dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(&dev->broadcast, &p->raddr, sizeof(struct in6_addr));
/* Set up flowi template */
fl = &t->fl;
ipv6_addr_copy(&fl->fl6_src, &p->laddr);
ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
fl->oif = p->link;
......@@ -881,10 +825,7 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
if (rt == NULL)
return;
/* as long as tunnels use the same socket for transmission,
locally nested tunnels won't work */
if (rt->rt6i_dev && rt->rt6i_dev->type != ARPHRD_TUNNEL6) {
if (rt->rt6i_dev) {
dev->iflink = rt->rt6i_dev->ifindex;
dev->hard_header_len = rt->rt6i_dev->hard_header_len +
......@@ -1139,7 +1080,7 @@ ip6ip6_tnl_dev_init(struct net_device *dev)
/**
 * ip6ip6_fb_tnl_dev_init - initializer for the fallback tunnel device
 * @dev: fallback device
 *
 * Return: 0
 **/
int ip6ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = dev->priv;

	/* the rendered text repeated this call twice (diff residue);
	 * the generic init must run exactly once */
	ip6ip6_tnl_dev_init_gen(dev);
	dev_hold(dev);
	tnls_wc[0] = t;
	return 0;
}
......@@ -1159,61 +1100,28 @@ static struct inet6_protocol ip6ip6_protocol = {
/**
 * ip6_tunnel_init - register the IPv6-in-IPv6 protocol handler and
 *	create the fallback tunnel device
 *
 * Return: 0 on success, negative errno on failure.
 **/
int __init ip6_tunnel_init(void)
{
	int err;

	if ((err = inet6_add_protocol(&ip6ip6_protocol, IPPROTO_IPV6)) < 0) {
		printk(KERN_ERR "Failed to register IPv6 protocol\n");
		/* nothing registered yet, so nothing to undo */
		return err;
	}
	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					 ip6ip6_tnl_dev_setup);
	if (!ip6ip6_fb_tnl_dev) {
		err = -ENOMEM;
		goto fail;
	}
	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;

	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
		kfree(ip6ip6_fb_tnl_dev);
		goto fail;
	}
	return 0;
fail:
	/* undo the protocol registration on any later failure */
	inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6);
	return err;
}
......@@ -1223,18 +1131,8 @@ int __init ip6_tunnel_init(void)
/**
 * ip6_tunnel_cleanup - unregister the fallback device and the
 *	IPv6-in-IPv6 protocol handler
 **/
void ip6_tunnel_cleanup(void)
{
	unregister_netdev(ip6ip6_fb_tnl_dev);
	inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6);
}
#ifdef MODULE
......
......@@ -45,3 +45,4 @@ EXPORT_SYMBOL(ipv6_ext_hdr);
EXPORT_SYMBOL(ip6_append_data);
EXPORT_SYMBOL(ip6_flush_pending_frames);
EXPORT_SYMBOL(ip6_push_pending_frames);
EXPORT_SYMBOL(ipv6_push_nfrag_opts);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment