summary refs log tree commit diff
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-18 14:40:30 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-18 14:40:30 -0700
commita57793651ff1a09ef18bade998632435ca2dc13f (patch)
treefffc839d7b001f196421f09f0a06491588835fe1 /net/ipv6
parent9cf52b2921fbe62566b6b2ee79f71203749c9e5e (diff)
parent52f095ee88d8851866bc7694ab991ca5abf21d5e (diff)
downloadlinux-a57793651ff1a09ef18bade998632435ca2dc13f.tar.gz
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (51 commits)
  [IPV6]: Fix again the fl6_sock_lookup() fixed locking
  [NETFILTER]: nf_conntrack_tcp: fix connection reopening fix
  [IPV6]: Fix race in ipv6_flowlabel_opt() when inserting two labels
  [IPV6]: Lost locking in fl6_sock_lookup
  [IPV6]: Lost locking when inserting a flowlabel in ipv6_fl_list
  [NETFILTER]: xt_sctp: fix mistake to pass a pointer where array is required
  [NET]: Fix OOPS due to missing check in dev_parse_header().
  [TCP]: Remove lost_retrans zero seqno special cases
  [NET]: fix carrier-on bug?
  [NET]: Fix uninitialised variable in ip_frag_reasm()
  [IPSEC]: Rename mode to outer_mode and add inner_mode
  [IPSEC]: Disallow combinations of RO and AH/ESP/IPCOMP
  [IPSEC]: Use the top IPv4 route's peer instead of the bottom
  [IPSEC]: Store afinfo pointer in xfrm_mode
  [IPSEC]: Add missing BEET checks
  [IPSEC]: Move type and mode map into xfrm_state.c
  [IPSEC]: Fix length check in xfrm_parse_spi
  [IPSEC]: Move ip_summed zapping out of xfrm6_rcv_spi
  [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi
  [IPSEC]: Move tunnel parsing for IPv4 out of xfrm4_input
  ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ah6.c11
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/ip6_flowlabel.c57
-rw-r--r--net/ipv6/ipcomp6.c9
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c112
-rw-r--r--net/ipv6/reassembly.c131
-rw-r--r--net/ipv6/xfrm6_input.c14
-rw-r--r--net/ipv6/xfrm6_mode_beet.c1
-rw-r--r--net/ipv6/xfrm6_mode_ro.c9
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c1
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c17
-rw-r--r--net/ipv6/xfrm6_state.c7
-rw-r--r--net/ipv6/xfrm6_tunnel.c4
16 files changed, 146 insertions, 249 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 52d10d213217..348bd8d06112 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -255,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
-	int err = -ENOMEM;
-
-	if (!idev || !idev->dev)
-		return -EINVAL;
-
 	if (snmp_mib_init((void **)idev->stats.ipv6,
 			  sizeof(struct ipstats_mib),
 			  __alignof__(struct ipstats_mib)) < 0)
@@ -280,15 +275,14 @@ err_icmpmsg:
 err_icmp:
 	snmp_mib_free((void **)idev->stats.ipv6);
 err_ip:
-	return err;
+	return -ENOMEM;
 }
 
-static int snmp6_free_dev(struct inet6_dev *idev)
+static void snmp6_free_dev(struct inet6_dev *idev)
 {
 	snmp_mib_free((void **)idev->stats.icmpv6msg);
 	snmp_mib_free((void **)idev->stats.icmpv6);
 	snmp_mib_free((void **)idev->stats.ipv6);
-	return 0;
 }
 
 /* Nobody refers to this device, we may destroy it. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bc929381fa46..1b1caf3aa1c1 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -747,6 +747,7 @@ static void cleanup_ipv6_mibs(void)
 {
 	snmp_mib_free((void **)ipv6_statistics);
 	snmp_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)icmpv6msg_statistics);
 	snmp_mib_free((void **)udp_stats_in6);
 	snmp_mib_free((void **)udplite_stats_in6);
 }
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f9f689162692..67cd06613a25 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -344,6 +344,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
+	skb->ip_summed = CHECKSUM_NONE;
+
 	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ip_auth_hdr *)skb->data;
 	ahp = x->data;
@@ -475,8 +477,15 @@ static int ah6_init_state(struct xfrm_state *x)
 
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
 					  ahp->icv_trunc_len);
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 	x->data = ahp;
 
 	return 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9eb928598351..b0715432e454 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -354,8 +354,15 @@ static int esp6_init_state(struct xfrm_state *x)
 				    (x->ealg->alg_key_len + 7) / 8))
 		goto error;
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 	x->data = esp;
 	return 0;
 
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 217d60f9fc80..b12cc22e7745 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -154,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy)
 	write_unlock(&ip6_fl_lock);
 }
 
-static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
+static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label)
 {
+	struct ip6_flowlabel *lfl;
+
 	fl->label = label & IPV6_FLOWLABEL_MASK;
 
 	write_lock_bh(&ip6_fl_lock);
@@ -163,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
 		for (;;) {
 			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
 			if (fl->label) {
-				struct ip6_flowlabel *lfl;
 				lfl = __fl_lookup(fl->label);
 				if (lfl == NULL)
 					break;
 			}
 		}
+	} else {
+		/*
+		 * we dropper the ip6_fl_lock, so this entry could reappear
+		 * and we need to recheck with it.
+		 *
+		 * OTOH no need to search the active socket first, like it is
+		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
+		 * with the same label can only appear on another sock
+		 */
+		lfl = __fl_lookup(fl->label);
+		if (lfl != NULL) {
+			atomic_inc(&lfl->users);
+			write_unlock_bh(&ip6_fl_lock);
+			return lfl;
+		}
 	}
 
 	fl->lastuse = jiffies;
@@ -176,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
 	fl_ht[FL_HASH(fl->label)] = fl;
 	atomic_inc(&fl_size);
 	write_unlock_bh(&ip6_fl_lock);
-	return 0;
+	return NULL;
 }
 
 
@@ -190,14 +206,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 
 	label &= IPV6_FLOWLABEL_MASK;
 
+	read_lock_bh(&ip6_sk_fl_lock);
 	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
+			read_unlock_bh(&ip6_sk_fl_lock);
 			return fl;
 		}
 	}
+	read_unlock_bh(&ip6_sk_fl_lock);
 	return NULL;
 }
 
@@ -409,6 +428,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 	return 0;
 }
 
+static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
+		struct ip6_flowlabel *fl)
+{
+	write_lock_bh(&ip6_sk_fl_lock);
+	sfl->fl = fl;
+	sfl->next = np->ipv6_fl_list;
+	np->ipv6_fl_list = sfl;
+	write_unlock_bh(&ip6_sk_fl_lock);
+}
+
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 {
 	int err;
@@ -416,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 	struct in6_flowlabel_req freq;
 	struct ipv6_fl_socklist *sfl1=NULL;
 	struct ipv6_fl_socklist *sfl, **sflp;
-	struct ip6_flowlabel *fl;
+	struct ip6_flowlabel *fl, *fl1 = NULL;
+
 
 	if (optlen < sizeof(freq))
 		return -EINVAL;
@@ -472,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
 
 		if (freq.flr_label) {
-			struct ip6_flowlabel *fl1 = NULL;
-
 			err = -EEXIST;
 			read_lock_bh(&ip6_sk_fl_lock);
 			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
@@ -492,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 			if (fl1 == NULL)
 				fl1 = fl_lookup(freq.flr_label);
 			if (fl1) {
+recheck:
 				err = -EEXIST;
 				if (freq.flr_flags&IPV6_FL_F_EXCL)
 					goto release;
@@ -513,11 +542,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					fl1->linger = fl->linger;
 				if ((long)(fl->expires - fl1->expires) > 0)
 					fl1->expires = fl->expires;
-				write_lock_bh(&ip6_sk_fl_lock);
-				sfl1->fl = fl1;
-				sfl1->next = np->ipv6_fl_list;
-				np->ipv6_fl_list = sfl1;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_link(np, sfl1, fl1);
 				fl_free(fl);
 				return 0;
 
@@ -534,9 +559,9 @@ release:
 		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
 			goto done;
 
-		err = fl_intern(fl, freq.flr_label);
-		if (err)
-			goto done;
+		fl1 = fl_intern(fl, freq.flr_label);
+		if (fl1 != NULL)
+			goto recheck;
 
 		if (!freq.flr_label) {
 			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
@@ -545,9 +570,7 @@ release:
 			}
 		}
 
-		sfl1->fl = fl;
-		sfl1->next = np->ipv6_fl_list;
-		np->ipv6_fl_list = sfl1;
+		fl_link(np, sfl1, fl);
 		return 0;
 
 	default:
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 28fc8edfdc3a..80ef2a1d39fd 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -411,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 
 	mutex_lock(&ipcomp6_resource_mutex);
 	if (!ipcomp6_alloc_scratches())
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 726fafd41961..e170c67c47a5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -130,22 +130,6 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 	kfree_skb(skb);
 }
 
-static void nf_frag_free(struct inet_frag_queue *q)
-{
-	kfree(container_of(q, struct nf_ct_frag6_queue, q));
-}
-
-static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
-{
-	struct nf_ct_frag6_queue *fq;
-
-	fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
-	if (fq == NULL)
-		return NULL;
-	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
-	return fq;
-}
-
 /* Destruction primitives. */
 
 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
@@ -168,7 +152,10 @@ static void nf_ct_frag6_evictor(void)
 
 static void nf_ct_frag6_expire(unsigned long data)
 {
-	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
+	struct nf_ct_frag6_queue *fq;
+
+	fq = container_of((struct inet_frag_queue *)data,
+			struct nf_ct_frag6_queue, q);
 
 	spin_lock(&fq->q.lock);
 
@@ -184,89 +171,29 @@ out:
 
 /* Creation primitives. */
 
-static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
-					  struct nf_ct_frag6_queue *fq_in)
+static __inline__ struct nf_ct_frag6_queue *
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 {
-	struct nf_ct_frag6_queue *fq;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
-
-	write_lock(&nf_frags.lock);
-#ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
-		if (fq->id == fq_in->id &&
-		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			write_unlock(&nf_frags.lock);
-			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in);
-			return fq;
-		}
-	}
-#endif
-	fq = fq_in;
-
-	if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
-		atomic_inc(&fq->q.refcnt);
-
-	atomic_inc(&fq->q.refcnt);
-	hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
-	INIT_LIST_HEAD(&fq->q.lru_list);
-	list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
-	nf_frags.nqueues++;
-	write_unlock(&nf_frags.lock);
-	return fq;
-}
+	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
+	unsigned int hash;
 
+	arg.id = id;
+	arg.src = src;
+	arg.dst = dst;
+	hash = ip6qhashfn(id, src, dst);
 
-static struct nf_ct_frag6_queue *
-nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   struct in6_addr *dst)
-{
-	struct nf_ct_frag6_queue *fq;
-
-	if ((fq = frag_alloc_queue()) == NULL) {
-		pr_debug("Can't alloc new queue\n");
+	q = inet_frag_find(&nf_frags, &arg, hash);
+	if (q == NULL)
 		goto oom;
-	}
-
-	fq->id = id;
-	ipv6_addr_copy(&fq->saddr, src);
-	ipv6_addr_copy(&fq->daddr, dst);
-
-	setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
-	spin_lock_init(&fq->q.lock);
-	atomic_set(&fq->q.refcnt, 1);
 
-	return nf_ct_frag6_intern(hash, fq);
+	return container_of(q, struct nf_ct_frag6_queue, q);
 
 oom:
+	pr_debug("Can't alloc new queue\n");
 	return NULL;
 }
 
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
-{
-	struct nf_ct_frag6_queue *fq;
-	struct hlist_node *n;
-	unsigned int hash = ip6qhashfn(id, src, dst);
-
-	read_lock(&nf_frags.lock);
-	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
-		if (fq->id == id &&
-		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			read_unlock(&nf_frags.lock);
-			return fq;
-		}
-	}
-	read_unlock(&nf_frags.lock);
-
-	return nf_ct_frag6_create(hash, id, src, dst);
-}
-
 
 static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			     struct frag_hdr *fhdr, int nhoff)
@@ -749,9 +676,12 @@ int nf_ct_frag6_init(void)
 {
 	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
-	nf_frags.destructor = nf_frag_free;
+	nf_frags.constructor = ip6_frag_init;
+	nf_frags.destructor = NULL;
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	nf_frags.match = ip6_frag_match;
+	nf_frags.frag_expire = nf_ct_frag6_expire;
 	inet_frags_init(&nf_frags);
 
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6ad19cfc2025..76c88a93b9b5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -143,6 +143,18 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
+int ip6_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct frag_queue *fq;
+	struct ip6_create_arg *arg = a;
+
+	fq = container_of(q, struct frag_queue, q);
+	return (fq->id == arg->id &&
+			ipv6_addr_equal(&fq->saddr, arg->src) &&
+			ipv6_addr_equal(&fq->daddr, arg->dst));
+}
+EXPORT_SYMBOL(ip6_frag_match);
+
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -152,20 +164,16 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
-static void ip6_frag_free(struct inet_frag_queue *fq)
+void ip6_frag_init(struct inet_frag_queue *q, void *a)
 {
-	kfree(container_of(fq, struct frag_queue, q));
-}
-
-static inline struct frag_queue *frag_alloc_queue(void)
-{
-	struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC);
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	struct ip6_create_arg *arg = a;
 
-	if(!fq)
-		return NULL;
-	atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
-	return fq;
+	fq->id = arg->id;
+	ipv6_addr_copy(&fq->saddr, arg->src);
+	ipv6_addr_copy(&fq->daddr, arg->dst);
 }
+EXPORT_SYMBOL(ip6_frag_init);
 
 /* Destruction primitives. */
 
@@ -193,9 +201,11 @@ static void ip6_evictor(struct inet6_dev *idev)
 
 static void ip6_frag_expire(unsigned long data)
 {
-	struct frag_queue *fq = (struct frag_queue *) data;
+	struct frag_queue *fq;
 	struct net_device *dev = NULL;
 
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+
 	spin_lock(&fq->q.lock);
 
 	if (fq->q.last_in & COMPLETE)
@@ -230,98 +240,30 @@ out:
 	fq_put(fq);
 }
 
-/* Creation primitives. */
-
-
-static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
+static __inline__ struct frag_queue *
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+	struct inet6_dev *idev)
 {
-	struct frag_queue *fq;
+	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
 	unsigned int hash;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
 
-	write_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
-#ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
-		if (fq->id == fq_in->id &&
-		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			write_unlock(&ip6_frags.lock);
-			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in);
-			return fq;
-		}
-	}
-#endif
-	fq = fq_in;
-
-	if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
-		atomic_inc(&fq->q.refcnt);
-
-	atomic_inc(&fq->q.refcnt);
-	hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
-	INIT_LIST_HEAD(&fq->q.lru_list);
-	list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
-	ip6_frags.nqueues++;
-	write_unlock(&ip6_frags.lock);
-	return fq;
-}
-
-
-static struct frag_queue *
-ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
-		struct inet6_dev *idev)
-{
-	struct frag_queue *fq;
+	arg.id = id;
+	arg.src = src;
+	arg.dst = dst;
+	hash = ip6qhashfn(id, src, dst);
 
-	if ((fq = frag_alloc_queue()) == NULL)
+	q = inet_frag_find(&ip6_frags, &arg, hash);
+	if (q == NULL)
 		goto oom;
 
-	fq->id = id;
-	ipv6_addr_copy(&fq->saddr, src);
-	ipv6_addr_copy(&fq->daddr, dst);
-
-	init_timer(&fq->q.timer);
-	fq->q.timer.function = ip6_frag_expire;
-	fq->q.timer.data = (long) fq;
-	spin_lock_init(&fq->q.lock);
-	atomic_set(&fq->q.refcnt, 1);
-
-	return ip6_frag_intern(fq);
+	return container_of(q, struct frag_queue, q);
 
 oom:
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
 	return NULL;
 }
 
-static __inline__ struct frag_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
-	struct inet6_dev *idev)
-{
-	struct frag_queue *fq;
-	struct hlist_node *n;
-	unsigned int hash;
-
-	read_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
-	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
-		if (fq->id == id &&
-		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			read_unlock(&ip6_frags.lock);
-			return fq;
-		}
-	}
-	read_unlock(&ip6_frags.lock);
-
-	return ip6_frag_create(id, src, dst, idev);
-}
-
-
 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			   struct frag_hdr *fhdr, int nhoff)
 {
@@ -697,8 +639,11 @@ void __init ipv6_frag_init(void)
 
 	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
-	ip6_frags.destructor = ip6_frag_free;
+	ip6_frags.constructor = ip6_frag_init;
+	ip6_frags.destructor = NULL;
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
+	ip6_frags.match = ip6_frag_match;
+	ip6_frags.frag_expire = ip6_frag_expire;
 	inet_frags_init(&ip6_frags);
 }
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 02f69e544f6f..515783707e86 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,7 +16,7 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
 	int err;
 	__be32 seq;
@@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
-	int nexthdr;
 	unsigned int nhoff;
 
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb_network_header(skb)[nhoff];
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
@@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 			goto drop;
 
 		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6);
+				      nexthdr, AF_INET6);
 		if (x == NULL)
 			goto drop;
 		spin_lock(&x->lock);
@@ -70,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->mode->input(x, skb))
+		if (x->outer_mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
+		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
@@ -99,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
 	       xfrm_nr * sizeof(xfrm_vec[0]));
 	skb->sp->len += xfrm_nr;
-	skb->ip_summed = CHECKSUM_NONE;
 
 	nf_reset(skb);
 
@@ -135,7 +132,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
 
 int xfrm6_rcv(struct sk_buff *skb)
 {
-	return xfrm6_rcv_spi(skb, 0);
+	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+			     0);
 }
 
 EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 13bb1e856764..2bfb4f05c14c 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = {
 	.output = xfrm6_beet_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm6_beet_init(void)
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 957ae36b6695..a7bc8c62317a 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -58,16 +58,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-/*
- * Do nothing about routing optimization header unlike IPsec.
- */
-static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct xfrm_mode xfrm6_ro_mode = {
-	.input = xfrm6_ro_input,
 	.output = xfrm6_ro_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index ea2283879112..fd84e2217274 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = {
 	.output = xfrm6_tunnel_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm6_tunnel_init(void)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index a5a32c17249d..656976760ad4 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb)
 	struct ipv6hdr *iph;
 	int err;
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
+	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm6_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 15aa4c58c315..82e27b80d07d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		__xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL ||
-		    xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			unsigned short encap_family = xfrm[i]->props.family;
 			switch(encap_family) {
 			case AF_INET:
@@ -215,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	i = 0;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-		struct xfrm_state_afinfo *afinfo;
 
 		dst_prev->xfrm = xfrm[i++];
 		dst_prev->dev = rt->u.dst.dev;
@@ -232,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbour for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		/* XXX: When IPv4 is implemented as module and can be unloaded,
-		 * we should manage reference to xfrm4_output in afinfo->output.
-		 * Miyazawa
-		 */
-		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
-		if (!afinfo) {
-			dst = *dst_p;
-			goto error;
-		}
-
-		dst_prev->output = afinfo->output;
-		xfrm_state_put_afinfo(afinfo);
+		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index cdadb4847469..b392bee396f1 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 	/* Rule 4: select IPsec tunnel */
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
-		    src[i]->props.mode == XFRM_MODE_TUNNEL) {
+		    (src[i]->props.mode == XFRM_MODE_TUNNEL ||
+		     src[i]->props.mode == XFRM_MODE_BEET)) {
 			dst[j++] = src[i];
 			src[i] = NULL;
 		}
@@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 	/* Rule 3: select IPsec tunnel */
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
-		    src[i]->mode == XFRM_MODE_TUNNEL) {
+		    (src[i]->mode == XFRM_MODE_TUNNEL ||
+		     src[i]->mode == XFRM_MODE_BEET)) {
 			dst[j++] = src[i];
 			src[i] = NULL;
 		}
@@ -168,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
+	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 3f8a3abde67e..fae90ff31087 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -248,7 +248,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	return 0;
+	return skb_network_header(skb)[IP6CB(skb)->nhoff];
 }
 
 static int xfrm6_tunnel_rcv(struct sk_buff *skb)
@@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
+	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0;
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,