Pull up following revision(s) (requested by rmind in ticket #777): netbsd-6
author riz <riz@NetBSD.org>
Fri, 08 Feb 2013 19:18:09 +0000
branch netbsd-6
changeset 256505 7316dbc6e6a6
parent 256504 2d3d0121e8ab
child 256506 e1ae2ea82a19
Pull up following revision(s) (requested by rmind in ticket #777): usr.sbin/npf/npfctl/npfctl.c: revision 1.27 sys/net/npf/npf_session.c: revision 1.19 usr.sbin/npf/npftest/libnpftest/npf_mbuf_subr.c: revision 1.4 sys/net/npf/npf_rproc.c: revision 1.5 usr.sbin/npf/npftest/README: revision 1.3 sys/sys/mbuf.h: revision 1.151 sys/net/npf/npf_ruleset.c: revision 1.15 usr.sbin/npf/npftest/libnpftest/npf_nbuf_test.c: revision 1.3 sys/net/npf/npf_ruleset.c: revision 1.16 usr.sbin/npf/npftest/libnpftest/npf_state_test.c: revision 1.4 usr.sbin/npf/npftest/libnpftest/npf_nbuf_test.c: revision 1.4 sys/net/npf/npf_inet.c: revision 1.19 sys/net/npf/npf_instr.c: revision 1.15 sys/net/npf/npf_handler.c: revision 1.24 sys/net/npf/npf_handler.c: revision 1.25 sys/net/npf/npf_state_tcp.c: revision 1.12 sys/net/npf/npf_processor.c: revision 1.13 sys/net/npf/npf_impl.h: revision 1.25 sys/net/npf/npf_processor.c: revision 1.14 sys/net/npf/npf_mbuf.c: revision 1.10 sys/net/npf/npf_alg_icmp.c: revision 1.14 sys/net/npf/npf_mbuf.c: revision 1.9 usr.sbin/npf/npftest/libnpftest/npf_nat_test.c: revision 1.2 usr.sbin/npf/npftest/libnpftest/npf_rule_test.c: revision 1.3 sys/net/npf/npf_session.c: revision 1.20 sys/net/npf/npf_alg.c: revision 1.6 sys/kern/uipc_mbuf.c: revision 1.148 sys/net/npf/npf_inet.c: revision 1.20 sys/net/npf/npf.h: revision 1.25 sys/net/npf/npf_nat.c: revision 1.18 sys/net/npf/npf_state.c: revision 1.13 sys/net/npf/npf_sendpkt.c: revision 1.13 sys/net/npf/npf_ext_log.c: revision 1.2 usr.sbin/npf/npftest/libnpftest/npf_processor_test.c: revision 1.4 sys/net/npf/npf_ext_normalise.c: revision 1.2 - Rework NPF's nbuf interface: use advancing and ensuring as a main method. Eliminate unnecessary copy and simplify. Adapt regression tests. - Simplify ICMP ALG a little. While here, handle ICMP ECHO for traceroute. - Minor fixes, misc cleanup. Silence gcc in npf_recache(). 
Add m_ensure_contig() routine, which is equivalent to m_pullup, but does not destroy the mbuf chain on failure (it is kept valid). - nbuf_ensure_contig: rework to use m_ensure_contig(9), which will not free the mbuf chain on failure. Fixes some corner cases. Improve regression test and sprinkle some asserts. - npf_reassembly: clear nbuf on IPv6 reassembly failure path (partial fix). The problem was found and fix provided by Anthony Mallet.
sys/kern/uipc_mbuf.c
sys/net/npf/npf.h
sys/net/npf/npf_alg.c
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_ext_log.c
sys/net/npf/npf_ext_normalise.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_instr.c
sys/net/npf/npf_mbuf.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_processor.c
sys/net/npf/npf_rproc.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_sendpkt.c
sys/net/npf/npf_session.c
sys/net/npf/npf_state.c
sys/net/npf/npf_state_tcp.c
sys/sys/mbuf.h
usr.sbin/npf/npfctl/npfctl.c
usr.sbin/npf/npftest/README
usr.sbin/npf/npftest/libnpftest/npf_mbuf_subr.c
usr.sbin/npf/npftest/libnpftest/npf_nat_test.c
usr.sbin/npf/npftest/libnpftest/npf_nbuf_test.c
usr.sbin/npf/npftest/libnpftest/npf_processor_test.c
usr.sbin/npf/npftest/libnpftest/npf_rule_test.c
usr.sbin/npf/npftest/libnpftest/npf_state_test.c
--- a/sys/kern/uipc_mbuf.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/kern/uipc_mbuf.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_mbuf.c,v 1.145 2012/02/10 17:35:47 para Exp $	*/
+/*	$NetBSD: uipc_mbuf.c,v 1.145.2.1 2013/02/08 19:18:12 riz Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.145 2012/02/10 17:35:47 para Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.145.2.1 2013/02/08 19:18:12 riz Exp $");
 
 #include "opt_mbuftrace.h"
 #include "opt_nmbclusters.h"
@@ -907,21 +907,18 @@
 }
 
 /*
- * Rearrange an mbuf chain so that len bytes are contiguous
- * and in the data area of an mbuf (so that mtod and dtom
- * will work for a structure of size len).  Returns the resulting
- * mbuf chain on success, frees it and returns null on failure.
- * If there is room, it will add up to max_protohdr-len extra bytes to the
- * contiguous region in an attempt to avoid being called next time.
+ * m_ensure_contig: rearrange an mbuf chain that given length of bytes
+ * would be contiguous and in the data area of an mbuf (therefore, mtod()
+ * would work for a structure of given length).
+ *
+ * => On success, returns true and the resulting mbuf chain; false otherwise.
+ * => The mbuf chain may change, but is always preserved valid.
  */
-int MPFail;
-
-struct mbuf *
-m_pullup(struct mbuf *n, int len)
+bool
+m_ensure_contig(struct mbuf **m0, int len)
 {
-	struct mbuf *m;
-	int count;
-	int space;
+	struct mbuf *n = *m0, *m;
+	size_t count, space;
 
 	/*
 	 * If first mbuf has no cluster, and has room for len bytes
@@ -930,17 +927,20 @@
 	 */
 	if ((n->m_flags & M_EXT) == 0 &&
 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
-		if (n->m_len >= len)
-			return (n);
+		if (n->m_len >= len) {
+			return true;
+		}
 		m = n;
 		n = n->m_next;
 		len -= m->m_len;
 	} else {
-		if (len > MHLEN)
-			goto bad;
+		if (len > MHLEN) {
+			return false;
+		}
 		MGET(m, M_DONTWAIT, n->m_type);
-		if (m == 0)
-			goto bad;
+		if (m == NULL) {
+			return false;
+		}
 		MCLAIM(m, n->m_owner);
 		m->m_len = 0;
 		if (n->m_flags & M_PKTHDR) {
@@ -949,7 +949,7 @@
 	}
 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
 	do {
-		count = min(min(max(len, max_protohdr), space), n->m_len);
+		count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len);
 		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
 		  (unsigned)count);
 		len -= count;
@@ -961,16 +961,30 @@
 		else
 			n = m_free(n);
 	} while (len > 0 && n);
-	if (len > 0) {
-		(void) m_free(m);
-		goto bad;
-	}
+
 	m->m_next = n;
-	return (m);
-bad:
-	m_freem(n);
-	MPFail++;
-	return (NULL);
+	*m0 = m;
+
+	return len <= 0;
+}
+
+/*
+ * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error.
+ */
+int MPFail;
+
+struct mbuf *
+m_pullup(struct mbuf *n, int len)
+{
+	struct mbuf *m = n;
+
+	if (!m_ensure_contig(&m, len)) {
+		KASSERT(m != NULL);
+		m_freem(m);
+		MPFail++;
+		m = NULL;
+	}
+	return m;
 }
 
 /*
--- a/sys/net/npf/npf.h	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf.h	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.h,v 1.14.2.10 2013/01/07 16:51:08 riz Exp $	*/
+/*	$NetBSD: npf.h,v 1.14.2.11 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -69,6 +69,7 @@
 /*
  * Packet information cache.
  */
+#include <net/if.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
@@ -86,6 +87,8 @@
 #define	NPC_ICMP	0x40	/* ICMP header. */
 #define	NPC_ICMP_ID	0x80	/* ICMP with query ID. */
 
+#define	NPC_ALG_EXEC	0x100	/* ALG execution. */
+
 #define	NPC_IP46	(NPC_IP4|NPC_IP6)
 
 typedef struct {
@@ -95,20 +98,21 @@
 	npf_addr_t *		npc_srcip;
 	npf_addr_t *		npc_dstip;
 	/* Size (v4 or v6) of IP addresses. */
-	int			npc_alen;
-	u_int			npc_hlen;
-	int			npc_next_proto;
+	uint8_t			npc_alen;
+	uint8_t			npc_hlen;
+	uint16_t		npc_proto;
 	/* IPv4, IPv6. */
 	union {
-		struct ip	v4;
-		struct ip6_hdr	v6;
+		struct ip *		v4;
+		struct ip6_hdr *	v6;
 	} npc_ip;
 	/* TCP, UDP, ICMP. */
 	union {
-		struct tcphdr		tcp;
-		struct udphdr		udp;
-		struct icmp		icmp;
-		struct icmp6_hdr	icmp6;
+		struct tcphdr *		tcp;
+		struct udphdr *		udp;
+		struct icmp *		icmp;
+		struct icmp6_hdr *	icmp6;
+		void *			hdr;
 	} npc_l4;
 } npf_cache_t;
 
@@ -123,7 +127,7 @@
 npf_cache_ipproto(const npf_cache_t *npc)
 {
 	KASSERT(npf_iscached(npc, NPC_IP46));
-	return npc->npc_next_proto;
+	return npc->npc_proto;
 }
 
 static inline u_int
@@ -137,16 +141,31 @@
  * Network buffer interface.
  */
 
-typedef void	nbuf_t;
+#define	NBUF_DATAREF_RESET	0x01
+
+typedef struct {
+	struct mbuf *	nb_mbuf0;
+	struct mbuf *	nb_mbuf;
+	void *		nb_nptr;
+	const ifnet_t *	nb_ifp;
+	int		nb_flags;
+} nbuf_t;
 
-void *		nbuf_dataptr(void *);
-void *		nbuf_advance(nbuf_t **, void *, u_int);
-int		nbuf_advfetch(nbuf_t **, void **, u_int, size_t, void *);
-int		nbuf_advstore(nbuf_t **, void **, u_int, size_t, void *);
-int		nbuf_fetch_datum(nbuf_t *, void *, size_t, void *);
-int		nbuf_store_datum(nbuf_t *, void *, size_t, void *);
+void		nbuf_init(nbuf_t *, struct mbuf *, const ifnet_t *);
+void		nbuf_reset(nbuf_t *);
+struct mbuf *	nbuf_head_mbuf(nbuf_t *);
+
+bool		nbuf_flag_p(const nbuf_t *, int);
+void		nbuf_unset_flag(nbuf_t *, int);
 
-void		nbuf_cksum_barrier(nbuf_t *);
+void *		nbuf_dataptr(nbuf_t *);
+size_t		nbuf_offset(const nbuf_t *);
+void *		nbuf_advance(nbuf_t *, size_t, size_t);
+
+void *		nbuf_ensure_contig(nbuf_t *, size_t);
+void *		nbuf_ensure_writable(nbuf_t *, size_t);
+
+bool		nbuf_cksum_barrier(nbuf_t *, int);
 int		nbuf_add_tag(nbuf_t *, uint32_t, uint32_t);
 int		nbuf_find_tag(nbuf_t *, uint32_t, void **);
 
@@ -264,6 +283,9 @@
 	NPF_STAT_REASSFAIL,
 	/* Other errors. */
 	NPF_STAT_ERROR,
+	/* nbuf non-contiguous cases. */
+	NPF_STAT_NBUF_NONCONTIG,
+	NPF_STAT_NBUF_CONTIG_FAIL,
 	/* Count (last). */
 	NPF_STATS_COUNT
 } npf_stats_t;
--- a/sys/net/npf/npf_alg.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_alg.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg.c,v 1.2.16.3 2012/07/16 22:13:26 riz Exp $	*/
+/*	$NetBSD: npf_alg.c,v 1.2.16.4 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.2.16.3 2012/07/16 22:13:26 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.2.16.4 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -48,17 +48,16 @@
 
 /* NAT ALG structure for registration. */
 struct npf_alg {
-	LIST_ENTRY(npf_alg)		na_entry;
-	npf_alg_t *			na_bptr;
-	npf_algfunc_t			na_match_func;
-	npf_algfunc_t			na_out_func;
-	npf_algfunc_t			na_in_func;
-	npf_algfunc_t			na_seid_func;
+	LIST_ENTRY(npf_alg)	na_entry;
+	npf_alg_t *		na_bptr;
+	npf_alg_func_t		na_match_func;
+	npf_alg_func_t		na_tr_func;
+	npf_alg_sfunc_t		na_se_func;
 };
 
-static LIST_HEAD(, npf_alg)		nat_alg_list	__cacheline_aligned;
-static kmutex_t				nat_alg_lock	__cacheline_aligned;
-static pserialize_t			nat_alg_psz	__cacheline_aligned;
+static LIST_HEAD(, npf_alg)	nat_alg_list	__cacheline_aligned;
+static kmutex_t			nat_alg_lock	__cacheline_aligned;
+static pserialize_t		nat_alg_psz	__cacheline_aligned;
 
 void
 npf_alg_sysinit(void)
@@ -84,17 +83,16 @@
  * XXX: Protected by module lock, but unify serialisation later.
  */
 npf_alg_t *
-npf_alg_register(npf_algfunc_t match, npf_algfunc_t out, npf_algfunc_t in,
-    npf_algfunc_t seid)
+npf_alg_register(npf_alg_func_t mfunc, npf_alg_func_t tfunc,
+    npf_alg_sfunc_t sfunc)
 {
 	npf_alg_t *alg;
 
 	alg = kmem_zalloc(sizeof(npf_alg_t), KM_SLEEP);
 	alg->na_bptr = alg;
-	alg->na_match_func = match;
-	alg->na_out_func = out;
-	alg->na_in_func = in;
-	alg->na_seid_func = seid;
+	alg->na_match_func = mfunc;
+	alg->na_tr_func = tfunc;
+	alg->na_se_func = sfunc;
 
 	mutex_enter(&nat_alg_lock);
 	LIST_INSERT_HEAD(&nat_alg_list, alg, na_entry);
@@ -127,7 +125,7 @@
  * npf_alg_match: call ALG matching inspectors, determine if any ALG matches.
  */
 bool
-npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt)
+npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di)
 {
 	npf_alg_t *alg;
 	bool match = false;
@@ -135,9 +133,9 @@
 
 	s = pserialize_read_enter();
 	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
-		npf_algfunc_t func = alg->na_match_func;
+		npf_alg_func_t func = alg->na_match_func;
 
-		if (func && func(npc, nbuf, nt)) {
+		if (func && func(npc, nbuf, nt, di)) {
 			match = true;
 			break;
 		}
@@ -150,41 +148,37 @@
  * npf_alg_exec: execute ALG hooks for translation.
  */
 void
-npf_alg_exec(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, const int di)
+npf_alg_exec(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di)
 {
 	npf_alg_t *alg;
 	int s;
 
 	s = pserialize_read_enter();
 	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
-		if ((di & PFIL_OUT) != 0 && alg->na_out_func != NULL) {
-			(alg->na_out_func)(npc, nbuf, nt);
-			continue;
-		}
-		if ((di & PFIL_IN) != 0 && alg->na_in_func != NULL) {
-			(alg->na_in_func)(npc, nbuf, nt);
-			continue;
+		npf_alg_func_t func;
+
+		if ((func = alg->na_tr_func) != NULL) {
+			(func)(npc, nbuf, nt, di);
 		}
 	}
 	pserialize_read_exit(s);
 }
 
-bool
-npf_alg_sessionid(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *key)
+npf_session_t *
+npf_alg_session(npf_cache_t *npc, nbuf_t *nbuf, int di)
 {
+	npf_session_t *se = NULL;
 	npf_alg_t *alg;
-	bool nkey = false;
 	int s;
 
 	s = pserialize_read_enter();
 	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
-		npf_algfunc_t func = alg->na_seid_func;
+		npf_alg_sfunc_t func = alg->na_se_func;
 
-		if (func && func(npc, nbuf, (npf_nat_t *)key)) {
-			nkey = true;
+		if (func && (se = func(npc, nbuf, di)) != NULL) {
 			break;
 		}
 	}
 	pserialize_read_exit(s);
-	return nkey;
+	return se;
 }
--- a/sys/net/npf/npf_alg_icmp.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.8.4.5 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.8.4.6 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.8.4.5 2012/11/18 21:48:56 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.8.4.6 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -57,18 +57,18 @@
  * Traceroute criteria.
  *
  * IANA assigned base port: 33434.  However, common practice is to increase
- * the port, thus monitor [33434-33484] range.  Additional filter is TTL < 50.
+ * the port, thus monitor [33434-33484] range.  Additional filter is low TTL.
  */
 
 #define	TR_BASE_PORT	33434
 #define	TR_PORT_RANGE	33484
-#define	TR_MAX_TTL	50
+#define	TR_MAX_TTL	48
 
 static npf_alg_t *	alg_icmp	__read_mostly;
 
-static bool		npfa_icmp_match(npf_cache_t *, nbuf_t *, void *);
-static bool		npfa_icmp_natin(npf_cache_t *, nbuf_t *, void *);
-static bool		npfa_icmp_session(npf_cache_t *, nbuf_t *, void *);
+static bool	npfa_icmp_match(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
+static bool	npfa_icmp_nat(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
+static npf_session_t *npfa_icmp_session(npf_cache_t *, nbuf_t *, int);
 
 /*
  * npf_alg_icmp_{init,fini,modcmd}: ICMP ALG initialization, destruction
@@ -79,8 +79,8 @@
 npf_alg_icmp_init(void)
 {
 
-	alg_icmp = npf_alg_register(npfa_icmp_match, NULL,
-	    npfa_icmp_natin, npfa_icmp_session);
+	alg_icmp = npf_alg_register(npfa_icmp_match,
+	    npfa_icmp_nat, npfa_icmp_session);
 	KASSERT(alg_icmp != NULL);
 	return 0;
 }
@@ -115,10 +115,10 @@
  * associates ALG with NAT entry.
  */
 static bool
-npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
+npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di)
 {
 	const int proto = npf_cache_ipproto(npc);
-	struct ip *ip = &npc->npc_ip.v4;
+	const struct ip *ip = npc->npc_ip.v4;
 	in_port_t dport;
 
 	KASSERT(npf_iscached(npc, NPC_IP46));
@@ -129,13 +129,23 @@
 		return false;
 	}
 
-	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &npc->npc_l4.tcp;
+	switch (proto) {
+	case IPPROTO_TCP: {
+		const struct tcphdr *th = npc->npc_l4.tcp;
 		dport = ntohs(th->th_dport);
-	} else if (proto == IPPROTO_UDP) {
-		struct udphdr *uh = &npc->npc_l4.udp;
+		break;
+	}
+	case IPPROTO_UDP: {
+		const struct udphdr *uh = npc->npc_l4.udp;
 		dport = ntohs(uh->uh_dport);
-	} else {
+		break;
+	}
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		/* Just to pass the test below. */
+		dport = TR_BASE_PORT;
+		break;
+	default:
 		return false;
 	}
 
@@ -145,20 +155,18 @@
 	}
 
 	/* Associate ALG with translation entry. */
-	npf_nat_t *nt = ntptr;
 	npf_nat_setalg(nt, alg_icmp, 0);
 	return true;
 }
 
 /*
- * npf_icmp{4,6}_uniqid: retrieve unique identifiers - either ICMP query ID
- * or TCP/UDP ports of the original packet, which is embedded.
+ * npfa_icmp{4,6}_inspect: retrieve unique identifiers - either ICMP query
+ * ID or TCP/UDP ports of the original packet, which is embedded.
  */
 
 static bool
-npf_icmp4_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+npfa_icmp4_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf)
 {
-	struct icmp *ic;
 	u_int offby;
 
 	/* Per RFC 792. */
@@ -168,24 +176,14 @@
 	case ICMP_REDIRECT:
 	case ICMP_TIMXCEED:
 	case ICMP_PARAMPROB:
-		/* Should contain original IP header. */
-		offby = offsetof(struct icmp, icmp_ip);
-		if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
-			return false;
-		}
-		/* Fetch into the cache. */
-		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
+		if (npc == NULL) {
 			return false;
 		}
-		switch (npf_cache_ipproto(npc)) {
-		case IPPROTO_TCP:
-			return npf_fetch_tcp(npc, nbuf, n_ptr);
-		case IPPROTO_UDP:
-			return npf_fetch_udp(npc, nbuf, n_ptr);
-		default:
+		/* Should contain original IP header. */
+		if (!nbuf_advance(nbuf, offsetof(struct icmp, icmp_ip), 0)) {
 			return false;
 		}
-		return true;
+		return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0;
 
 	case ICMP_ECHOREPLY:
 	case ICMP_ECHO:
@@ -193,11 +191,9 @@
 	case ICMP_TSTAMPREPLY:
 	case ICMP_IREQ:
 	case ICMP_IREQREPLY:
-		/* Should contain ICMP query ID. */
-		ic = &npc->npc_l4.icmp;
+		/* Should contain ICMP query ID - ensure. */
 		offby = offsetof(struct icmp, icmp_id);
-		if (nbuf_advfetch(&nbuf, &n_ptr, offby,
-		    sizeof(uint16_t), &ic->icmp_id)) {
+		if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) {
 			return false;
 		}
 		npc->npc_info |= NPC_ICMP_ID;
@@ -205,14 +201,12 @@
 	default:
 		break;
 	}
-	/* No unique IDs. */
 	return false;
 }
 
 static bool
-npf_icmp6_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+npfa_icmp6_inspect(const int type, npf_cache_t *npc, nbuf_t *nbuf)
 {
-	struct icmp6_hdr *ic6;
 	u_int offby;
 
 	/* Per RFC 4443. */
@@ -221,32 +215,20 @@
 	case ICMP6_PACKET_TOO_BIG:
 	case ICMP6_TIME_EXCEEDED:
 	case ICMP6_PARAM_PROB:
-		/* Should contain original IP header. */
-		offby = sizeof(struct icmp6_hdr);
-		if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
-			return false;
-		}
-		/* Fetch into the cache. */
-		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
+		if (npc == NULL) {
 			return false;
 		}
-		switch (npf_cache_ipproto(npc)) {
-		case IPPROTO_TCP:
-			return npf_fetch_tcp(npc, nbuf, n_ptr);
-		case IPPROTO_UDP:
-			return npf_fetch_udp(npc, nbuf, n_ptr);
-		default:
+		/* Should contain original IP header. */
+		if (!nbuf_advance(nbuf, sizeof(struct icmp6_hdr), 0)) {
 			return false;
 		}
-		return true;
+		return (npf_cache_all(npc, nbuf) & NPC_LAYER4) != 0;
 
 	case ICMP6_ECHO_REQUEST:
 	case ICMP6_ECHO_REPLY:
-		/* Should contain ICMP query ID. */
-		ic6 = &npc->npc_l4.icmp6;
+		/* Should contain ICMP query ID - ensure. */
 		offby = offsetof(struct icmp6_hdr, icmp6_id);
-		if (nbuf_advfetch(&nbuf, &n_ptr, offby,
-		    sizeof(uint16_t), &ic6->icmp6_id)) {
+		if (!nbuf_advance(nbuf, offby, sizeof(uint16_t))) {
 			return false;
 		}
 		npc->npc_info |= NPC_ICMP_ID;
@@ -254,189 +236,204 @@
 	default:
 		break;
 	}
-	/* No unique IDs. */
 	return false;
 }
 
-static void
-npfa_srcdst_invert(npf_cache_t *npc)
-{
-	const int proto = npf_cache_ipproto(npc);
-	npf_addr_t *tmp_ip;
-
-	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &npc->npc_l4.tcp;
-		in_port_t tmp_sport = th->th_sport;
-		th->th_sport = th->th_dport;
-		th->th_dport = tmp_sport;
-
-	} else if (proto == IPPROTO_UDP) {
-		struct udphdr *uh = &npc->npc_l4.udp;
-		in_port_t tmp_sport = uh->uh_sport;
-		uh->uh_sport = uh->uh_dport;
-		uh->uh_dport = tmp_sport;
-	}
-	tmp_ip = npc->npc_srcip;
-	npc->npc_srcip = npc->npc_dstip;
-	npc->npc_dstip = tmp_ip;
-}
-
 /*
- * npfa_icmp_session: ALG session inspector, returns unique identifiers.
+ * npfa_icmp_session: ALG ICMP inspector.
+ *
+ * => Returns true if "enpc" is filled.
  */
 static bool
-npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
+npfa_icmp_inspect(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *enpc)
 {
-	npf_cache_t *key = keyptr;
 	bool ret;
 
-	KASSERT(key->npc_info == 0);
-
-	/* IP + ICMP?  Get unique identifiers from ICMP packet. */
-	if (!npf_iscached(npc, NPC_IP4)) {
-		return false;
-	}
-	if (npf_cache_ipproto(npc) != IPPROTO_ICMP) {
-		return false;
-	}
+	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_ICMP));
 
 	/* Advance to ICMP header. */
-	void *n_ptr = nbuf_dataptr(nbuf);
-	const u_int hlen = npf_cache_hlen(npc);
-
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, hlen)) == NULL) {
+	nbuf_reset(nbuf);
+	if (!nbuf_advance(nbuf, npf_cache_hlen(npc), 0)) {
 		return false;
 	}
+	enpc->npc_info = 0;
 
 	/*
-	 * Fetch relevant data into the separate ("key") cache.
+	 * Inspect the ICMP packet.  The relevant data might be in the
+	 * embedded packet.  Fill the "enpc" cache, if so.
 	 */
-	struct icmp *ic = &npc->npc_l4.icmp;
-
 	if (npf_iscached(npc, NPC_IP4)) {
-		ret = npf_icmp4_uniqid(ic->icmp_type, key, nbuf, n_ptr);
+		const struct icmp *ic = npc->npc_l4.icmp;
+		ret = npfa_icmp4_inspect(ic->icmp_type, enpc, nbuf);
 	} else if (npf_iscached(npc, NPC_IP6)) {
-		KASSERT(offsetof(struct icmp, icmp_id) ==
-		    offsetof(struct icmp6_hdr, icmp6_id));
-		ret = npf_icmp6_uniqid(ic->icmp_type, key, nbuf, n_ptr);
+		const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
+		ret = npfa_icmp6_inspect(ic6->icmp6_type, enpc, nbuf);
 	} else {
 		ret = false;
 	}
-
 	if (!ret) {
 		return false;
 	}
 
-	if (npf_iscached(key, NPC_ICMP_ID)) {
-		struct icmp *keyic = &key->npc_l4.icmp;
-
-		/* Copy ICMP ID to the cache and flag it. */
+	/* ICMP ID is the original packet, just indicate it. */
+	if (npf_iscached(enpc, NPC_ICMP_ID)) {
 		npc->npc_info |= NPC_ICMP_ID;
-		ic->icmp_id = keyic->icmp_id;
-
-		/* Note: return False, since key is the original cache. */
 		return false;
 	}
 
+	/* Indicate that embedded packet is in the cache. */
+	return true;
+}
+
+static npf_session_t *
+npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, int di)
+{
+	npf_cache_t enpc;
+
+	/* Inspect ICMP packet for an embedded packet. */
+	if (!npf_iscached(npc, NPC_ICMP))
+		return NULL;
+	if (!npfa_icmp_inspect(npc, nbuf, &enpc))
+		return NULL;
+
 	/*
-	 * Embedded IP packet is the original of "forwards" stream.
-	 * We should imitate the "backwards" stream for inspection.
+	 * Invert the identifiers of the embedded packet.
+	 * If it is ICMP, then ensure ICMP ID.
 	 */
-	KASSERT(npf_iscached(key, NPC_IP46));
-	KASSERT(npf_iscached(key, NPC_LAYER4));
-	npfa_srcdst_invert(key);
-	key->npc_alen = npc->npc_alen;
+	union l4 {
+		struct tcphdr th;
+		struct udphdr uh;
+	} l4;
+	bool ret, forw;
+
+	#define	SWAP(type, x, y) { type tmp = x; x = y; y = tmp; }
+	SWAP(npf_addr_t *, enpc.npc_srcip, enpc.npc_dstip);
 
-	return true;
+	switch (npf_cache_ipproto(&enpc)) {
+	case IPPROTO_TCP:
+		l4.th.th_sport = enpc.npc_l4.tcp->th_dport;
+		l4.th.th_dport = enpc.npc_l4.tcp->th_sport;
+		enpc.npc_l4.tcp = &l4.th;
+		break;
+	case IPPROTO_UDP:
+		l4.uh.uh_sport = enpc.npc_l4.udp->uh_dport;
+		l4.uh.uh_dport = enpc.npc_l4.udp->uh_sport;
+		enpc.npc_l4.udp = &l4.uh;
+		break;
+	case IPPROTO_ICMP: {
+		const struct icmp *ic = enpc.npc_l4.icmp;
+		ret = npfa_icmp4_inspect(ic->icmp_type, &enpc, nbuf);
+		if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID))
+			return false;
+		break;
+	}
+	case IPPROTO_ICMPV6: {
+		const struct icmp6_hdr *ic6 = enpc.npc_l4.icmp6;
+		ret = npfa_icmp6_inspect(ic6->icmp6_type, &enpc, nbuf);
+		if (!ret || !npf_iscached(&enpc, NPC_ICMP_ID))
+			return false;
+		break;
+	}
+	default:
+		return false;
+	}
+
+	/* Lookup for a session using embedded packet. */
+	return npf_session_lookup(&enpc, nbuf, di, &forw);
 }
 
 /*
- * npfa_icmp_natin: ALG inbound translation inspector, rewrite IP address
+ * npfa_icmp_nat: ALG inbound translation inspector, rewrite IP address
  * in the IP header, which is embedded in ICMP packet.
  */
 static bool
-npfa_icmp_natin(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
+npfa_icmp_nat(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, int di)
 {
-	npf_cache_t enpc = { .npc_info = 0 };
+	npf_cache_t enpc;
 
-	/* XXX: Duplicated work (done at session inspection). */
-	if (!npfa_icmp_session(npc, nbuf, &enpc)) {
+	if (di != PFIL_IN || !npf_iscached(npc, NPC_ICMP))
 		return false;
-	}
-	/* XXX: Restore inversion (inefficient). */
+	if (!npfa_icmp_inspect(npc, nbuf, &enpc))
+		return false;
+
 	KASSERT(npf_iscached(&enpc, NPC_IP46));
 	KASSERT(npf_iscached(&enpc, NPC_LAYER4));
-	npfa_srcdst_invert(&enpc);
+
+	struct icmp *ic = npc->npc_l4.icmp;
+	uint16_t cksum = ic->icmp_cksum;
+
+	CTASSERT(offsetof(struct icmp, icmp_cksum) ==
+	    offsetof(struct icmp6_hdr, icmp6_cksum));
 
 	/*
-	 * Save ICMP and embedded IP with TCP/UDP header checksums, retrieve
-	 * the original address and port, and calculate ICMP checksum for
-	 * embedded packet changes, while data is not rewritten in the cache.
+	 * Retrieve the original address and port, then calculate ICMP
+	 * checksum for these changes in the embedded packet.  While data
+	 * is not rewritten in the cache, save IP and TCP/UDP checksums.
 	 */
 	const int proto = npf_cache_ipproto(&enpc);
-	const struct ip *eip = &enpc.npc_ip.v4;
-	const struct icmp * const ic = &npc->npc_l4.icmp;
-	uint16_t cksum = ic->icmp_cksum, ecksum = eip->ip_sum, l4cksum;
-	npf_nat_t *nt = ntptr;
+	uint16_t ipcksum = 0, l4cksum = 0;
 	npf_addr_t *addr;
 	in_port_t port;
 
 	npf_nat_getorig(nt, &addr, &port);
 
-	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &enpc.npc_l4.tcp;
-		cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
-		l4cksum = th->th_sum;
-	} else {
-		struct udphdr *uh = &enpc.npc_l4.udp;
-		cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
-		l4cksum = uh->uh_sum;
+	if (npf_iscached(&enpc, NPC_IP4)) {
+		const struct ip *eip = enpc.npc_ip.v4;
+		ipcksum = eip->ip_sum;
 	}
 	cksum = npf_addr_cksum(cksum, enpc.npc_alen, enpc.npc_srcip, addr);
 
-	/*
-	 * Save the original pointers to the main IP header and then advance
-	 * to the embedded IP header after ICMP header.
-	 */
-	void *n_ptr = nbuf_dataptr(nbuf), *cnbuf = nbuf, *cnptr = n_ptr;
-	u_int offby = npf_cache_hlen(npc) + offsetof(struct icmp, icmp_ip);
-
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+	switch (proto) {
+	case IPPROTO_TCP: {
+		const struct tcphdr *th = enpc.npc_l4.tcp;
+		cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
+		l4cksum = th->th_sum;
+		break;
+	}
+	case IPPROTO_UDP: {
+		const struct udphdr *uh = enpc.npc_l4.udp;
+		cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
+		l4cksum = uh->uh_sum;
+		break;
+	}
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		break;
+	default:
 		return false;
 	}
 
 	/*
-	 * Rewrite source IP address and port of the embedded IP header,
-	 * which represents original packet - therefore passing PFIL_OUT.
-	 * Note: checksums are first, since it uses values from the cache.
+	 * Rewrite the source IP address and port of the embedded IP header,
+	 * which represents the original packet, therefore passing PFIL_OUT.
+	 * This updates the checksums in the embedded packet.
 	 */
-	if (!npf_rwrcksum(&enpc, nbuf, n_ptr, PFIL_OUT, addr, port)) {
-		return false;
-	}
-	if (!npf_rwrip(&enpc, nbuf, n_ptr, PFIL_OUT, addr)) {
-		return false;
-	}
-	if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port)) {
+	if (npf_nat_translate(&enpc, nbuf, nt, false, PFIL_OUT)) {
 		return false;
 	}
 
 	/*
-	 * Finish calculation of the ICMP checksum.  Update for embedded IP
-	 * and TCP/UDP checksum changes.  Finally, rewrite ICMP checksum.
+	 * Finish calculation of the ICMP checksum: include the checksum
+	 * change in the embedded packet.
 	 */
-	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &enpc.npc_l4.tcp;
+	if (npf_iscached(&enpc, NPC_IP4)) {
+		const struct ip *eip = enpc.npc_ip.v4;
+		cksum = npf_fixup16_cksum(cksum, ipcksum, eip->ip_sum);
+	}
+	switch (proto) {
+	case IPPROTO_TCP: {
+		const struct tcphdr *th = enpc.npc_l4.tcp;
 		cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum);
-	} else if (l4cksum) {
-		struct udphdr *uh = &enpc.npc_l4.udp;
-		cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
+		break;
 	}
-	cksum = npf_fixup16_cksum(cksum, ecksum, eip->ip_sum);
-
-	offby = npf_cache_hlen(npc) + offsetof(struct icmp, icmp_cksum);
-	if (nbuf_advstore(&cnbuf, &cnptr, offby, sizeof(uint16_t), &cksum)) {
-		return false;
+	case IPPROTO_UDP:
+		if (l4cksum) {
+			const struct udphdr *uh = enpc.npc_l4.udp;
+			cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
+		}
+		break;
 	}
+	ic->icmp_cksum = cksum;
 	return true;
 }
--- a/sys/net/npf/npf_ext_log.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_ext_log.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ext_log.c,v 1.1.4.2 2012/11/18 22:38:26 riz Exp $	*/
+/*	$NetBSD: npf_ext_log.c,v 1.1.4.3 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ext_log.c,v 1.1.4.2 2012/11/18 22:38:26 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ext_log.c,v 1.1.4.3 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/types.h>
 #include <sys/module.h>
@@ -176,8 +176,8 @@
 static void
 npf_log(npf_cache_t *npc, nbuf_t *nbuf, void *meta, int *decision)
 {
+	struct mbuf *m = nbuf_head_mbuf(nbuf);
 	const npf_ext_log_t *log = meta;
-	struct mbuf *m = nbuf;
 	ifnet_t *ifp;
 	int family;
 
--- a/sys/net/npf/npf_ext_normalise.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_ext_normalise.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ext_normalise.c,v 1.1.4.2 2012/11/18 22:38:27 riz Exp $	*/
+/*	$NetBSD: npf_ext_normalise.c,v 1.1.4.3 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ext_normalise.c,v 1.1.4.2 2012/11/18 22:38:27 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ext_normalise.c,v 1.1.4.3 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/types.h>
 #include <sys/module.h>
@@ -99,16 +99,14 @@
  * npf_normalise_ip4: routine to normalise IPv4 header (randomise ID,
  * clear "don't fragment" and/or enforce minimum TTL).
  */
-static inline bool
-npf_normalise_ip4(npf_cache_t *npc, nbuf_t *nbuf, npf_normalise_t *np)
+static inline void
+npf_normalise_ip4(npf_cache_t *npc, npf_normalise_t *np)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
-	struct ip *ip = &npc->npc_ip.v4;
+	struct ip *ip = npc->npc_ip.v4;
 	uint16_t cksum = ip->ip_sum;
 	uint16_t ip_off = ip->ip_off;
 	uint8_t ttl = ip->ip_ttl;
 	u_int minttl = np->n_minttl;
-	u_int offby = 0;
 
 	KASSERT(np->n_random_id || np->n_no_df || minttl);
 
@@ -117,10 +115,6 @@
 		uint16_t oid = ip->ip_id, nid;
 
 		nid = htons(ip_randomid(ip_ids, 0));
-		offby = offsetof(struct ip, ip_id);
-		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(nid), &nid)) {
-			return false;
-		}
 		cksum = npf_fixup16_cksum(cksum, oid, nid);
 		ip->ip_id = nid;
 	}
@@ -129,35 +123,18 @@
 	if (np->n_no_df && (ip_off & htons(IP_DF)) != 0) {
 		uint16_t nip_off = ip_off & ~htons(IP_DF);
 
-		if (nbuf_advstore(&nbuf, &n_ptr,
-		    offsetof(struct ip, ip_off) - offby,
-		    sizeof(uint16_t), &nip_off)) {
-			return false;
-		}
 		cksum = npf_fixup16_cksum(cksum, ip_off, nip_off);
 		ip->ip_off = nip_off;
-		offby = offsetof(struct ip, ip_off);
 	}
 
 	/* Enforce minimum TTL. */
 	if (minttl && ttl < minttl) {
-		if (nbuf_advstore(&nbuf, &n_ptr,
-		    offsetof(struct ip, ip_ttl) - offby,
-		    sizeof(uint8_t), &minttl)) {
-			return false;
-		}
 		cksum = npf_fixup16_cksum(cksum, ttl, minttl);
 		ip->ip_ttl = minttl;
-		offby = offsetof(struct ip, ip_ttl);
 	}
 
 	/* Update IPv4 checksum. */
-	offby = offsetof(struct ip, ip_sum) - offby;
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
-		return false;
-	}
 	ip->ip_sum = cksum;
-	return true;
 }
 
 /*
@@ -167,10 +144,8 @@
 npf_normalise(npf_cache_t *npc, nbuf_t *nbuf, void *params, int *decision)
 {
 	npf_normalise_t *np = params;
-	void *n_ptr = nbuf_dataptr(nbuf);
-	struct tcphdr *th = &npc->npc_l4.tcp;
-	u_int offby, maxmss = np->n_maxmss;
-	uint16_t cksum, mss;
+	struct tcphdr *th = npc->npc_l4.tcp;
+	uint16_t cksum, mss, maxmss = np->n_maxmss;
 	int wscale;
 
 	/* Skip, if already blocking. */
@@ -178,14 +153,9 @@
 		return;
 	}
 
-	/* Normalise IPv4. */
+	/* Normalise IPv4.  Nothing to do for IPv6. */
 	if (npf_iscached(npc, NPC_IP4) && (np->n_random_id || np->n_minttl)) {
-		if (!npf_normalise_ip4(npc, nbuf, np)) {
-			return;
-		}
-	} else if (!npf_iscached(npc, NPC_IP6)) {
-		/* If not IPv6, then nothing to do. */
-		return;
+		npf_normalise_ip4(npc, np);
 	}
 
 	/*
@@ -205,18 +175,12 @@
 		/* Nothing else to do. */
 		return;
 	}
+	maxmss = htons(maxmss);
 
-	/* Calculate TCP checksum, then rewrite MSS and the checksum. */
-	maxmss = htons(maxmss);
-	cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
-	th->th_sum = cksum;
-	mss = maxmss;
-	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
-		return;
-	}
-	offby = npf_cache_hlen(npc) + offsetof(struct tcphdr, th_sum);
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
-		return;
+	/* Store new MSS, calculate TCP checksum and update it. */
+	if (npf_fetch_tcpopts(npc, nbuf, &maxmss, &wscale)) {
+		cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
+		th->th_sum = cksum;
 	}
 }
 
--- a/sys/net/npf/npf_handler.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_handler.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_handler.c,v 1.13.2.7 2012/11/18 22:38:25 riz Exp $	*/
+/*	$NetBSD: npf_handler.c,v 1.13.2.8 2013/02/08 19:18:10 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.13.2.7 2012/11/18 22:38:25 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.13.2.8 2013/02/08 19:18:10 riz Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -71,6 +71,55 @@
 	return 0;
 }
 
+static int
+npf_reassembly(npf_cache_t *npc, nbuf_t *nbuf, struct mbuf **mp)
+{
+	int error = EINVAL;
+
+	/* Reset the mbuf as it may have changed. */
+	*mp = nbuf_head_mbuf(nbuf);
+	nbuf_reset(nbuf);
+
+	if (npf_iscached(npc, NPC_IP4)) {
+		struct ip *ip = nbuf_dataptr(nbuf);
+		error = ip_reass_packet(mp, ip);
+	} else if (npf_iscached(npc, NPC_IP6)) {
+#ifdef INET6
+		/*
+		 * Note: ip6_reass_packet() offset is the start of
+		 * the fragment header.
+		 */
+		const u_int hlen = npf_cache_hlen(npc);
+		error = ip6_reass_packet(mp, hlen);
+		if (error && *mp == NULL) {
+			memset(nbuf, 0, sizeof(nbuf_t));
+		}
+#endif
+	}
+	if (error) {
+		npf_stats_inc(NPF_STAT_REASSFAIL);
+		return error;
+	}
+	if (*mp == NULL) {
+		/* More fragments should come. */
+		npf_stats_inc(NPF_STAT_FRAGMENTS);
+		return 0;
+	}
+
+	/*
+	 * Reassembly is complete, we have the final packet.
+	 * Cache again, since layer 4 data is accessible now.
+	 */
+	nbuf_init(nbuf, *mp, nbuf->nb_ifp);
+	npc->npc_info = 0;
+
+	if (npf_cache_all(npc, nbuf) & NPC_IPFRAG) {
+		return EINVAL;
+	}
+	npf_stats_inc(NPF_STAT_REASSEMBLY);
+	return 0;
+}
+
 /*
  * npf_packet_handler: main packet handling routine for layer 3.
  *
@@ -79,7 +128,7 @@
 int
 npf_packet_handler(void *arg, struct mbuf **mp, ifnet_t *ifp, int di)
 {
-	nbuf_t *nbuf = *mp;
+	nbuf_t nbuf;
 	npf_cache_t npc;
 	npf_session_t *se;
 	npf_ruleset_t *rlset;
@@ -92,6 +141,8 @@
 	 * Initialise packet information cache.
 	 * Note: it is enough to clear the info bits.
 	 */
+	KASSERT(ifp != NULL);
+	nbuf_init(&nbuf, *mp, ifp);
 	npc.npc_info = 0;
 	decision = NPF_DECISION_BLOCK;
 	error = 0;
@@ -99,52 +150,23 @@
 	rp = NULL;
 
 	/* Cache everything.  Determine whether it is an IP fragment. */
-	if (npf_cache_all(&npc, nbuf) & NPC_IPFRAG) {
+	if (npf_cache_all(&npc, &nbuf) & NPC_IPFRAG) {
 		/*
 		 * Pass to IPv4 or IPv6 reassembly mechanism.
 		 */
-		error = EINVAL;
-
-		if (npf_iscached(&npc, NPC_IP4)) {
-			struct ip *ip = nbuf_dataptr(*mp);
-			error = ip_reass_packet(mp, ip);
-		} else if (npf_iscached(&npc, NPC_IP6)) {
-#ifdef INET6
-			/*
-			 * Note: ip6_reass_packet() offset is the start of
-			 * the fragment header.
-			 */
-			const u_int hlen = npf_cache_hlen(&npc);
-			error = ip6_reass_packet(mp, hlen);
-#endif
-		}
+		error = npf_reassembly(&npc, &nbuf, mp);
 		if (error) {
-			npf_stats_inc(NPF_STAT_REASSFAIL);
 			se = NULL;
 			goto out;
 		}
 		if (*mp == NULL) {
 			/* More fragments should come; return. */
-			npf_stats_inc(NPF_STAT_FRAGMENTS);
 			return 0;
 		}
-
-		/*
-		 * Reassembly is complete, we have the final packet.
-		 * Cache again, since layer 4 data is accessible now.
-		 */
-		nbuf = (nbuf_t *)*mp;
-		npc.npc_info = 0;
-
-		if (npf_cache_all(&npc, nbuf) & NPC_IPFRAG) {
-			se = NULL;
-			goto out;
-		}
-		npf_stats_inc(NPF_STAT_REASSEMBLY);
 	}
 
 	/* Inspect the list of sessions. */
-	se = npf_session_inspect(&npc, nbuf, ifp, di, &error);
+	se = npf_session_inspect(&npc, &nbuf, di, &error);
 
 	/* If "passing" session found - skip the ruleset inspection. */
 	if (se && npf_session_pass(se, &rp)) {
@@ -153,13 +175,15 @@
 		goto pass;
 	}
 	if (error) {
-		goto block;
+		if (error == ENETUNREACH)
+			goto block;
+		goto out;
 	}
 
 	/* Acquire the lock, inspect the ruleset using this packet. */
 	npf_core_enter();
 	rlset = npf_core_ruleset();
-	rl = npf_ruleset_inspect(&npc, nbuf, rlset, ifp, di, NPF_LAYER_3);
+	rl = npf_ruleset_inspect(&npc, &nbuf, rlset, di, NPF_LAYER_3);
 	if (rl == NULL) {
 		bool default_pass = npf_default_pass();
 		npf_core_exit();
@@ -173,14 +197,14 @@
 	}
 
 	/*
-	 * Get the rule procedure (acquires a reference) for assocation
+	 * Get the rule procedure (acquires a reference) for association
 	 * with a session (if any) and execution.
 	 */
 	KASSERT(rp == NULL);
 	rp = npf_rule_getrproc(rl);
 
 	/* Apply the rule, release the lock. */
-	error = npf_rule_apply(&npc, nbuf, rl, &retfl);
+	error = npf_rule_apply(&npc, &nbuf, rl, &retfl);
 	if (error) {
 		npf_stats_inc(NPF_STAT_BLOCK_RULESET);
 		goto block;
@@ -195,7 +219,7 @@
 	 * session.  It will be released on session destruction.
 	 */
 	if ((retfl & NPF_RULE_STATEFUL) != 0 && !se) {
-		se = npf_session_establish(&npc, nbuf, ifp, di);
+		se = npf_session_establish(&npc, &nbuf, di);
 		if (se) {
 			npf_session_setpass(se, rp);
 		}
@@ -206,14 +230,14 @@
 	/*
 	 * Perform NAT.
 	 */
-	error = npf_do_nat(&npc, se, nbuf, ifp, di);
+	error = npf_do_nat(&npc, se, &nbuf, di);
 block:
 	/*
 	 * Execute the rule procedure, if any is associated.
 	 * It may reverse the decision from pass to block.
 	 */
 	if (rp) {
-		npf_rproc_run(&npc, nbuf, rp, &decision);
+		npf_rproc_run(&npc, &nbuf, rp, &decision);
 	}
 out:
 	/*
@@ -226,6 +250,11 @@
 		npf_rproc_release(rp);
 	}
 
+	/* Reset mbuf pointer before returning to the caller. */
+	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
+		return error ? error : ENOMEM;
+	}
+
 	/* Pass the packet if decided and there is no error. */
 	if (decision == NPF_DECISION_PASS && !error) {
 		/*
@@ -241,7 +270,7 @@
 	 * Depending on the flags and protocol, return TCP reset (RST) or
 	 * ICMP destination unreachable.
 	 */
-	if (retfl && npf_return_block(&npc, nbuf, retfl)) {
+	if (retfl && npf_return_block(&npc, &nbuf, retfl)) {
 		*mp = NULL;
 	}
 
--- a/sys/net/npf/npf_impl.h	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_impl.h	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.10.2.11 2012/11/26 17:39:29 riz Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.10.2.12 2013/02/08 19:18:10 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -92,7 +92,8 @@
  * DEFINITIONS.
  */
 
-typedef bool (*npf_algfunc_t)(npf_cache_t *, nbuf_t *, void *);
+typedef bool (*npf_alg_func_t)(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
+typedef npf_session_t *(*npf_alg_sfunc_t)(npf_cache_t *, nbuf_t *, int);
 
 #define	NPF_NCODE_LIMIT		1024
 #define	NPF_TABLE_SLOTS		32
@@ -154,22 +155,18 @@
 int		npf_packet_handler(void *, struct mbuf **, ifnet_t *, int);
 
 /* Protocol helpers. */
-bool		npf_fetch_ip(npf_cache_t *, nbuf_t *, void *);
-bool		npf_fetch_tcp(npf_cache_t *, nbuf_t *, void *);
-bool		npf_fetch_udp(npf_cache_t *, nbuf_t *, void *);
-bool		npf_fetch_icmp(npf_cache_t *, nbuf_t *, void *);
 int		npf_cache_all(npf_cache_t *, nbuf_t *);
+void		npf_recache(npf_cache_t *, nbuf_t *);
 
-bool		npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int,
-		    npf_addr_t *);
-bool		npf_rwrport(npf_cache_t *, nbuf_t *, void *, const int,
-		    in_port_t);
-bool		npf_rwrcksum(npf_cache_t *, nbuf_t *, void *, const int,
-		    npf_addr_t *, in_port_t);
+bool		npf_rwrip(const npf_cache_t *, int, const npf_addr_t *);
+bool		npf_rwrport(const npf_cache_t *, int, const in_port_t);
+bool		npf_rwrcksum(const npf_cache_t *, const int,
+		    const npf_addr_t *, const in_port_t);
 
 uint16_t	npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
 uint16_t	npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
-uint16_t	npf_addr_cksum(uint16_t, int, npf_addr_t *, npf_addr_t *);
+uint16_t	npf_addr_cksum(uint16_t, int, const npf_addr_t *,
+		    const npf_addr_t *);
 uint32_t	npf_addr_sum(const int, const npf_addr_t *, const npf_addr_t *);
 int		npf_addr_cmp(const npf_addr_t *, const npf_netmask_t,
 		    const npf_addr_t *, const npf_netmask_t, const int);
@@ -178,24 +175,20 @@
 
 int		npf_tcpsaw(const npf_cache_t *, tcp_seq *, tcp_seq *,
 		    uint32_t *);
-bool		npf_fetch_tcpopts(const npf_cache_t *, nbuf_t *,
-		    uint16_t *, int *);
+bool		npf_fetch_tcpopts(npf_cache_t *, nbuf_t *, uint16_t *, int *);
 bool		npf_return_block(npf_cache_t *, nbuf_t *, const int);
 
 /* Complex instructions. */
-int		npf_match_ether(nbuf_t *, int, int, uint16_t, uint32_t *);
-int		npf_match_proto(npf_cache_t *, nbuf_t *, void *, uint32_t);
-int		npf_match_table(npf_cache_t *, nbuf_t *, void *,
-		    const int, const u_int);
-int		npf_match_ipmask(npf_cache_t *, nbuf_t *, void *,
-		    const int, const npf_addr_t *, const npf_netmask_t);
-int		npf_match_tcp_ports(npf_cache_t *, nbuf_t *, void *,
-		    const int, const uint32_t);
-int		npf_match_udp_ports(npf_cache_t *, nbuf_t *, void *,
-		    const int, const uint32_t);
-int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, uint32_t);
-int		npf_match_icmp6(npf_cache_t *, nbuf_t *, void *, uint32_t);
-int		npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, uint32_t);
+int		npf_match_ether(nbuf_t *, int, uint16_t, uint32_t *);
+int		npf_match_proto(const npf_cache_t *, uint32_t);
+int		npf_match_table(const npf_cache_t *, int, u_int);
+int		npf_match_ipmask(const npf_cache_t *, int,
+		    const npf_addr_t *, npf_netmask_t);
+int		npf_match_tcp_ports(const npf_cache_t *, int, uint32_t);
+int		npf_match_udp_ports(const npf_cache_t *, int, uint32_t);
+int		npf_match_icmp4(const npf_cache_t *, uint32_t);
+int		npf_match_icmp6(const npf_cache_t *, uint32_t);
+int		npf_match_tcpfl(const npf_cache_t *, uint32_t);
 
 /* Tableset interface. */
 void		npf_tableset_sysinit(void);
@@ -230,8 +223,8 @@
 npf_rule_t *	npf_ruleset_replace(const char *, npf_ruleset_t *);
 void		npf_ruleset_freealg(npf_ruleset_t *, npf_alg_t *);
 
-npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *, npf_ruleset_t *,
-		    const ifnet_t *, const int, const int);
+npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
+		    const npf_ruleset_t *, const int, const int);
 int		npf_rule_apply(npf_cache_t *, nbuf_t *, npf_rule_t *, int *);
 
 /* Rule interface. */
@@ -261,10 +254,10 @@
 void		sess_htable_destroy(npf_sehash_t *);
 void		sess_htable_reload(npf_sehash_t *);
 
-npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *,
-		    const ifnet_t *, const int, int *);
-npf_session_t *	npf_session_establish(const npf_cache_t *, nbuf_t *,
-		    const ifnet_t *, const int);
+npf_session_t *	npf_session_lookup(const npf_cache_t *, const nbuf_t *,
+		    const int, bool *);
+npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *, const int, int *);
+npf_session_t *	npf_session_establish(npf_cache_t *, nbuf_t *, const int);
 void		npf_session_release(npf_session_t *);
 void		npf_session_expire(npf_session_t *);
 bool		npf_session_pass(const npf_session_t *, npf_rproc_t **);
@@ -276,13 +269,13 @@
 int		npf_session_restore(npf_sehash_t *, prop_dictionary_t);
 
 /* State handling. */
-bool		npf_state_init(const npf_cache_t *, nbuf_t *, npf_state_t *);
-bool		npf_state_inspect(const npf_cache_t *, nbuf_t *, npf_state_t *,
+bool		npf_state_init(npf_cache_t *, nbuf_t *, npf_state_t *);
+bool		npf_state_inspect(npf_cache_t *, nbuf_t *, npf_state_t *,
 		    const bool);
 int		npf_state_etime(const npf_state_t *, const int);
 void		npf_state_destroy(npf_state_t *);
 
-bool		npf_state_tcp(const npf_cache_t *, nbuf_t *, npf_state_t *, int);
+bool		npf_state_tcp(npf_cache_t *, nbuf_t *, npf_state_t *, int);
 int		npf_state_tcp_timeout(const npf_state_t *);
 
 /* NAT. */
@@ -294,8 +287,9 @@
 bool		npf_nat_sharepm(npf_natpolicy_t *, npf_natpolicy_t *);
 void		npf_nat_freealg(npf_natpolicy_t *, npf_alg_t *);
 
-int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *,
-		    const ifnet_t *, const int);
+int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
+int		npf_nat_translate(npf_cache_t *, nbuf_t *, npf_nat_t *,
+		    const bool, const int);
 void		npf_nat_expire(npf_nat_t *);
 void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
@@ -307,12 +301,12 @@
 /* ALG interface. */
 void		npf_alg_sysinit(void);
 void		npf_alg_sysfini(void);
-npf_alg_t *	npf_alg_register(npf_algfunc_t, npf_algfunc_t,
-		    npf_algfunc_t, npf_algfunc_t);
+npf_alg_t *	npf_alg_register(npf_alg_func_t, npf_alg_func_t,
+		    npf_alg_sfunc_t);
 int		npf_alg_unregister(npf_alg_t *);
-bool		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
-void		npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, const int );
-bool		npf_alg_sessionid(npf_cache_t *, nbuf_t *, npf_cache_t *);
+bool		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
+void		npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
+npf_session_t *	npf_alg_session(npf_cache_t *, nbuf_t *, int);
 
 /* Debugging routines. */
 void		npf_addr_dump(const npf_addr_t *);
--- a/sys/net/npf/npf_inet.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_inet.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_inet.c,v 1.10.4.7 2012/12/16 18:19:52 riz Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.10.4.8 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.10.4.7 2012/12/16 18:19:52 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.10.4.8 2013/02/08 19:18:09 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -93,9 +93,11 @@
  * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
  */
 uint16_t
-npf_addr_cksum(uint16_t cksum, int sz, npf_addr_t *oaddr, npf_addr_t *naddr)
+npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr,
+    const npf_addr_t *naddr)
 {
-	uint32_t *oip32 = (uint32_t *)oaddr, *nip32 = (uint32_t *)naddr;
+	const uint32_t *oip32 = (const uint32_t *)oaddr;
+	const uint32_t *nip32 = (const uint32_t *)naddr;
 
 	KASSERT(sz % sizeof(uint32_t) == 0);
 	do {
@@ -185,7 +187,7 @@
 int
 npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
 {
-	const struct tcphdr *th = &npc->npc_l4.tcp;
+	const struct tcphdr *th = npc->npc_l4.tcp;
 	u_int thlen;
 
 	KASSERT(npf_iscached(npc, NPC_TCP));
@@ -196,10 +198,10 @@
 	thlen = th->th_off << 2;
 
 	if (npf_iscached(npc, NPC_IP4)) {
-		const struct ip *ip = &npc->npc_ip.v4;
+		const struct ip *ip = npc->npc_ip.v4;
 		return ntohs(ip->ip_len) - npf_cache_hlen(npc) - thlen;
 	} else if (npf_iscached(npc, NPC_IP6)) {
-		const struct ip6_hdr *ip6 = &npc->npc_ip.v6;
+		const struct ip6_hdr *ip6 = npc->npc_ip.v6;
 		return ntohs(ip6->ip6_plen) - thlen;
 	}
 	return 0;
@@ -209,14 +211,13 @@
  * npf_fetch_tcpopts: parse and return TCP options.
  */
 bool
-npf_fetch_tcpopts(const npf_cache_t *npc, nbuf_t *nbuf,
-    uint16_t *mss, int *wscale)
+npf_fetch_tcpopts(npf_cache_t *npc, nbuf_t *nbuf, uint16_t *mss, int *wscale)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
-	const struct tcphdr *th = &npc->npc_l4.tcp;
+	const struct tcphdr *th = npc->npc_l4.tcp;
 	int topts_len, step;
-	uint16_t val16;
+	void *nptr;
 	uint8_t val;
+	bool ok;
 
 	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_TCP));
@@ -231,53 +232,58 @@
 
 	/* First step: IP and TCP header up to options. */
 	step = npf_cache_hlen(npc) + sizeof(struct tcphdr);
+	nbuf_reset(nbuf);
 next:
-	if (nbuf_advfetch(&nbuf, &n_ptr, step, sizeof(val), &val)) {
-		return false;
+	if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) {
+		ok = false;
+		goto done;
 	}
+	val = *(uint8_t *)nptr;
 
 	switch (val) {
 	case TCPOPT_EOL:
 		/* Done. */
-		return true;
+		ok = true;
+		goto done;
 	case TCPOPT_NOP:
 		topts_len--;
 		step = 1;
 		break;
 	case TCPOPT_MAXSEG:
-		/*
-		 * XXX: clean this mess.
-		 */
-		if (mss && *mss) {
-			val16 = *mss;
-			if (nbuf_advstore(&nbuf, &n_ptr, 2,
-			    sizeof(val16), &val16))
-				return false;
-		} else if (nbuf_advfetch(&nbuf, &n_ptr, 2,
-		    sizeof(val16), &val16)) {
-			return false;
+		if ((nptr = nbuf_advance(nbuf, 2, 2)) == NULL) {
+			ok = false;
+			goto done;
 		}
 		if (mss) {
-			*mss = val16;
+			if (*mss) {
+				memcpy(nptr, mss, sizeof(uint16_t));
+			} else {
+				memcpy(mss, nptr, sizeof(uint16_t));
+			}
 		}
 		topts_len -= TCPOLEN_MAXSEG;
-		step = sizeof(val16);
+		step = 2;
 		break;
 	case TCPOPT_WINDOW:
 		/* TCP Window Scaling (RFC 1323). */
-		if (nbuf_advfetch(&nbuf, &n_ptr, 2, sizeof(val), &val)) {
-			return false;
+		if ((nptr = nbuf_advance(nbuf, 2, 1)) == NULL) {
+			ok = false;
+			goto done;
 		}
+		val = *(uint8_t *)nptr;
 		*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
 		topts_len -= TCPOLEN_WINDOW;
-		step = sizeof(val);
+		step = 1;
 		break;
 	default:
-		if (nbuf_advfetch(&nbuf, &n_ptr, 1, sizeof(val), &val)) {
-			return false;
+		if ((nptr = nbuf_advance(nbuf, 1, 1)) == NULL) {
+			ok = false;
+			goto done;
 		}
+		val = *(uint8_t *)nptr;
 		if (val < 2 || val > topts_len) {
-			return false;
+			ok = false;
+			goto done;
 		}
 		topts_len -= val;
 		step = val - 1;
@@ -287,82 +293,91 @@
 	if (__predict_true(topts_len > 0)) {
 		goto next;
 	}
-	return true;
+	ok = true;
+done:
+	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
+		npf_recache(npc, nbuf);
+	}
+	return ok;
 }
 
-/*
- * npf_fetch_ip: fetch, check and cache IP header.
- */
-bool
-npf_fetch_ip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+static int
+npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf)
 {
-	uint8_t ver;
-
-	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &ver)) {
-		return false;
-	}
+	const void *nptr = nbuf_dataptr(nbuf);
+	const uint8_t ver = *(const uint8_t *)nptr;
+	int flags = 0;
 
 	switch (ver >> 4) {
 	case IPVERSION: {
-		struct ip *ip = &npc->npc_ip.v4;
+		struct ip *ip;
 
-		/* Fetch IPv4 header. */
-		if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(struct ip), ip)) {
-			return false;
+		ip = nbuf_ensure_contig(nbuf, sizeof(struct ip));
+		if (ip == NULL) {
+			return 0;
 		}
 
 		/* Check header length and fragment offset. */
 		if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) {
-			return false;
+			return 0;
 		}
 		if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
 			/* Note fragmentation. */
-			npc->npc_info |= NPC_IPFRAG;
+			flags |= NPC_IPFRAG;
 		}
 
 		/* Cache: layer 3 - IPv4. */
 		npc->npc_alen = sizeof(struct in_addr);
 		npc->npc_srcip = (npf_addr_t *)&ip->ip_src;
 		npc->npc_dstip = (npf_addr_t *)&ip->ip_dst;
-		npc->npc_info |= NPC_IP4;
 		npc->npc_hlen = ip->ip_hl << 2;
-		npc->npc_next_proto = npc->npc_ip.v4.ip_p;
+		npc->npc_proto = ip->ip_p;
+
+		npc->npc_ip.v4 = ip;
+		flags |= NPC_IP4;
 		break;
 	}
 
 	case (IPV6_VERSION >> 4): {
-		struct ip6_hdr *ip6 = &npc->npc_ip.v6;
-		size_t hlen = sizeof(struct ip6_hdr);
-		struct ip6_ext ip6e;
+		struct ip6_hdr *ip6;
+		struct ip6_ext *ip6e;
+		size_t off, hlen;
 
-		/* Fetch IPv6 header and set initial next-protocol value. */
-		if (nbuf_fetch_datum(nbuf, n_ptr, hlen, ip6)) {
-			return false;
+		ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr));
+		if (ip6 == NULL) {
+			return 0;
 		}
-		npc->npc_next_proto = ip6->ip6_nxt;
+
+		/* Set initial next-protocol value. */
+		hlen = sizeof(struct ip6_hdr);
+		npc->npc_proto = ip6->ip6_nxt;
 		npc->npc_hlen = hlen;
 
 		/*
-		 * Advance by the length of the current header and
-		 * prefetch the extension header.
+		 * Advance by the length of the current header.
 		 */
-		while (nbuf_advfetch(&nbuf, &n_ptr, hlen,
-		    sizeof(struct ip6_ext), &ip6e) == 0) {
+		off = nbuf_offset(nbuf);
+		while (nbuf_advance(nbuf, hlen, 0) != NULL) {
+			ip6e = nbuf_ensure_contig(nbuf, sizeof(*ip6e));
+			if (ip6e == NULL) {
+				return 0;
+			}
+
 			/*
 			 * Determine whether we are going to continue.
 			 */
-			switch (npc->npc_next_proto) {
+			switch (npc->npc_proto) {
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_ROUTING:
-				hlen = (ip6e.ip6e_len + 1) << 3;
+				hlen = (ip6e->ip6e_len + 1) << 3;
 				break;
 			case IPPROTO_FRAGMENT:
-				npc->npc_info |= NPC_IPFRAG;
 				hlen = sizeof(struct ip6_frag);
+				flags |= NPC_IPFRAG;
 				break;
 			case IPPROTO_AH:
-				hlen = (ip6e.ip6e_len + 2) << 2;
+				hlen = (ip6e->ip6e_len + 2) << 2;
 				break;
 			default:
 				hlen = 0;
@@ -372,260 +387,183 @@
 			if (!hlen) {
 				break;
 			}
-			npc->npc_next_proto = ip6e.ip6e_nxt;
+			npc->npc_proto = ip6e->ip6e_nxt;
 			npc->npc_hlen += hlen;
 		}
 
+		/* Restore the offset. */
+		nbuf_reset(nbuf);
+		if (off) {
+			nbuf_advance(nbuf, off, 0);
+		}
+
 		/* Cache: layer 3 - IPv6. */
 		npc->npc_alen = sizeof(struct in6_addr);
 		npc->npc_srcip = (npf_addr_t *)&ip6->ip6_src;
 		npc->npc_dstip = (npf_addr_t *)&ip6->ip6_dst;
-		npc->npc_info |= NPC_IP6;
+
+		npc->npc_ip.v6 = ip6;
+		flags |= NPC_IP6;
 		break;
 	}
 	default:
-		return false;
-	}
-
-	return true;
-}
-
-/*
- * npf_fetch_tcp: fetch, check and cache TCP header.  If necessary,
- * fetch and cache layer 3 as well.
- */
-bool
-npf_fetch_tcp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
-{
-	struct tcphdr *th;
-
-	/* Must have IP header processed for its length and protocol. */
-	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
-		return false;
-	}
-	if (npf_cache_ipproto(npc) != IPPROTO_TCP) {
-		return false;
-	}
-	th = &npc->npc_l4.tcp;
-
-	/* Fetch TCP header. */
-	if (nbuf_advfetch(&nbuf, &n_ptr, npf_cache_hlen(npc),
-	    sizeof(struct tcphdr), th)) {
-		return false;
-	}
-
-	/* Cache: layer 4 - TCP. */
-	npc->npc_info |= (NPC_LAYER4 | NPC_TCP);
-	return true;
-}
-
-/*
- * npf_fetch_udp: fetch, check and cache UDP header.  If necessary,
- * fetch and cache layer 3 as well.
- */
-bool
-npf_fetch_udp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
-{
-	struct udphdr *uh;
-	u_int hlen;
-
-	/* Must have IP header processed for its length and protocol. */
-	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
-		return false;
+		break;
 	}
-	if (npf_cache_ipproto(npc) != IPPROTO_UDP) {
-		return false;
-	}
-	uh = &npc->npc_l4.udp;
-	hlen = npf_cache_hlen(npc);
-
-	/* Fetch UDP header. */
-	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct udphdr), uh)) {
-		return false;
-	}
-
-	/* Cache: layer 4 - UDP. */
-	npc->npc_info |= (NPC_LAYER4 | NPC_UDP);
-	return true;
-}
-
-/*
- * npf_fetch_icmp: fetch ICMP code, type and possible query ID.
- */
-bool
-npf_fetch_icmp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
-{
-	struct icmp *ic;
-	u_int hlen, iclen;
-
-	/* Must have IP header processed for its length and protocol. */
-	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
-		return false;
-	}
-	if (npf_cache_ipproto(npc) != IPPROTO_ICMP &&
-	    npf_cache_ipproto(npc) != IPPROTO_ICMPV6) {
-		return false;
-	}
-	ic = &npc->npc_l4.icmp;
-	hlen = npf_cache_hlen(npc);
-
-	/* Fetch basic ICMP header, up to the "data" point. */
-	CTASSERT(offsetof(struct icmp, icmp_void) ==
-	         offsetof(struct icmp6_hdr, icmp6_data32));
-
-	iclen = offsetof(struct icmp, icmp_void);
-	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, iclen, ic)) {
-		return false;
-	}
-
-	/* Cache: layer 4 - ICMP. */
-	npc->npc_info |= (NPC_LAYER4 | NPC_ICMP);
-	return true;
+	return flags;
 }
 
 /*
  * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
  * and TCP, UDP or ICMP headers.
+ *
+ * => nbuf offset shall be set accordingly.
  */
 int
 npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
+	int flags, l4flags;
+	u_int hlen;
 
-	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
-		return npc->npc_info;
+	/*
+	 * This routine is a main point where the references are cached,
+	 * therefore clear the flag as we reset.
+	 */
+again:
+	nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
+
+	/*
+	 * First, cache the L3 header (IPv4 or IPv6).  If IP packet is
+	 * fragmented, then we cannot look into L4.
+	 */
+	flags = npf_cache_ip(npc, nbuf);
+	if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0) {
+		npc->npc_info |= flags;
+		return flags;
 	}
-	if (npf_iscached(npc, NPC_IPFRAG)) {
-		return npc->npc_info;
-	}
-	switch (npf_cache_ipproto(npc)) {
+	hlen = npc->npc_hlen;
+
+	switch (npc->npc_proto) {
 	case IPPROTO_TCP:
-		(void)npf_fetch_tcp(npc, nbuf, n_ptr);
+		/* Cache: layer 4 - TCP. */
+		npc->npc_l4.tcp = nbuf_advance(nbuf, hlen,
+		    sizeof(struct tcphdr));
+		l4flags = NPC_LAYER4 | NPC_TCP;
 		break;
 	case IPPROTO_UDP:
-		(void)npf_fetch_udp(npc, nbuf, n_ptr);
+		/* Cache: layer 4 - UDP. */
+		npc->npc_l4.udp = nbuf_advance(nbuf, hlen,
+		    sizeof(struct udphdr));
+		l4flags = NPC_LAYER4 | NPC_UDP;
 		break;
 	case IPPROTO_ICMP:
+		/* Cache: layer 4 - ICMPv4. */
+		npc->npc_l4.icmp = nbuf_advance(nbuf, hlen,
+		    offsetof(struct icmp, icmp_void));
+		l4flags = NPC_LAYER4 | NPC_ICMP;
+		break;
 	case IPPROTO_ICMPV6:
-		(void)npf_fetch_icmp(npc, nbuf, n_ptr);
+		/* Cache: layer 4 - ICMPv6. */
+		npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen,
+		    offsetof(struct icmp6_hdr, icmp6_data32));
+		l4flags = NPC_LAYER4 | NPC_ICMP;
+		break;
+	default:
+		l4flags = 0;
 		break;
 	}
-	return npc->npc_info;
+
+	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
+		goto again;
+	}
+
+	/* Add the L4 flags if nbuf_advance() succeeded. */
+	if (l4flags && npc->npc_l4.hdr) {
+		flags |= l4flags;
+	}
+	npc->npc_info |= flags;
+	return flags;
+}
+
+void
+npf_recache(npf_cache_t *npc, nbuf_t *nbuf)
+{
+	const int mflags __unused = npc->npc_info & (NPC_IP46 | NPC_LAYER4);
+	int flags;
+
+	nbuf_reset(nbuf);
+	npc->npc_info = 0;
+	flags = npf_cache_all(npc, nbuf);
+	KASSERT((flags & mflags) == mflags);
+	KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0);
 }
 
 /*
- * npf_rwrip: rewrite required IP address, update the cache.
+ * npf_rwrip: rewrite required IP address.
  */
 bool
-npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
-    npf_addr_t *addr)
+npf_rwrip(const npf_cache_t *npc, int di, const npf_addr_t *addr)
 {
 	npf_addr_t *oaddr;
-	u_int offby;
 
 	KASSERT(npf_iscached(npc, NPC_IP46));
 
-	if (di == PFIL_OUT) {
-		/* Rewrite source address, if outgoing. */
-		offby = offsetof(struct ip, ip_src);
-		oaddr = npc->npc_srcip;
-	} else {
-		/* Rewrite destination, if incoming. */
-		offby = offsetof(struct ip, ip_dst);
-		oaddr = npc->npc_dstip;
-	}
-
-	/* Advance to the address and rewrite it. */
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, npc->npc_alen, addr))
-		return false;
-
-	/* Cache: IP address. */
+	/*
+	 * Rewrite source address if outgoing and destination if incoming.
+	 */
+	oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip;
 	memcpy(oaddr, addr, npc->npc_alen);
 	return true;
 }
 
 /*
- * npf_rwrport: rewrite required TCP/UDP port, update the cache.
+ * npf_rwrport: rewrite required TCP/UDP port.
  */
 bool
-npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
-    in_port_t port)
+npf_rwrport(const npf_cache_t *npc, int di, const in_port_t port)
 {
 	const int proto = npf_cache_ipproto(npc);
-	u_int offby = npf_cache_hlen(npc);
 	in_port_t *oport;
 
 	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
 	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
 
-	/* Offset to the port and pointer in the cache. */
+	/* Get the offset and store the port in it. */
 	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &npc->npc_l4.tcp;
-		if (di == PFIL_OUT) {
-			CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
-			oport = &th->th_sport;
-		} else {
-			offby += offsetof(struct tcphdr, th_dport);
-			oport = &th->th_dport;
-		}
+		struct tcphdr *th = npc->npc_l4.tcp;
+		oport = (di == PFIL_OUT) ? &th->th_sport : &th->th_dport;
 	} else {
-		struct udphdr *uh = &npc->npc_l4.udp;
-		if (di == PFIL_OUT) {
-			CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
-			oport = &uh->uh_sport;
-		} else {
-			offby += offsetof(struct udphdr, uh_dport);
-			oport = &uh->uh_dport;
-		}
+		struct udphdr *uh = npc->npc_l4.udp;
+		oport = (di == PFIL_OUT) ? &uh->uh_sport : &uh->uh_dport;
 	}
-
-	/* Advance and rewrite the port. */
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(in_port_t), &port))
-		return false;
-
-	/* Cache: TCP/UDP port. */
-	*oport = port;
+	memcpy(oport, &port, sizeof(in_port_t));
 	return true;
 }
 
 /*
- * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update the cache.
+ * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum.
  */
 bool
-npf_rwrcksum(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
-    npf_addr_t *addr, in_port_t port)
+npf_rwrcksum(const npf_cache_t *npc, const int di,
+    const npf_addr_t *addr, const in_port_t port)
 {
 	const int proto = npf_cache_ipproto(npc);
+	const int alen = npc->npc_alen;
 	npf_addr_t *oaddr;
 	uint16_t *ocksum;
 	in_port_t oport;
-	u_int offby;
 
-	/* XXX: NetBSD - process delayed checksums. */
-	if (di == PFIL_OUT && proto != IPPROTO_ICMP) {
-		nbuf_cksum_barrier(nbuf);
-		npc->npc_info &= ~(NPC_LAYER4 | NPC_TCP | NPC_UDP);
-		if (!npf_cache_all(npc, nbuf)) {
-			return false;
-		}
-	}
-
+	KASSERT(npf_iscached(npc, NPC_LAYER4));
 	oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip;
 
 	if (npf_iscached(npc, NPC_IP4)) {
-		struct ip *ip = &npc->npc_ip.v4;
-		uint16_t ipsum;
+		struct ip *ip = npc->npc_ip.v4;
+		uint16_t ipsum = ip->ip_sum;
 
-		/* Recalculate IPv4 checksum, advance to it and rewrite. */
-		ipsum = npf_addr_cksum(ip->ip_sum, npc->npc_alen, oaddr, addr);
-		offby = offsetof(struct ip, ip_sum);
-		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(ipsum), &ipsum))
-			return false;
-		ip->ip_sum = ipsum;
+		/* Recalculate IPv4 checksum and rewrite. */
+		ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr);
 	} else {
 		/* No checksum for IPv6. */
 		KASSERT(npf_iscached(npc, NPC_IP6));
-		offby = 0;
 	}
 
 	/* Nothing else to do for ICMP. */
@@ -633,7 +571,6 @@
 		return true;
 	}
 	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
-	offby = npf_cache_hlen(npc) - offby;
 
 	/*
 	 * Calculate TCP/UDP checksum:
@@ -642,13 +579,12 @@
 	 * - Fixup the port change, if required (non-zero).
 	 */
 	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &npc->npc_l4.tcp;
+		struct tcphdr *th = npc->npc_l4.tcp;
 
 		ocksum = &th->th_sum;
-		offby += offsetof(struct tcphdr, th_sum);
 		oport = (di == PFIL_OUT) ? th->th_sport : th->th_dport;
 	} else {
-		struct udphdr *uh = &npc->npc_l4.udp;
+		struct udphdr *uh = npc->npc_l4.udp;
 
 		KASSERT(proto == IPPROTO_UDP);
 		ocksum = &uh->uh_sum;
@@ -656,21 +592,16 @@
 			/* No need to update. */
 			return true;
 		}
-		offby += offsetof(struct udphdr, uh_sum);
 		oport = (di == PFIL_OUT) ? uh->uh_sport : uh->uh_dport;
 	}
 
-	uint16_t cksum = *ocksum;
-	cksum = npf_addr_cksum(cksum, npc->npc_alen, oaddr, addr);
+	uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr);
 	if (port) {
 		cksum = npf_fixup16_cksum(cksum, oport, port);
 	}
 
-	/* Advance to TCP/UDP checksum and rewrite it. */
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
-		return false;
-	}
-	*ocksum = cksum;
+	/* Rewrite TCP/UDP checksum. */
+	memcpy(ocksum, &cksum, sizeof(uint16_t));
 	return true;
 }
 
--- a/sys/net/npf/npf_instr.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_instr.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_instr.c,v 1.9.2.5 2012/07/25 20:45:23 jdc Exp $	*/
+/*	$NetBSD: npf_instr.c,v 1.9.2.6 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.9.2.5 2012/07/25 20:45:23 jdc Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.9.2.6 2013/02/08 19:18:09 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -56,23 +56,22 @@
  * => Returns zero on success or -1 on failure.
  */
 int
-npf_match_ether(nbuf_t *nbuf, int sd, int _res, uint16_t ethertype, uint32_t *r)
+npf_match_ether(nbuf_t *nbuf, int sd, uint16_t ethertype, uint32_t *r)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
+	const u_int off = nbuf_offset(nbuf);
+	bool vlan = false;
+	void *nptr;
 	u_int offby;
 	uint16_t val16;
-	bool vlan;
-
-	vlan = false;
-	*r = 0;
 
 	/* Ethernet header: check EtherType. */
 	offby = offsetof(struct ether_header, ether_type);
+	*r = 0;
 again:
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t), &val16)) {
+	if ((nptr = nbuf_advance(nbuf, offby, sizeof(uint16_t))) == NULL) {
 		return -1;
 	}
-	val16 = ntohs(val16);
+	memcpy(&val16, nptr, sizeof(val16));
 	*r += offby;
 
 	/* Handle VLAN tags. */
@@ -81,10 +80,14 @@
 		vlan = true;
 		goto again;
 	}
+
+	/* Restore the offset. */
+	nbuf_reset(nbuf);
+	nbuf_advance(nbuf, off, 0);
+
 	if (val16 != ETHERTYPE_IP) {
 		return -1;
 	}
-
 	*r += ETHER_TYPE_LEN;
 	return 0;
 }
@@ -93,18 +96,12 @@
  * npf_match_proto: match IP address length and/or layer 4 protocol.
  */
 int
-npf_match_proto(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t ap)
+npf_match_proto(const npf_cache_t *npc, uint32_t ap)
 {
 	const int alen = (ap >> 8) & 0xff;
 	const int proto = ap & 0xff;
 
-	if (!npf_iscached(npc, NPC_IP46)) {
-		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_IP46));
-	}
-
+	KASSERT(npf_iscached(npc, NPC_IP46));
 	if (alen && npc->npc_alen != alen) {
 		return -1;
 	}
@@ -115,24 +112,14 @@
  * npf_match_table: match IP address against NPF table.
  */
 int
-npf_match_table(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
-    const int sd, const u_int tid)
+npf_match_table(const npf_cache_t *npc, int sd, u_int tid)
 {
-	npf_tableset_t *tblset;
-	npf_addr_t *addr;
-	int alen;
+	npf_tableset_t *tblset = npf_core_tableset();
+	const npf_addr_t *addr = sd ? npc->npc_srcip : npc->npc_dstip;
+	const int alen = npc->npc_alen;
 
 	KASSERT(npf_core_locked());
-
-	if (!npf_iscached(npc, NPC_IP46)) {
-		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_IP46));
-	}
-	addr = sd ? npc->npc_srcip : npc->npc_dstip;
-	tblset = npf_core_tableset();
-	alen = npc->npc_alen;
+	KASSERT(npf_iscached(npc, NPC_IP46));
 
 	/* Match address against NPF table. */
 	return npf_table_lookup(tblset, tid, alen, addr) ? -1 : 0;
@@ -142,18 +129,13 @@
  * npf_match_ipmask: match an address against netaddr/mask.
  */
 int
-npf_match_ipmask(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
-    const int szsd, const npf_addr_t *maddr, npf_netmask_t mask)
+npf_match_ipmask(const npf_cache_t *npc, int szsd,
+    const npf_addr_t *maddr, npf_netmask_t mask)
 {
 	const int alen = szsd >> 1;
-	npf_addr_t *addr;
+	const npf_addr_t *addr;
 
-	if (!npf_iscached(npc, NPC_IP46)) {
-		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_IP46));
-	}
+	KASSERT(npf_iscached(npc, NPC_IP46));
 	if (npc->npc_alen != alen) {
 		return -1;
 	}
@@ -165,19 +147,12 @@
  * npf_match_tcp_ports: match TCP port in header against the range.
  */
 int
-npf_match_tcp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
-    const int sd, const uint32_t prange)
+npf_match_tcp_ports(const npf_cache_t *npc, int sd, uint32_t prange)
 {
-	struct tcphdr *th = &npc->npc_l4.tcp;
-	in_port_t p;
+	const struct tcphdr *th = npc->npc_l4.tcp;
+	const in_port_t p = sd ? th->th_sport : th->th_dport;
 
-	if (!npf_iscached(npc, NPC_TCP)) {
-		if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_TCP));
-	}
-	p = sd ? th->th_sport : th->th_dport;
+	KASSERT(npf_iscached(npc, NPC_TCP));
 
 	/* Match against the port range. */
 	return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@@ -187,19 +162,12 @@
  * npf_match_udp_ports: match UDP port in header against the range.
  */
 int
-npf_match_udp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
-    const int sd, const uint32_t prange)
+npf_match_udp_ports(const npf_cache_t *npc, int sd, uint32_t prange)
 {
-	struct udphdr *uh = &npc->npc_l4.udp;
-	in_port_t p;
+	const struct udphdr *uh = npc->npc_l4.udp;
+	const in_port_t p = sd ? uh->uh_sport : uh->uh_dport;
 
-	if (!npf_iscached(npc, NPC_UDP)) {
-		if (!npf_fetch_udp(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_UDP));
-	}
-	p = sd ? uh->uh_sport : uh->uh_dport;
+	KASSERT(npf_iscached(npc, NPC_UDP));
 
 	/* Match against the port range. */
 	return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@@ -209,16 +177,11 @@
  * npf_match_icmp4: match ICMPv4 packet.
  */
 int
-npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t tc)
+npf_match_icmp4(const npf_cache_t *npc, uint32_t tc)
 {
-	struct icmp *ic = &npc->npc_l4.icmp;
+	const struct icmp *ic = npc->npc_l4.icmp;
 
-	if (!npf_iscached(npc, NPC_ICMP)) {
-		if (!npf_fetch_icmp(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_ICMP));
-	}
+	KASSERT(npf_iscached(npc, NPC_ICMP));
 
 	/* Match code/type, if required. */
 	if ((1 << 31) & tc) {
@@ -240,16 +203,11 @@
  * npf_match_icmp6: match ICMPv6 packet.
  */
 int
-npf_match_icmp6(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t tc)
+npf_match_icmp6(const npf_cache_t *npc, uint32_t tc)
 {
-	struct icmp6_hdr *ic6 = &npc->npc_l4.icmp6;
+	const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
 
-	if (!npf_iscached(npc, NPC_ICMP)) {
-		if (!npf_fetch_icmp(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_ICMP));
-	}
+	KASSERT(npf_iscached(npc, NPC_ICMP));
 
 	/* Match code/type, if required. */
 	if ((1 << 31) & tc) {
@@ -271,16 +229,11 @@
  * npf_match_tcpfl: match TCP flags.
  */
 int
-npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t fl)
+npf_match_tcpfl(const npf_cache_t *npc, uint32_t fl)
 {
 	const uint8_t tcpfl = (fl >> 8) & 0xff, mask = fl & 0xff;
-	struct tcphdr *th = &npc->npc_l4.tcp;
+	const struct tcphdr *th = npc->npc_l4.tcp;
 
-	if (!npf_iscached(npc, NPC_TCP)) {
-		if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		KASSERT(npf_iscached(npc, NPC_TCP));
-	}
-	return ((th->th_flags & mask) == tcpfl) ? 0 : -1;
+	KASSERT(npf_iscached(npc, NPC_TCP));
+	return (th->th_flags & mask) == tcpfl ? 0 : -1;
 }
--- a/sys/net/npf/npf_mbuf.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_mbuf.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_mbuf.c,v 1.6.14.2 2012/12/16 18:19:52 riz Exp $	*/
+/*	$NetBSD: npf_mbuf.c,v 1.6.14.3 2013/02/08 19:18:10 riz Exp $	*/
 
 /*-
- * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -37,39 +37,88 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.6.14.2 2012/12/16 18:19:52 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.6.14.3 2013/02/08 19:18:10 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 
 #include "npf_impl.h"
 
-/*
- * nbuf_dataptr: return a pointer to data in nbuf.
- */
+#define	NBUF_ENSURE_ALIGN	(MAX(COHERENCY_UNIT, 64))
+#define	NBUF_ENSURE_MASK	(NBUF_ENSURE_ALIGN - 1)
+#define	NBUF_ENSURE_ROUNDUP(x)	(((x) + NBUF_ENSURE_ALIGN) & ~NBUF_ENSURE_MASK)
+
+void
+nbuf_init(nbuf_t *nbuf, struct mbuf *m, const ifnet_t *ifp)
+{
+	KASSERT((m->m_flags & M_PKTHDR) != 0);
+	KASSERT(ifp != NULL);
+
+	nbuf->nb_mbuf0 = m;
+	nbuf->nb_ifp = ifp;
+	nbuf_reset(nbuf);
+}
+
+void
+nbuf_reset(nbuf_t *nbuf)
+{
+	struct mbuf *m = nbuf->nb_mbuf0;
+
+	nbuf->nb_mbuf = m;
+	nbuf->nb_nptr = mtod(m, void *);
+}
+
 void *
 nbuf_dataptr(nbuf_t *nbuf)
 {
-	const struct mbuf *m = nbuf;
+	KASSERT(nbuf->nb_nptr);
+	return nbuf->nb_nptr;
+}
+
+size_t
+nbuf_offset(const nbuf_t *nbuf)
+{
+	const struct mbuf *m = nbuf->nb_mbuf;
+	const u_int off = (uintptr_t)nbuf->nb_nptr - mtod(m, uintptr_t);
+	const int poff = m_length(nbuf->nb_mbuf0) - m_length(m) + off;
+
+	return poff;
+}
 
-	return mtod(m, void *);
+struct mbuf *
+nbuf_head_mbuf(nbuf_t *nbuf)
+{
+	return nbuf->nb_mbuf0;
+}
+
+bool
+nbuf_flag_p(const nbuf_t *nbuf, int flag)
+{
+	return (nbuf->nb_flags & flag) != 0;
+}
+
+void
+nbuf_unset_flag(nbuf_t *nbuf, int flag)
+{
+	nbuf->nb_flags &= ~flag;
 }
 
 /*
- * nbuf_advance: advance in mbuf or chain by specified amount of bytes.
+ * nbuf_advance: advance in nbuf or chain by specified amount of bytes and,
+ * if requested, ensure that the area *after* advance is contiguous.
  *
- * => Returns new pointer to data in mbuf and NULL if offset gets invalid.
- * => Sets nbuf to current (after advance) mbuf in the chain.
+ * => Returns new pointer to data in nbuf or NULL if offset is invalid.
+ * => Current nbuf and the offset is stored in the nbuf metadata.
  */
 void *
-nbuf_advance(nbuf_t **nbuf, void *n_ptr, u_int n)
+nbuf_advance(nbuf_t *nbuf, size_t len, size_t ensure)
 {
-	struct mbuf *m = *nbuf;
+	struct mbuf *m = nbuf->nb_mbuf;
 	u_int off, wmark;
 	uint8_t *d;
 
 	/* Offset with amount to advance. */
-	off = (uintptr_t)n_ptr - mtod(m, uintptr_t) + n;
+	off = (uintptr_t)nbuf->nb_nptr - mtod(m, uintptr_t) + len;
 	wmark = m->m_len;
 
 	/* Find the mbuf according to offset. */
@@ -77,187 +126,155 @@
 		m = m->m_next;
 		if (__predict_false(m == NULL)) {
 			/*
-			 * If out of chain, then offset is
+			 * If end of the chain, then the offset is
 			 * higher than packet length.
 			 */
 			return NULL;
 		}
 		wmark += m->m_len;
 	}
+	KASSERT(off < m_length(nbuf->nb_mbuf0));
 
 	/* Offset in mbuf data. */
 	d = mtod(m, uint8_t *);
 	KASSERT(off >= (wmark - m->m_len));
 	d += (off - (wmark - m->m_len));
 
-	*nbuf = (void *)m;
+	nbuf->nb_mbuf = m;
+	nbuf->nb_nptr = d;
+
+	if (ensure) {
+		/* Ensure contiguousness (may change nbuf chain). */
+		d = nbuf_ensure_contig(nbuf, ensure);
+	}
 	return d;
 }
 
 /*
- * nbuf_rw_datum: read or write a datum of specified length at current
- * offset in the nbuf chain and copy datum into passed buffer.
- *
- * => Datum is allowed to overlap between two or more mbufs.
- * => Note: all data in nbuf is in network byte order.
- * => Returns 0 on success, error code on failure.
+ * nbuf_ensure_contig: check whether the specified length from the current
+ * point in the nbuf is contiguous.  If not, rearrange the chain to be so.
  *
- * Note: this function must be static inline with constant operation
- * parameter - we expect constant propagation.
+ * => Returns pointer to the data at the current offset in the buffer.
+ * => Returns NULL on failure and nbuf becomes invalid.
  */
+void *
+nbuf_ensure_contig(nbuf_t *nbuf, size_t len)
+{
+	const struct mbuf * const n = nbuf->nb_mbuf;
+	const size_t off = (uintptr_t)nbuf->nb_nptr - mtod(n, uintptr_t);
 
-#define	NBUF_DATA_READ		0
-#define	NBUF_DATA_WRITE		1
+	KASSERT(off < n->m_len);
 
-static inline int
-nbuf_rw_datum(const int wr, struct mbuf *m, void *n_ptr, size_t len, void *buf)
-{
-	uint8_t *d = n_ptr, *b = buf;
-	u_int off, wmark, end;
+	if (__predict_false(n->m_len < (off + len))) {
+		struct mbuf *m = nbuf->nb_mbuf0;
+		const size_t foff = nbuf_offset(nbuf);
+		const size_t plen = m_length(m);
+		const size_t mlen = m->m_len;
+		size_t target;
+		bool success;
 
-	/* Current offset in mbuf. */
-	off = (uintptr_t)n_ptr - mtod(m, uintptr_t);
-	KASSERT(off < (u_int)m->m_len);
-	wmark = m->m_len;
+		npf_stats_inc(NPF_STAT_NBUF_NONCONTIG);
 
-	/* Is datum overlapping? */
-	end = off + len;
-	while (__predict_false(end > wmark)) {
-		u_int l;
+		/* Attempt to round-up to NBUF_ENSURE_ALIGN bytes. */
+		if ((target = NBUF_ENSURE_ROUNDUP(foff + len)) > plen) {
+			target = foff + len;
+		}
 
-		/* Get the part of current mbuf. */
-		l = m->m_len - off;
-		KASSERT(l < len);
-		len -= l;
-		if (wr == NBUF_DATA_WRITE) {
-			while (l--)
-				*d++ = *b++;
-		} else {
-			KASSERT(wr == NBUF_DATA_READ);
-			while (l--)
-				*b++ = *d++;
+		/* Rearrange the chain to be contiguous. */
+		KASSERT((m->m_flags & M_PKTHDR) != 0);
+		success = m_ensure_contig(&m, target);
+		KASSERT(m != NULL);
+
+		/* If no change in the chain: return what we have. */
+		if (m == nbuf->nb_mbuf0 && m->m_len == mlen) {
+			return success ? nbuf->nb_nptr : NULL;
 		}
-		KASSERT(len > 0);
 
-		/* Take next mbuf and continue. */
-		m = m->m_next;
-		if (__predict_false(m == NULL)) {
-			/*
-			 * If out of chain, then offset with datum
-			 * length exceed the packet length.
-			 */
-			return EINVAL;
+		/*
+		 * The mbuf chain was re-arranged.  Update the pointers
+		 * accordingly and indicate that the references to the data
+		 * might need a reset.
+		 */
+		KASSERT((m->m_flags & M_PKTHDR) != 0);
+		nbuf->nb_mbuf0 = m;
+		nbuf->nb_mbuf = m;
+
+		KASSERT(foff < m->m_len && foff < m_length(m));
+		nbuf->nb_nptr = mtod(m, uint8_t *) + foff;
+		nbuf->nb_flags |= NBUF_DATAREF_RESET;
+
+		if (!success) {
+			npf_stats_inc(NPF_STAT_NBUF_CONTIG_FAIL);
+			return NULL;
 		}
-		wmark += m->m_len;
-		d = mtod(m, uint8_t *);
-		off = 0;
 	}
-	KASSERT(n_ptr == d || mtod(m, uint8_t *) == d);
-	KASSERT(len <= (u_int)m->m_len);
-
-	/* Non-overlapping case: fetch the actual data. */
-	if (wr == NBUF_DATA_WRITE) {
-		while (len--)
-			*d++ = *b++;
-	} else {
-		KASSERT(wr == NBUF_DATA_READ);
-		while (len--)
-			*b++ = *d++;
-	}
-	return 0;
+	return nbuf->nb_nptr;
 }
 
-/*
- * nbuf_{fetch|store}_datum: read/write absraction calls on nbuf_rw_datum().
- */
-int
-nbuf_fetch_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
+void *
+nbuf_ensure_writable(nbuf_t *nbuf, size_t len)
 {
-	struct mbuf *m = nbuf;
+	struct mbuf *m = nbuf->nb_mbuf;
+	const u_int off = (uintptr_t)nbuf->nb_nptr - mtod(m, uintptr_t);
+	const int tlen = off + len;
+	bool head_buf;
+
+	KASSERT(off < m_length(nbuf->nb_mbuf0));
 
-	return nbuf_rw_datum(NBUF_DATA_READ, m, n_ptr, len, buf);
-}
+	if (!M_UNWRITABLE(m, tlen)) {
+		return nbuf->nb_nptr;
+	}
+	head_buf = (nbuf->nb_mbuf0 == m);
+	if (m_makewritable(&m, 0, tlen, M_NOWAIT)) {
+		memset(nbuf, 0, sizeof(nbuf_t));
+		return NULL;
+	}
+	if (head_buf) {
+		KASSERT((m->m_flags & M_PKTHDR) != 0);
+		KASSERT(off < m_length(m));
+		nbuf->nb_mbuf0 = m;
+	}
+	nbuf->nb_mbuf = m;
+	nbuf->nb_nptr = mtod(m, uint8_t *) + off;
 
-int
-nbuf_store_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
-{
-	struct mbuf *m = nbuf;
-
-	KASSERT((m->m_flags & M_PKTHDR) != 0 || !M_READONLY(m));
-	return nbuf_rw_datum(NBUF_DATA_WRITE, m, n_ptr, len, buf);
+	return nbuf->nb_nptr;
 }
 
-/*
- * nbuf_advfetch: advance and fetch the datum.
- */
-int
-nbuf_advfetch(nbuf_t **nbuf, void **n_ptr, u_int n, size_t len, void *buf)
+bool
+nbuf_cksum_barrier(nbuf_t *nbuf, int di)
 {
-	nbuf_t *orig_nbuf = *nbuf;
-	void *orig_nptr = *n_ptr;
-	int error;
-
-	*n_ptr = nbuf_advance(nbuf, *n_ptr, n);
-	if (__predict_false(*n_ptr != NULL)) {
-		error = nbuf_fetch_datum(*nbuf, *n_ptr, len, buf);
-	} else {
-		error = EINVAL;
-	}
-	if (__predict_false(error)) {
-		*nbuf = orig_nbuf;
-		*n_ptr = orig_nptr;
-	}
-	return error;
-}
+	struct mbuf *m;
 
-/*
- * nbuf_advstore: advance and store the datum.
- */
-int
-nbuf_advstore(nbuf_t **nbuf, void **n_ptr, u_int n, size_t len, void *buf)
-{
-	nbuf_t *orig_nbuf = *nbuf;
-	void *orig_nptr = *n_ptr;
-	int error;
-
-	*n_ptr = nbuf_advance(nbuf, *n_ptr, n);
-	if (__predict_false(*n_ptr != NULL)) {
-		error = nbuf_store_datum(*nbuf, *n_ptr, len, buf);
-	} else {
-		error = EINVAL;
+	if (di != PFIL_OUT) {
+		return false;
 	}
-	if (__predict_false(error)) {
-		*nbuf = orig_nbuf;
-		*n_ptr = orig_nptr;
-	}
-	return error;
-}
-
-void
-nbuf_cksum_barrier(nbuf_t *nbuf)
-{
-	struct mbuf *m = nbuf;
+	m = nbuf->nb_mbuf0;
+	KASSERT((m->m_flags & M_PKTHDR) != 0);
 
 	if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4 | M_CSUM_UDPv4);
+		return true;
 	}
+	return false;
 }
 
 /*
  * nbuf_add_tag: add a tag to specified network buffer.
  *
- * => Returns 0 on success, or errno on failure.
+ * => Returns 0 on success or errno on failure.
  */
 int
 nbuf_add_tag(nbuf_t *nbuf, uint32_t key, uint32_t val)
 {
-	struct mbuf *m = nbuf;
+	struct mbuf *m = nbuf->nb_mbuf0;
 	struct m_tag *mt;
 	uint32_t *dat;
 
+	KASSERT((m->m_flags & M_PKTHDR) != 0);
+
 	mt = m_tag_get(PACKET_TAG_NPF, sizeof(uint32_t), M_NOWAIT);
-	if (__predict_false(mt == NULL)) {
+	if (mt == NULL) {
 		return ENOMEM;
 	}
 	dat = (uint32_t *)(mt + 1);
@@ -269,16 +286,18 @@
 /*
  * nbuf_find_tag: find a tag in specified network buffer.
  *
- * => Returns 0 on success, or errno on failure.
+ * => Returns 0 on success or errno on failure.
  */
 int
 nbuf_find_tag(nbuf_t *nbuf, uint32_t key, void **data)
 {
-	struct mbuf *m = nbuf;
+	struct mbuf *m = nbuf->nb_mbuf0;
 	struct m_tag *mt;
 
+	KASSERT((m->m_flags & M_PKTHDR) != 0);
+
 	mt = m_tag_find(m, PACKET_TAG_NPF, NULL);
-	if (__predict_false(mt == NULL)) {
+	if (mt == NULL) {
 		return EINVAL;
 	}
 	*data = (void *)(mt + 1);
--- a/sys/net/npf/npf_nat.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_nat.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.10.2.6 2012/08/19 17:40:31 riz Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.10.2.7 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.6 2012/08/19 17:40:31 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.7 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -417,8 +417,7 @@
  * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
  */
 static npf_natpolicy_t *
-npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, const ifnet_t *ifp,
-    const int di)
+npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di)
 {
 	npf_ruleset_t *rlset;
 	npf_natpolicy_t *np;
@@ -426,7 +425,7 @@
 
 	npf_core_enter();
 	rlset = npf_core_natset();
-	rl = npf_ruleset_inspect(npc, nbuf, rlset, ifp, di, NPF_LAYER_3);
+	rl = npf_ruleset_inspect(npc, nbuf, rlset, di, NPF_LAYER_3);
 	if (rl == NULL) {
 		npf_core_exit();
 		return NULL;
@@ -483,11 +482,11 @@
 
 	/* Save the relevant TCP/UDP port. */
 	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &npc->npc_l4.tcp;
+		const struct tcphdr *th = npc->npc_l4.tcp;
 		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
 		    th->th_sport : th->th_dport;
 	} else {
-		struct udphdr *uh = &npc->npc_l4.udp;
+		const struct udphdr *uh = npc->npc_l4.udp;
 		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
 		    uh->uh_sport : uh->uh_dport;
 	}
@@ -508,16 +507,17 @@
 /*
  * npf_nat_translate: perform address and/or port translation.
  */
-static int
+int
 npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
     const bool forw, const int di)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
-	npf_natpolicy_t *np = nt->nt_natpolicy;
-	npf_addr_t *addr;
+	const int proto = npf_cache_ipproto(npc);
+	const npf_natpolicy_t *np = nt->nt_natpolicy;
+	const npf_addr_t *addr;
 	in_port_t port;
 
 	KASSERT(npf_iscached(npc, NPC_IP46));
+	KASSERT(npf_iscached(npc, NPC_LAYER4));
 
 	if (forw) {
 		/* "Forwards" stream: use translation address/port. */
@@ -530,14 +530,23 @@
 	}
 	KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0);
 
+	/* Process delayed checksums (XXX: NetBSD). */
+	if (nbuf_cksum_barrier(nbuf, di)) {
+		npf_recache(npc, nbuf);
+	}
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
 	/* Execute ALG hook first. */
-	npf_alg_exec(npc, nbuf, nt, di);
+	if ((npc->npc_info & NPC_ALG_EXEC) == 0) {
+		npc->npc_info |= NPC_ALG_EXEC;
+		npf_alg_exec(npc, nbuf, nt, di);
+	}
 
 	/*
 	 * Rewrite IP and/or TCP/UDP checksums first, since it will use
 	 * the cache containing original values for checksum calculation.
 	 */
-	if (!npf_rwrcksum(npc, nbuf, n_ptr, di, addr, port)) {
+	if (!npf_rwrcksum(npc, di, addr, port)) {
 		return EINVAL;
 	}
 
@@ -545,7 +554,7 @@
 	 * Address translation: rewrite source/destination address, depending
 	 * on direction (PFIL_OUT - for source, PFIL_IN - for destination).
 	 */
-	if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) {
+	if (!npf_rwrip(npc, di, addr)) {
 		return EINVAL;
 	}
 	if ((np->n_flags & NPF_NAT_PORTS) == 0) {
@@ -553,12 +562,12 @@
 		return 0;
 	}
 
-	switch (npf_cache_ipproto(npc)) {
+	switch (proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
 		KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
 		/* Rewrite source/destination port. */
-		if (!npf_rwrport(npc, nbuf, n_ptr, di, port)) {
+		if (!npf_rwrport(npc, di, port)) {
 			return EINVAL;
 		}
 		break;
@@ -582,8 +591,7 @@
  *	- Associate a NAT policy with a session (may establish a new).
  */
 int
-npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
-    const ifnet_t *ifp, const int di)
+npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int di)
 {
 	npf_session_t *nse = NULL;
 	npf_natpolicy_t *np;
@@ -595,6 +603,7 @@
 	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
 		return 0;
 	}
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 
 	/*
 	 * Return the NAT entry associated with the session, if any.
@@ -611,7 +620,7 @@
 	 * Inspect the packet for a NAT policy, if there is no session.
 	 * Note: acquires the lock (releases, if not found).
 	 */
-	np = npf_nat_inspect(npc, nbuf, ifp, di);
+	np = npf_nat_inspect(npc, nbuf, di);
 	if (np == NULL) {
 		/* If packet does not match - done. */
 		return 0;
@@ -633,7 +642,7 @@
 	new = true;
 
 	/* Determine whether any ALG matches. */
-	if (npf_alg_match(npc, nbuf, nt)) {
+	if (npf_alg_match(npc, nbuf, nt, di)) {
 		KASSERT(nt->nt_alg != NULL);
 	}
 
@@ -644,7 +653,7 @@
 	 * stream depends on other, stateless filtering rules.
 	 */
 	if (se == NULL) {
-		nse = npf_session_establish(npc, nbuf, ifp, di);
+		nse = npf_session_establish(npc, nbuf, di);
 		if (nse == NULL) {
 			error = ENOMEM;
 			goto out;
--- a/sys/net/npf/npf_processor.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_processor.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_processor.c,v 1.9.2.3 2012/07/25 20:45:23 jdc Exp $	*/
+/*	$NetBSD: npf_processor.c,v 1.9.2.4 2013/02/08 19:18:10 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.9.2.3 2012/07/25 20:45:23 jdc Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.9.2.4 2013/02/08 19:18:10 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -129,14 +129,10 @@
  */
 int
 npf_ncode_process(npf_cache_t *npc, const void *ncode,
-    nbuf_t *nbuf0, const int layer)
+    nbuf_t *nbuf, const int layer)
 {
 	/* N-code instruction pointer. */
 	const void *	i_ptr;
-	/* Pointer of current nbuf in the chain. */
-	nbuf_t *	nbuf;
-	/* Data pointer in the current nbuf. */
-	void *		n_ptr;
 	/* Virtual registers. */
 	uint32_t	regs[NPF_NREGS];
 	/* Local, state variables. */
@@ -145,16 +141,13 @@
 	u_int lcount;
 	int cmpval;
 
+	nbuf_reset(nbuf);
 	i_ptr = ncode;
 	regs[0] = layer;
 
 	lcount = NPF_LOOP_LIMIT;
 	cmpval = 0;
 
-	/* Note: offset = n_ptr - nbuf_dataptr(nbuf); */
-	nbuf = nbuf0;
-	n_ptr = nbuf_dataptr(nbuf);
-
 process_next:
 	/*
 	 * Loop must always start on instruction, therefore first word
@@ -178,19 +171,27 @@
 	case NPF_OPCODE_ADVR:
 		i_ptr = nc_fetch_word(i_ptr, &i);	/* Register */
 		KASSERT(i < NPF_NREGS);
-		n_ptr = nbuf_advance(&nbuf, n_ptr, regs[i]);
-		if (__predict_false(n_ptr == NULL)) {
+		if (!nbuf_advance(nbuf, regs[i], 0)) {
 			goto fail;
 		}
 		break;
-	case NPF_OPCODE_LW:
+	case NPF_OPCODE_LW: {
+		void *n_ptr;
+
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Size, register */
 		KASSERT(i < NPF_NREGS);
 		KASSERT(n >= sizeof(uint8_t) && n <= sizeof(uint32_t));
-		if (nbuf_fetch_datum(nbuf, n_ptr, n, (uint32_t *)regs + i)) {
+
+		n_ptr = nbuf_ensure_contig(nbuf, n);
+		if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
+			npf_recache(npc, nbuf);
+		}
+		if (n_ptr == NULL) {
 			goto fail;
 		}
+		memcpy(&regs[i], n_ptr, n);
 		break;
+	}
 	case NPF_OPCODE_CMP:
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
 		KASSERT(i < NPF_NREGS);
@@ -234,7 +235,7 @@
 		return n;
 	case NPF_OPCODE_TAG:
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Key, value */
-		if (nbuf_add_tag(n_ptr, n, i)) {
+		if (nbuf_add_tag(nbuf, n, i)) {
 			goto fail;
 		}
 		break;
@@ -275,9 +276,9 @@
 		/* Source/destination, network address, subnet. */
 		i_ptr = nc_fetch_word(i_ptr, &d);
 		i_ptr = nc_fetch_double(i_ptr, &addr.s6_addr32[0], &n);
-		cmpval = npf_match_ipmask(npc, nbuf, n_ptr,
+		cmpval = npf_iscached(npc, NPC_IP46) ? npf_match_ipmask(npc,
 		    (sizeof(struct in_addr) << 1) | (d & 0x1),
-		    &addr, (npf_netmask_t)n);
+		    &addr, (npf_netmask_t)n) : -1;
 		break;
 	case NPF_OPCODE_IP6MASK:
 		/* Source/destination, network address, subnet. */
@@ -287,49 +288,56 @@
 		i_ptr = nc_fetch_double(i_ptr,
 		    &addr.s6_addr32[2], &addr.s6_addr32[3]);
 		i_ptr = nc_fetch_word(i_ptr, &n);
-		cmpval = npf_match_ipmask(npc, nbuf, n_ptr,
+		cmpval = npf_iscached(npc, NPC_IP46) ? npf_match_ipmask(npc,
 		    (sizeof(struct in6_addr) << 1) | (d & 0x1),
-		    &addr, (npf_netmask_t)n);
+		    &addr, (npf_netmask_t)n) : -1;
 		break;
 	case NPF_OPCODE_TABLE:
 		/* Source/destination, NPF table ID. */
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);
-		cmpval = npf_match_table(npc, nbuf, n_ptr, n, i);
+		cmpval = npf_iscached(npc, NPC_IP46) ?
+		    npf_match_table(npc, n, i) : -1;
 		break;
 	case NPF_OPCODE_TCP_PORTS:
 		/* Source/destination, port range. */
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);
-		cmpval = npf_match_tcp_ports(npc, nbuf, n_ptr, n, i);
+		cmpval = npf_iscached(npc, NPC_TCP) ?
+		    npf_match_tcp_ports(npc, n, i) : -1;
 		break;
 	case NPF_OPCODE_UDP_PORTS:
 		/* Source/destination, port range. */
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);
-		cmpval = npf_match_udp_ports(npc, nbuf, n_ptr, n, i);
+		cmpval = npf_iscached(npc, NPC_UDP) ?
+		    npf_match_udp_ports(npc, n, i) : -1;
 		break;
 	case NPF_OPCODE_TCP_FLAGS:
 		/* TCP flags/mask. */
 		i_ptr = nc_fetch_word(i_ptr, &n);
-		cmpval = npf_match_tcpfl(npc, nbuf, n_ptr, n);
+		cmpval = npf_iscached(npc, NPC_TCP) ?
+		    npf_match_tcpfl(npc, n) : -1;
 		break;
 	case NPF_OPCODE_ICMP4:
 		/* ICMP type/code. */
 		i_ptr = nc_fetch_word(i_ptr, &n);
-		cmpval = npf_match_icmp4(npc, nbuf, n_ptr, n);
+		cmpval = npf_iscached(npc, NPC_ICMP) ?
+		    npf_match_icmp4(npc, n) : -1;
 		break;
 	case NPF_OPCODE_ICMP6:
 		/* ICMP type/code. */
 		i_ptr = nc_fetch_word(i_ptr, &n);
-		cmpval = npf_match_icmp6(npc, nbuf, n_ptr, n);
+		cmpval = npf_iscached(npc, NPC_ICMP) ?
+		    npf_match_icmp6(npc, n) : -1;
 		break;
 	case NPF_OPCODE_PROTO:
 		i_ptr = nc_fetch_word(i_ptr, &n);
-		cmpval = npf_match_proto(npc, nbuf, n_ptr, n);
+		cmpval = npf_iscached(npc, NPC_IP46) ?
+		    npf_match_proto(npc, n) : -1;
 		break;
 	case NPF_OPCODE_ETHER:
 		/* Source/destination, reserved, ethernet type. */
 		i_ptr = nc_fetch_word(i_ptr, &d);
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);
-		cmpval = npf_match_ether(nbuf, d, n, i, &regs[NPF_NREGS - 1]);
+		cmpval = npf_match_ether(nbuf, d, i, &regs[NPF_NREGS - 1]);
 		break;
 	default:
 		/* Invalid instruction. */
--- a/sys/net/npf/npf_rproc.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_rproc.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_rproc.c,v 1.1.2.2 2012/11/18 22:38:25 riz Exp $	*/
+/*	$NetBSD: npf_rproc.c,v 1.1.2.3 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -263,6 +263,7 @@
 {
 	const unsigned extcount = rp->rp_ext_count;
 
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 	KASSERT(rp->rp_refcnt > 0);
 
 	for (unsigned i = 0; i < extcount; i++) {
@@ -271,5 +272,9 @@
 
 		KASSERT(ext->ext_refcnt > 0);
 		extops->proc(npc, nbuf, rp->rp_ext_meta[i], decision);
+
+		if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
+			npf_recache(npc, nbuf);
+		}
 	}
 }
--- a/sys/net/npf/npf_ruleset.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_ruleset.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ruleset.c,v 1.10.2.4 2012/08/13 17:49:52 riz Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.10.2.5 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.10.2.4 2012/08/13 17:49:52 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.10.2.5 2013/02/08 19:18:09 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -353,18 +353,21 @@
  * => Caller is responsible for nbuf chain protection.
  */
 npf_rule_t *
-npf_ruleset_inspect(npf_cache_t *npc, nbuf_t *nbuf, npf_ruleset_t *mainrlset,
-    const ifnet_t *ifp, const int di, const int layer)
+npf_ruleset_inspect(npf_cache_t *npc, nbuf_t *nbuf,
+    const npf_ruleset_t *mainrlset, const int di, const int layer)
 {
+	const ifnet_t *ifp = nbuf->nb_ifp;
 	const int di_mask = (di & PFIL_IN) ? NPF_RULE_IN : NPF_RULE_OUT;
-	npf_ruleset_t *rlset = mainrlset;
+	const npf_ruleset_t *rlset = mainrlset;
 	npf_rule_t *final_rl = NULL, *rl;
 	bool defed = false;
 
+	KASSERT(ifp != NULL);
 	KASSERT(npf_core_locked());
 	KASSERT(((di & PFIL_IN) != 0) ^ ((di & PFIL_OUT) != 0));
 again:
 	TAILQ_FOREACH(rl, &rlset->rs_queue, r_entry) {
+		KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
 
 		/* Match the interface. */
@@ -399,6 +402,8 @@
 		final_rl = NULL;
 		goto again;
 	}
+
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 	return final_rl;
 }
 
--- a/sys/net/npf/npf_sendpkt.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_sendpkt.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_sendpkt.c,v 1.8.4.4 2012/07/16 22:13:27 riz Exp $	*/
+/*	$NetBSD: npf_sendpkt.c,v 1.8.4.5 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2011 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.8.4.4 2012/07/16 22:13:27 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.8.4.5 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -78,7 +78,7 @@
 	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_LAYER4));
 	tcpdlen = npf_tcpsaw(npc, &seq, &ack, &win);
-	oth = &npc->npc_l4.tcp;
+	oth = npc->npc_l4.tcp;
 
 	if (oth->th_flags & TH_RST) {
 		return 0;
@@ -102,7 +102,7 @@
 	m->m_pkthdr.len = len;
 
 	if (npf_iscached(npc, NPC_IP4)) {
-		struct ip *oip = &npc->npc_ip.v4;
+		struct ip *oip = npc->npc_ip.v4;
 
 		ip = mtod(m, struct ip *);
 		memset(ip, 0, len);
@@ -118,7 +118,7 @@
 
 		th = (struct tcphdr *)(ip + 1);
 	} else {
-		struct ip6_hdr *oip = &npc->npc_ip.v6;
+		struct ip6_hdr *oip = npc->npc_ip.v6;
 
 		KASSERT(npf_iscached(npc, NPC_IP6));
 		ip6 = mtod(m, struct ip6_hdr *);
@@ -175,9 +175,9 @@
  * npf_return_icmp: return an ICMP error.
  */
 static int
-npf_return_icmp(npf_cache_t *npc, nbuf_t *nbuf)
+npf_return_icmp(const npf_cache_t *npc, nbuf_t *nbuf)
 {
-	struct mbuf *m = nbuf;
+	struct mbuf *m = nbuf_head_mbuf(nbuf);
 
 	if (npf_iscached(npc, NPC_IP4)) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_ADMIN_PROHIBIT, 0, 0);
@@ -197,17 +197,12 @@
 bool
 npf_return_block(npf_cache_t *npc, nbuf_t *nbuf, const int retfl)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
-
 	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
 		return false;
 	}
 	switch (npf_cache_ipproto(npc)) {
 	case IPPROTO_TCP:
 		if (retfl & NPF_RULE_RETRST) {
-			if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
-				return false;
-			}
 			(void)npf_return_tcp(npc);
 		}
 		break;
--- a/sys/net/npf/npf_session.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_session.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_session.c,v 1.10.4.7 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.10.4.8 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -65,12 +65,12 @@
  *	and should be released by the caller.  Reference guarantees that the
  *	session will not be destroyed, although it may be expired.
  *
- * External session identifiers
+ * Querying ALGs
  *
- *	Application-level gateways (ALGs) can inspect the packet and fill
- *	the packet cache (npf_cache_t) representing the IDs.  It is done
- *	via npf_alg_sessionid() call.  In such case, ALGs are responsible
- *	for correct filling of protocol, addresses and ports/IDs.
+ *	Application-level gateways (ALGs) can inspect the packet and
+ *	determine whether the packet matches an ALG case.  An ALG may
+ *	also look up a session using different identifiers and return
+ *	the packet cache (npf_cache_t) representing the IDs.
  *
  * Lock order
  *
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.7 2012/11/18 21:48:56 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.8 2013/02/08 19:18:09 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -453,72 +453,45 @@
 }
 
 /*
- * npf_session_inspect: lookup for an established session (connection).
+ * npf_session_lookup: lookup for an established session (connection).
  *
- * => If found, we will hold a reference for caller.
+ * => If found, we will hold a reference for the caller.
  */
 npf_session_t *
-npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf, const ifnet_t *ifp,
-    const int di, int *error)
+npf_session_lookup(const npf_cache_t *npc, const nbuf_t *nbuf,
+    const int di, bool *forw)
 {
+	const u_int proto = npf_cache_ipproto(npc);
+	const ifnet_t *ifp = nbuf->nb_ifp;
+	npf_sentry_t senkey, *sen;
+	npf_session_t *se;
 	npf_sehash_t *sh;
-	npf_sentry_t *sen;
-	npf_session_t *se;
 	int flags;
 
-	/*
-	 * Check if session tracking is on.  Also, if layer 3 and 4 are not
-	 * cached - protocol is not supported or packet is invalid.
-	 */
-	if (sess_tracking == SESS_TRACKING_OFF) {
-		return NULL;
-	}
-	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
-		return NULL;
-	}
-
-	/*
-	 * Construct a key for hash and tree lookup.  Execute ALG session
-	 * helpers, which may construct a custom key.
-	 */
-	npf_cache_t algkey = { .npc_info = 0 }, *key;
-	npf_sentry_t senkey;
-
-	if (!npf_alg_sessionid(npc, nbuf, &algkey)) {
-		/* Default: use the cache data of original packet. */
-		key = npc;
-	} else {
-		/* Unique IDs filled by ALG in a separate key cache. */
-		key = &algkey;
-	}
-
-	/* Note: take protocol from the key. */
-	const u_int proto = npf_cache_ipproto(key);
-
 	switch (proto) {
 	case IPPROTO_TCP: {
-		const struct tcphdr *th = &key->npc_l4.tcp;
+		const struct tcphdr *th = npc->npc_l4.tcp;
 		senkey.se_src_id = th->th_sport;
 		senkey.se_dst_id = th->th_dport;
 		break;
 	}
 	case IPPROTO_UDP: {
-		const struct udphdr *uh = &key->npc_l4.udp;
+		const struct udphdr *uh = npc->npc_l4.udp;
 		senkey.se_src_id = uh->uh_sport;
 		senkey.se_dst_id = uh->uh_dport;
 		break;
 	}
 	case IPPROTO_ICMP:
-		if (npf_iscached(key, NPC_ICMP_ID)) {
-			const struct icmp *ic = &key->npc_l4.icmp;
+		if (npf_iscached(npc, NPC_ICMP_ID)) {
+			const struct icmp *ic = npc->npc_l4.icmp;
 			senkey.se_src_id = ic->icmp_id;
 			senkey.se_dst_id = ic->icmp_id;
 			break;
 		}
 		return NULL;
 	case IPPROTO_ICMPV6:
-		if (npf_iscached(key, NPC_ICMP_ID)) {
-			const struct icmp6_hdr *ic6 = &key->npc_l4.icmp6;
+		if (npf_iscached(npc, NPC_ICMP_ID)) {
+			const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
 			senkey.se_src_id = ic6->icmp6_id;
 			senkey.se_dst_id = ic6->icmp6_id;
 			break;
@@ -529,10 +502,10 @@
 		return NULL;
 	}
 
-	KASSERT(key->npc_srcip && key->npc_dstip && key->npc_alen > 0);
-	memcpy(&senkey.se_src_addr, key->npc_srcip, key->npc_alen);
-	memcpy(&senkey.se_dst_addr, key->npc_dstip, key->npc_alen);
-	senkey.se_alen = key->npc_alen;
+	KASSERT(npc->npc_srcip && npc->npc_dstip && npc->npc_alen > 0);
+	memcpy(&senkey.se_src_addr, npc->npc_srcip, npc->npc_alen);
+	memcpy(&senkey.se_dst_addr, npc->npc_dstip, npc->npc_alen);
+	senkey.se_alen = npc->npc_alen;
 
 	/*
 	 * Note: this is a special case where we use common ID pointer
@@ -577,19 +550,63 @@
 		rw_exit(&sh->sh_lock);
 		return NULL;
 	}
+	*forw = sforw;
+
+	/* Update the last activity time, hold a reference and unlock. */
+	getnanouptime(&se->s_atime);
+	atomic_inc_uint(&se->s_refcnt);
+	rw_exit(&sh->sh_lock);
+	return se;
+}
+
+/*
+ * npf_session_inspect: lookup a session inspecting the protocol data.
+ *
+ * => If found, we will hold a reference for the caller.
+ */
+npf_session_t *
+npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di, int *error)
+{
+	npf_session_t *se;
+	bool forw;
+
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
+	/*
+	 * Check if session tracking is on.  Also, if layer 3 and 4 are not
+	 * cached - protocol is not supported or packet is invalid.
+	 */
+	if (sess_tracking == SESS_TRACKING_OFF) {
+		return NULL;
+	}
+	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
+		return NULL;
+	}
+
+	/* Query ALG which may lookup session for us. */
+	if ((se = npf_alg_session(npc, nbuf, di)) != NULL) {
+		/* Note: reference is held. */
+		return se;
+	}
+	if (nbuf_head_mbuf(nbuf) == NULL) {
+		*error = ENOMEM;
+		return NULL;
+	}
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
+	/* Main lookup of the session. */
+	if ((se = npf_session_lookup(npc, nbuf, di, &forw)) == NULL) {
+		return NULL;
+	}
 
 	/* Inspect the protocol data and handle state changes. */
-	if (npf_state_inspect(npc, nbuf, &se->s_state, sforw)) {
-		/* Update the last activity time and hold a reference. */
-		getnanouptime(&se->s_atime);
-		atomic_inc_uint(&se->s_refcnt);
-	} else {
+	if (!npf_state_inspect(npc, nbuf, &se->s_state, forw)) {
 		/* Silently block invalid packets. */
+		npf_session_release(se);
 		npf_stats_inc(NPF_STAT_INVALID_STATE);
 		*error = ENETUNREACH;
 		se = NULL;
 	}
-	rw_exit(&sh->sh_lock);
 	return se;
 }
 
@@ -600,9 +617,9 @@
  * => Session will be activated on the first reference release.
  */
 npf_session_t *
-npf_session_establish(const npf_cache_t *npc, nbuf_t *nbuf,
-    const ifnet_t *ifp, const int di)
+npf_session_establish(npf_cache_t *npc, nbuf_t *nbuf, const int di)
 {
+	const ifnet_t *ifp = nbuf->nb_ifp;
 	const struct tcphdr *th;
 	const struct udphdr *uh;
 	npf_sentry_t *fw, *bk;
@@ -611,6 +628,8 @@
 	u_int proto, alen;
 	bool ok;
 
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
 	/*
 	 * Check if session tracking is on.  Also, if layer 3 and 4 are not
 	 * cached - protocol is not supported or packet is invalid.
@@ -658,7 +677,7 @@
 	switch (proto) {
 	case IPPROTO_TCP:
 		KASSERT(npf_iscached(npc, NPC_TCP));
-		th = &npc->npc_l4.tcp;
+		th = npc->npc_l4.tcp;
 		/* Additional IDs: ports. */
 		fw->se_src_id = th->th_sport;
 		fw->se_dst_id = th->th_dport;
@@ -666,14 +685,14 @@
 	case IPPROTO_UDP:
 		KASSERT(npf_iscached(npc, NPC_UDP));
 		/* Additional IDs: ports. */
-		uh = &npc->npc_l4.udp;
+		uh = npc->npc_l4.udp;
 		fw->se_src_id = uh->uh_sport;
 		fw->se_dst_id = uh->uh_dport;
 		break;
 	case IPPROTO_ICMP:
 		if (npf_iscached(npc, NPC_ICMP_ID)) {
 			/* ICMP query ID. */
-			const struct icmp *ic = &npc->npc_l4.icmp;
+			const struct icmp *ic = npc->npc_l4.icmp;
 			fw->se_src_id = ic->icmp_id;
 			fw->se_dst_id = ic->icmp_id;
 			break;
@@ -683,7 +702,7 @@
 	case IPPROTO_ICMPV6:
 		if (npf_iscached(npc, NPC_ICMP_ID)) {
 			/* ICMP query ID. */
-			const struct icmp6_hdr *ic6 = &npc->npc_l4.icmp6;
+			const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
 			fw->se_src_id = ic6->icmp6_id;
 			fw->se_dst_id = ic6->icmp6_id;
 			break;
@@ -1215,10 +1234,10 @@
 			timespecsub(&tsnow, &se->s_atime, &tsdiff);
 			etime = npf_state_etime(&se->s_state, proto);
 
-			printf("    %p[%p]: %s proto %d flags 0x%x tsdiff %d "
-			    "etime %d\n", sen, se, sen == &se->s_forw_entry ?
-			    "forw" : "back", proto, se->s_flags,
-			    (int)tsdiff.tv_sec, etime);
+			printf("    %p[%p]:\n\t%s proto %d flags 0x%x "
+			    "tsdiff %d etime %d\n", sen, se,
+			    sen == &se->s_forw_entry ? "forw" : "back",
+			    proto, se->s_flags, (int)tsdiff.tv_sec, etime);
 			memcpy(&ip, &sen->se_src_addr, sizeof(ip));
 			printf("\tsrc (%s, %d) ",
 			    inet_ntoa(ip), ntohs(sen->se_src_id));
--- a/sys/net/npf/npf_state.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_state.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state.c,v 1.6.4.6 2012/11/18 21:45:08 riz Exp $	*/
+/*	$NetBSD: npf_state.c,v 1.6.4.7 2013/02/08 19:18:11 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.6.4.6 2012/11/18 21:45:08 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.6.4.7 2013/02/08 19:18:11 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -46,7 +46,7 @@
 /*
  * Generic session states and timeout table.
  *
- * Note: used for connnection-less protocols.
+ * Note: used for connection-less protocols.
  */
 
 #define	NPF_ANY_SESSION_CLOSED		0
@@ -92,7 +92,7 @@
  * success and false otherwise (e.g. if protocol is not supported).
  */
 bool
-npf_state_init(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst)
+npf_state_init(npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst)
 {
 	const int proto = npf_cache_ipproto(npc);
 	bool ret;
@@ -136,7 +136,7 @@
  * the packet belongs to the tracked connection) and false otherwise.
  */
 bool
-npf_state_inspect(const npf_cache_t *npc, nbuf_t *nbuf,
+npf_state_inspect(npf_cache_t *npc, nbuf_t *nbuf,
     npf_state_t *nst, const bool forw)
 {
 	const int proto = npf_cache_ipproto(npc);
--- a/sys/net/npf/npf_state_tcp.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/net/npf/npf_state_tcp.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state_tcp.c,v 1.3.2.6 2012/11/24 04:34:42 riz Exp $	*/
+/*	$NetBSD: npf_state_tcp.c,v 1.3.2.7 2013/02/08 19:18:10 riz Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_state_tcp.c,v 1.3.2.6 2012/11/24 04:34:42 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_state_tcp.c,v 1.3.2.7 2013/02/08 19:18:10 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -296,10 +296,9 @@
  * and thus part of the connection we are tracking.
  */
 static bool
-npf_tcp_inwindow(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
-    const int di)
+npf_tcp_inwindow(npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst, const int di)
 {
-	const struct tcphdr * const th = &npc->npc_l4.tcp;
+	const struct tcphdr * const th = npc->npc_l4.tcp;
 	const int tcpfl = th->th_flags;
 	npf_tcpstate_t *fstate, *tstate;
 	int tcpdlen, ackskew;
@@ -462,9 +461,9 @@
  * the connection and track its state.
  */
 bool
-npf_state_tcp(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst, int di)
+npf_state_tcp(npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst, int di)
 {
-	const struct tcphdr * const th = &npc->npc_l4.tcp;
+	const struct tcphdr * const th = npc->npc_l4.tcp;
 	const int tcpfl = th->th_flags, state = nst->nst_state;
 	int nstate;
 
--- a/sys/sys/mbuf.h	Sat Feb 02 15:44:21 2013 +0000
+++ b/sys/sys/mbuf.h	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: mbuf.h,v 1.148 2011/11/21 04:36:05 christos Exp $	*/
+/*	$NetBSD: mbuf.h,v 1.148.4.1 2013/02/08 19:18:12 riz Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
@@ -857,6 +857,8 @@
 char *	m_mapin(struct mbuf *);
 void	m_move_pkthdr(struct mbuf *to, struct mbuf *from);
 
+bool	m_ensure_contig(struct mbuf **, int);
+
 /* Inline routines. */
 static __inline u_int m_length(const struct mbuf *) __unused;
 
--- a/usr.sbin/npf/npfctl/npfctl.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npfctl.c,v 1.10.2.12 2013/01/07 18:41:37 riz Exp $	*/
+/*	$NetBSD: npfctl.c,v 1.10.2.13 2013/02/08 19:18:09 riz Exp $	*/
 
 /*-
  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: npfctl.c,v 1.10.2.12 2013/01/07 18:41:37 riz Exp $");
+__RCSID("$NetBSD: npfctl.c,v 1.10.2.13 2013/02/08 19:18:09 riz Exp $");
 
 #include <sys/ioctl.h>
 #include <sys/stat.h>
@@ -147,6 +147,10 @@
 		{ NPF_STAT_NAT_CREATE,		"NAT entry allocations"	},
 		{ NPF_STAT_NAT_DESTROY,		"NAT entry destructions"},
 
+		{ -1, "Network buffers"					},
+		{ NPF_STAT_NBUF_NONCONTIG,	"non-contiguous cases"	},
+		{ NPF_STAT_NBUF_CONTIG_FAIL,	"contig alloc failures"	},
+
 		{ -1, "Invalid packet state cases"			},
 		{ NPF_STAT_INVALID_STATE,	"cases in total"	},
 		{ NPF_STAT_INVALID_STATE_TCP1,	"TCP case I"		},
--- a/usr.sbin/npf/npftest/README	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/README	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-$NetBSD: README,v 1.1.2.3 2012/11/18 21:45:08 riz Exp $
+$NetBSD: README,v 1.1.2.4 2013/02/08 19:18:12 riz Exp $
 
 npftest - a tool for regression testing and debugging NPF.
 It uses RUMP framework to run NPF kernel module in the userspace.
@@ -25,5 +25,5 @@
 
 cd src/sys/rump/net/lib/libnpf
 make distclean
-MKDEBUG=yes MKDEBUGLIB=yes DBG=-g make -j8
-sudo MKDEBUG=yes MKDEBUGLIB=yes DBG=-g make install
+MKDEBUG=yes MKDEBUGLIB=yes DBG="-g -O2" make -j8
+sudo MKDEBUG=yes MKDEBUGLIB=yes DBG="-g -O2" make install
--- a/usr.sbin/npf/npftest/libnpftest/npf_mbuf_subr.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_mbuf_subr.c	Fri Feb 08 19:18:09 2013 +0000
@@ -22,6 +22,7 @@
 	assert(m != NULL);
 	dst = mtod(m, void *);
 	memcpy(dst, data, len);
+	m->m_pkthdr.len = len;
 	m->m_len = len;
 	return m;
 }
@@ -35,6 +36,7 @@
 	m0 = m_gethdr(M_WAITOK, MT_HEADER);
 	ethdr = mtod(m0, struct ether_header *);
 	ethdr->ether_type = htons(ETHERTYPE_IP);
+	m0->m_pkthdr.len = sizeof(struct ether_header);
 	m0->m_len = sizeof(struct ether_header);
 
 	m1 = mbuf_construct(proto);
@@ -87,6 +89,7 @@
 	size += mbuf_fill_proto(proto, l4data);
 	iphdr->ip_len = htons(size);
 
+	m->m_pkthdr.len = size;
 	m->m_len = size;
 	m->m_next = NULL;
 	return m;
@@ -112,6 +115,7 @@
 	size += mbuf_fill_proto(proto, l4data);
 	ip6->ip6_plen = htons(size);
 
+	m->m_pkthdr.len = size;
 	m->m_len = size;
 	m->m_next = NULL;
 	return m;
@@ -138,10 +142,11 @@
 	struct ip *iphdr = mtod(m, struct ip *);
 	const size_t hlen = iphdr->ip_hl << 2;
 	struct icmp *ic = (struct icmp *)((uint8_t *)iphdr + hlen);
-	const size_t addlen = m_orig->m_len;
+	const size_t addlen = m_length(m_orig);
 
 	iphdr->ip_len = htons(ntohs(iphdr->ip_len) + addlen);
 	memcpy(&ic->icmp_ip, mtod(m_orig, struct ip *), addlen);
+	m->m_pkthdr.len += addlen;
 	m->m_len += addlen;
 	m_freem(m_orig);
 }
--- a/usr.sbin/npf/npftest/libnpftest/npf_nat_test.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_nat_test.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat_test.c,v 1.1.2.2 2012/08/13 17:49:53 riz Exp $	*/
+/*	$NetBSD: npf_nat_test.c,v 1.1.2.3 2013/02/08 19:18:12 riz Exp $	*/
 
 /*
  * NPF NAT test.
@@ -126,10 +126,11 @@
 }
 
 static bool
-checkresult(bool verbose, unsigned i, struct mbuf *m, int error)
+checkresult(bool verbose, unsigned i, struct mbuf *m, ifnet_t *ifp, int error)
 {
 	const struct test_case *t = &test_cases[i];
 	npf_cache_t npc = { .npc_info = 0 };
+	nbuf_t nbuf;
 
 	if (verbose) {
 		printf("packet %d (expected %d ret %d)\n", i+1, t->ret, error);
@@ -137,13 +138,15 @@
 	if (error) {
 		return error == t->ret;
 	}
-	if (!npf_cache_all(&npc, m)) {
+
+	nbuf_init(&nbuf, m, ifp);
+	if (!npf_cache_all(&npc, &nbuf)) {
 		printf("error: could not fetch the packet data");
 		return false;
 	}
 
-	const struct ip *ip = &npc.npc_ip.v4;
-	const struct udphdr *uh = &npc.npc_l4.udp;
+	const struct ip *ip = npc.npc_ip.v4;
+	const struct udphdr *uh = npc.npc_l4.udp;
 
 	if (verbose) {
 		printf("\tpost-translation: src %s (%d)",
@@ -198,7 +201,7 @@
 			return false;
 		}
 		error = npf_packet_handler(NULL, &m, ifp, t->di);
-		ret = checkresult(verbose, i, m, error);
+		ret = checkresult(verbose, i, m, ifp, error);
 		if (m) {
 			m_freem(m);
 		}
--- a/usr.sbin/npf/npftest/libnpftest/npf_nbuf_test.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_nbuf_test.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nbuf_test.c,v 1.1.4.3 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_nbuf_test.c,v 1.1.4.4 2013/02/08 19:18:11 riz Exp $	*/
 
 /*
  * NPF nbuf interface test.
@@ -16,33 +16,57 @@
 
 CTASSERT((MBUF_CHAIN_LEN % sizeof(uint32_t)) == 0);
 
-static char *
-parse_nbuf_chain(void *nbuf, void *n_ptr)
+static void
+mbuf_consistency_check(nbuf_t *nbuf)
 {
+	struct mbuf *m = nbuf_head_mbuf(nbuf);
+
+	while (m) {
+		assert(m->m_type != MT_FREE);
+		m = m->m_next;
+	}
+}
+
+static char *
+parse_nbuf_chain(struct mbuf *m)
+{
+	const void *dummy_ifp = (void *)0xdeadbeef;
 	char *s = kmem_zalloc(MBUF_CHAIN_LEN + 1, KM_SLEEP);
-	int n, error;
+	nbuf_t nbuf;
+	void *nptr;
+	int n;
+
+	nbuf_init(&nbuf, m, dummy_ifp);
+
+	nptr = nbuf_advance(&nbuf, (random() % 16) + 1, (random() % 16) + 1);
+	mbuf_consistency_check(&nbuf);
+	assert(nptr != NULL);
+	nbuf_reset(&nbuf);
 
 	for (n = 0; ; ) {
 		char d[4 + 1];
 
-		error = nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint32_t), d);
-		if (error) {
-			return NULL;
+		nptr = nbuf_ensure_contig(&nbuf, sizeof(uint32_t));
+		if (nptr == NULL) {
+			break;
 		}
+		mbuf_consistency_check(&nbuf);
+		memcpy(&d, nptr, sizeof(uint32_t));
+
 		d[sizeof(d) - 1] = '\0';
 		strcat(s, d);
 
 		if (n + sizeof(uint32_t) == MBUF_CHAIN_LEN) {
-			assert(nbuf_advance(&nbuf, n_ptr,
-			    sizeof(uint32_t) - 1));
+			assert(nbuf_advance(&nbuf, sizeof(uint32_t) - 1, 0));
+			assert(!nbuf_advance(&nbuf, 1, 0));
 			break;
 		}
-		n_ptr = nbuf_advance(&nbuf, n_ptr, sizeof(uint32_t));
-		if (n_ptr == NULL) {
-			return NULL;
+		if (!nbuf_advance(&nbuf, sizeof(uint32_t), 0)) {
+			break;
 		}
 		n += sizeof(uint32_t);
 	}
+	mbuf_consistency_check(&nbuf);
 	return s;
 }
 
@@ -68,8 +92,9 @@
 {
 	struct mbuf *m;
 
-	m = kmem_zalloc(sizeof(struct mbuf) + off + len, KM_SLEEP);
-	m->m_data = (char *)m + sizeof(struct mbuf) + off;
+	KASSERT(off + len < MLEN);
+	m = m_get(M_WAITOK, MT_DATA);
+	m->m_data = (char *)m->m_data + off;
 	m->m_len = len;
 	return m;
 }
@@ -90,12 +115,17 @@
 
 		/* Fill data with letters from 'a' to 'z'. */
 		memset(m0->m_data, 'a' + n, 1);
-		n = ('a' + n) != 'z' ? n + 1 : 0;
+		n = ('a' + n) < 'z' ? n + 1 : 0;
 
 		/* Next mbuf.. */
 		m0->m_next = m;
 		m = m0;
 	}
+
+	m0 = m_gethdr(M_WAITOK, MT_HEADER);
+	m0->m_pkthdr.len = clen;
+	m0->m_len = 0;
+	m0->m_next = m;
 	return m0;
 }
 
@@ -127,26 +157,30 @@
 		d = m0->m_data;
 		while (len--) {
 			*d++ = ('a' + n);
-			n = ('a' + n) != 'z' ? n + 1 : 0;
+			n = ('a' + n) < 'z' ? n + 1 : 0;
 		}
 
 		/* Next mbuf.. */
 		m0->m_next = m;
 		m = m0;
 	}
-	assert(tlen == chain_len);
+	KASSERT(tlen == chain_len);
+
+	m0 = m_gethdr(M_WAITOK, MT_HEADER);
+	m0->m_pkthdr.len = tlen;
+	m0->m_next = m;
+	m0->m_len = 0;
 	return m0;
 }
 
 static bool
-validate_mbuf_data(struct mbuf *m, bool verbose, char *bufa, char *bufb)
+validate_mbuf_data(bool verbose, char *bufa, char *bufb)
 {
 	bool ret = (strcmp(bufa, bufb) == 0);
 
 	if (verbose) {
 		printf("Buffer A: %s\nBuffer B: %s\n", bufa, bufb);
 	}
-	/* XXX free m */
 	kmem_free(bufa, MBUF_CHAIN_LEN + 1);
 	kmem_free(bufb, MBUF_CHAIN_LEN + 1);
 	return ret;
@@ -161,13 +195,13 @@
 
 	m1 = mbuf_random_len(MBUF_CHAIN_LEN);
 	bufa = mbuf_getstring(m1);
-	bufb = parse_nbuf_chain(m1, m1->m_data);
-	fail |= !validate_mbuf_data(m1, verbose, bufa, bufb);
+	bufb = parse_nbuf_chain(m1);
+	fail |= !validate_mbuf_data(verbose, bufa, bufb);
 
 	m2 = mbuf_bytesize(MBUF_CHAIN_LEN);
 	bufa = mbuf_getstring(m2);
-	bufb = parse_nbuf_chain(m2, m2->m_data);
-	fail |= !validate_mbuf_data(m2, verbose, bufa, bufb);
+	bufb = parse_nbuf_chain(m2);
+	fail |= !validate_mbuf_data(verbose, bufa, bufb);
 
 	return !fail;
 }
--- a/usr.sbin/npf/npftest/libnpftest/npf_processor_test.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_processor_test.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_processor_test.c,v 1.1.4.4 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_processor_test.c,v 1.1.4.5 2013/02/08 19:18:12 riz Exp $	*/
 
 /*
  * NPF n-code processor test.
@@ -141,36 +141,49 @@
 	return fail;
 }
 
+static void
+npf_nc_cachetest(struct mbuf *m, npf_cache_t *npc, nbuf_t *nbuf)
+{
+	const void *dummy_ifp = (void *)0xdeadbeef;
+
+	nbuf_init(nbuf, m, dummy_ifp);
+	memset(npc, 0, sizeof(npf_cache_t));
+	npf_cache_all(npc, nbuf);
+}
+
 bool
 npf_processor_test(bool verbose)
 {
 	npf_cache_t npc;
 	struct mbuf *m;
+	nbuf_t nbuf;
 	int errat, ret;
 	bool fail = false;
 
+#if 0
 	/* Layer 2 (Ethernet + IP + TCP). */
-	m = fill_packet(IPPROTO_TCP, true);
 	ret = npf_ncode_validate(nc_match, sizeof(nc_match), &errat);
 	fail |= retcode_fail_p("Ether validation", verbose, ret, 0);
 
-	memset(&npc, 0, sizeof(npf_cache_t));
-	ret = npf_ncode_process(&npc, nc_match, m, NPF_LAYER_2);
+	m = fill_packet(IPPROTO_TCP, true);
+	npf_nc_cachetest(m, &npc, &nbuf);
+	ret = npf_ncode_process(&npc, nc_match, &nbuf, NPF_LAYER_2);
 	fail |= retcode_fail_p("Ether", verbose, ret, 0);
 	m_freem(m);
+#endif
 
 	/* Layer 3 (IP + TCP). */
 	m = fill_packet(IPPROTO_TCP, false);
-	memset(&npc, 0, sizeof(npf_cache_t));
-	ret = npf_ncode_process(&npc, nc_match, m, NPF_LAYER_3);
+	npf_nc_cachetest(m, &npc, &nbuf);
+	ret = npf_ncode_process(&npc, nc_match, &nbuf, NPF_LAYER_3);
 	fail |= retcode_fail_p("IPv4 mask 1", verbose, ret, 0);
 
 	/* Non-matching IPv4 case. */
 	ret = npf_ncode_validate(nc_nmatch, sizeof(nc_nmatch), &errat);
 	fail |= retcode_fail_p("IPv4 mask 2 validation", verbose, ret, 0);
 
-	memset(&npc, 0, sizeof(npf_cache_t));
-	ret = npf_ncode_process(&npc, nc_nmatch, m, NPF_LAYER_3);
+	npf_nc_cachetest(m, &npc, &nbuf);
+	ret = npf_ncode_process(&npc, nc_nmatch, &nbuf, NPF_LAYER_3);
 	fail |= retcode_fail_p("IPv4 mask 2", verbose, ret, 255);
 
 	/* Invalid n-code case. */
@@ -181,8 +194,8 @@
 	ret = npf_ncode_validate(nc_rmatch, sizeof(nc_rmatch), &errat);
 	fail |= retcode_fail_p("RISC-like n-code validation", verbose, ret, 0);
 
-	memset(&npc, 0, sizeof(npf_cache_t));
-	ret = npf_ncode_process(&npc, nc_rmatch, m, NPF_LAYER_3);
+	npf_nc_cachetest(m, &npc, &nbuf);
+	ret = npf_ncode_process(&npc, nc_rmatch, &nbuf, NPF_LAYER_3);
 	fail |= retcode_fail_p("RISC-like n-code", verbose, ret, 1);
 	m_freem(m);
 
@@ -191,8 +204,8 @@
 	fail |= retcode_fail_p("IPv6 mask validation", verbose, ret, 0);
 
 	m = fill_packet6(IPPROTO_TCP);
-	memset(&npc, 0, sizeof(npf_cache_t));
-	ret = npf_ncode_process(&npc, nc_match6, m, NPF_LAYER_3);
+	npf_nc_cachetest(m, &npc, &nbuf);
+	ret = npf_ncode_process(&npc, nc_match6, &nbuf, NPF_LAYER_3);
 	fail |= retcode_fail_p("IPv6 mask", verbose, ret, 0);
 	m_freem(m);
 
--- a/usr.sbin/npf/npftest/libnpftest/npf_rule_test.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_rule_test.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_rule_test.c,v 1.1.2.3 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_rule_test.c,v 1.1.2.4 2013/02/08 19:18:12 riz Exp $	*/
 
 /*
  * NPF ruleset test.
@@ -78,17 +78,21 @@
 npf_rule_raw_test(bool verbose, struct mbuf *m, ifnet_t *ifp, int di)
 {
 	npf_cache_t npc = { .npc_info = 0 };
+	nbuf_t nbuf;
 	npf_rule_t *rl;
 	int retfl, error;
 
+	nbuf_init(&nbuf, m, ifp);
+	npf_cache_all(&npc, &nbuf);
+
 	npf_core_enter();
-	rl = npf_ruleset_inspect(&npc, m, npf_core_ruleset(),
-	    ifp, di, NPF_LAYER_3);
+	rl = npf_ruleset_inspect(&npc, &nbuf, npf_core_ruleset(),
+	    di, NPF_LAYER_3);
 	if (rl) {
 		if (verbose) {
 			npf_rulenc_dump(rl);
 		}
-		error = npf_rule_apply(&npc, m, rl, &retfl);
+		error = npf_rule_apply(&npc, &nbuf, rl, &retfl);
 	} else {
 		npf_core_exit();
 		error = ENOENT;
--- a/usr.sbin/npf/npftest/libnpftest/npf_state_test.c	Sat Feb 02 15:44:21 2013 +0000
+++ b/usr.sbin/npf/npftest/libnpftest/npf_state_test.c	Fri Feb 08 19:18:09 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state_test.c,v 1.1.2.4 2012/11/18 21:48:56 riz Exp $	*/
+/*	$NetBSD: npf_state_test.c,v 1.1.2.5 2013/02/08 19:18:12 riz Exp $	*/
 
 /*
  * NPF state tracking test.
@@ -135,7 +135,7 @@
 {
 	const tcp_meta_t *p = &packet_sequence[i];
 	npf_cache_t npc = { .npc_info = 0 };
-	nbuf_t *nbuf;
+	nbuf_t nbuf;
 	int ret;
 
 	if (p->flags == 0) {
@@ -144,17 +144,18 @@
 		return true;
 	}
 
-	nbuf = (nbuf_t *)construct_packet(p);
-	ret = npf_cache_all(&npc, nbuf);
+	const void *dummy_ifp = (void *)0xdeadbeef;
+	nbuf_init(&nbuf, construct_packet(p), dummy_ifp);
+	ret = npf_cache_all(&npc, &nbuf);
 	KASSERT((ret & NPC_IPFRAG) == 0);
 
 	if (*snew) {
-		ret = npf_state_init(&npc, nbuf, nst);
+		ret = npf_state_init(&npc, &nbuf, nst);
 		KASSERT(ret == true);
 		*snew = false;
 	}
-	ret = npf_state_inspect(&npc, nbuf, nst, p->flags == OUT);
-	m_freem(nbuf);
+	ret = npf_state_inspect(&npc, &nbuf, nst, p->flags == OUT);
+	m_freem(nbuf.nb_mbuf);
 
 	return ret ? true : (p->flags & ERR) != 0;
 }