NPF checkpoint: trunk
author rmind <rmind@NetBSD.org>
Thu, 11 Nov 2010 06:30:39 +0000
branch trunk
changeset 194519 6c6ec574a71d
parent 194518 d833430a00dd
child 194520 8b9362902b26
NPF checkpoint:
- Add proper TCP state tracking as described in Guido van Rooij's paper, plus handle the TCP Window Scaling option.
- Completely rework npf_cache_t, reduce granularity, simplify code.
- Add npf_addr_t as an abstraction; amend session handling code, as well as NAT code et al., to use it. The design is now prepared for IPv6 support.
- Handle IPv4 fragments, i.e. perform packet reassembly.
- Add support for IPv4 ID randomization and minimum TTL enforcement.
- Add support for TCP MSS "clamping".
- Random bits for IPv6.
Various fixes and clean-up.
sys/modules/npf/Makefile
sys/net/npf/files.npf
sys/net/npf/npf.h
sys/net/npf/npf_alg.c
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_ctl.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_instr.c
sys/net/npf/npf_mbuf.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_ncode.h
sys/net/npf/npf_processor.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_sendpkt.c
sys/net/npf/npf_session.c
sys/net/npf/npf_state.c
sys/net/npf/npf_tableset.c
usr.sbin/npf/npfctl/npf_data.c
usr.sbin/npf/npfctl/npf_ncgen.c
usr.sbin/npf/npfctl/npf_parser.c
usr.sbin/npf/npfctl/npfctl.c
usr.sbin/npf/npfctl/npfctl.h
--- a/sys/modules/npf/Makefile	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/modules/npf/Makefile	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.2 2010/09/16 04:53:27 rmind Exp $
+# $NetBSD: Makefile,v 1.3 2010/11/11 06:30:39 rmind Exp $
 
 .include "../Makefile.inc"
 
@@ -8,6 +8,6 @@
 
 SRCS=		npf.c npf_ctl.c npf_handler.c npf_instr.c npf_mbuf.c
 SRCS+=		npf_processor.c npf_ruleset.c npf_tableset.c npf_inet.c
-SRCS+=		npf_session.c npf_nat.c npf_sendpkt.c npf_alg.c
+SRCS+=		npf_session.c npf_state.c npf_nat.c npf_alg.c npf_sendpkt.c
 
 .include <bsd.kmodule.mk>
--- a/sys/net/npf/files.npf	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/files.npf	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.2 2010/09/16 04:53:27 rmind Exp $
+# $NetBSD: files.npf,v 1.3 2010/11/11 06:30:39 rmind Exp $
 #
 # Public Domain.
 #
@@ -20,6 +20,7 @@
 file	net/npf/npf_tableset.c			npf
 file	net/npf/npf_inet.c			npf
 file	net/npf/npf_session.c			npf
+file	net/npf/npf_state.c			npf
 file	net/npf/npf_nat.c			npf
 file	net/npf/npf_alg.c			npf
 file	net/npf/npf_sendpkt.c			npf
--- a/sys/net/npf/npf.h	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf.h	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.h,v 1.3 2010/09/25 00:25:31 rmind Exp $	*/
+/*	$NetBSD: npf.h,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -66,36 +66,57 @@
 
 typedef void			nbuf_t;
 
+#if defined(_KERNEL) || defined(_NPF_TESTING)
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+
+/*
+ * Storage of address, both IPv4 and IPv6.
+ */
+typedef struct in6_addr		npf_addr_t;
+
 /*
  * Packet information cache.
  */
 
-#define	NPC_IP46	0x01	/* IPv4,6 packet with known protocol. */
-#define	NPC_IP6VER	0x02	/* If NPI_IP46, then: 0 - IPv4, 1 - IPv6. */
-#define	NPC_ADDRS	0x04	/* Known source and destination addresses. */
-#define	NPC_PORTS	0x08	/* Known ports (for TCP/UDP cases). */
-#define	NPC_ICMP	0x10	/* ICMP with known type and code. */
-#define	NPC_ICMP_ID	0x20	/* ICMP with query ID. */
+#define	NPC_IP4		0x01	/* Indicates fetched IPv4 header. */
+#define	NPC_IP6		0x02	/* Indicates IPv6 header. */
+#define	NPC_IPFRAG	0x04	/* IPv4 fragment. */
+#define	NPC_LAYER4	0x08	/* Layer 4 has been fetched. */
 
-/* XXX: Optimise later, pack in unions, perhaps bitfields, etc. */
+#define	NPC_TCP		0x10	/* TCP header. */
+#define	NPC_UDP		0x20	/* UDP header. */
+#define	NPC_ICMP	0x40	/* ICMP header. */
+#define	NPC_ICMP_ID	0x80	/* ICMP with query ID. */
+
+#define	NPC_IP46	(NPC_IP4|NPC_IP6)
+
 typedef struct {
+	/* Information flags and packet direction. */
 	uint32_t		npc_info;
-	int			npc_dir;
-	/* NPC_IP46 */
-	uint8_t			npc_proto;
-	uint16_t		npc_hlen;
-	uint16_t		npc_ipsum;
-	/* NPC_ADDRS */
-	in_addr_t		npc_srcip;
-	in_addr_t		npc_dstip;
-	/* NPC_PORTS */
-	in_port_t		npc_sport;
-	in_port_t		npc_dport;
-	uint8_t			npc_tcp_flags;
-	/* NPC_ICMP */
-	uint8_t			npc_icmp_type;
-	uint8_t			npc_icmp_code;
-	uint16_t		npc_icmp_id;
+	int			npc_di;
+	/* Pointers to the IP v4/v6 addresses. */
+	npf_addr_t *		npc_srcip;
+	npf_addr_t *		npc_dstip;
+	/* Size (v4 or v6) of IP addresses. */
+	int			npc_ipsz;
+	/* IPv4, IPv6. */
+	union {
+		struct ip	v4;
+		struct ip6_hdr	v6;
+	} npc_ip;
+	/* TCP, UDP, ICMP. */
+	union {
+		struct tcphdr	tcp;
+		struct udphdr	udp;
+		struct icmp	icmp;
+	} npc_l4;
 } npf_cache_t;
 
 static inline bool
@@ -105,12 +126,20 @@
 	return __predict_true((npc->npc_info & inf) != 0);
 }
 
-#if defined(_KERNEL) || defined(_NPF_TESTING)
+static inline int
+npf_cache_ipproto(const npf_cache_t *npc)
+{
+	const struct ip *ip = &npc->npc_ip.v4;
+
+	KASSERT(npf_iscached(npc, NPC_IP46));
+	return ip->ip_p;
+}
 
 /* Network buffer interface. */
 void *		nbuf_dataptr(void *);
 void *		nbuf_advance(nbuf_t **, void *, u_int);
 int		nbuf_advfetch(nbuf_t **, void **, u_int, size_t, void *);
+int		nbuf_advstore(nbuf_t **, void **, u_int, size_t, void *);
 int		nbuf_fetch_datum(nbuf_t *, void *, size_t, void *);
 int		nbuf_store_datum(nbuf_t *, void *, size_t, void *);
 
@@ -118,30 +147,31 @@
 int		nbuf_find_tag(nbuf_t *, uint32_t, void **);
 
 /* Ruleset interface. */
-npf_rule_t *	npf_rule_alloc(int, pri_t, int, void *, size_t);
+npf_rule_t *	npf_rule_alloc(int, pri_t, int, void *, size_t, bool, int, int);
 void		npf_rule_free(npf_rule_t *);
 void		npf_activate_rule(npf_rule_t *);
 void		npf_deactivate_rule(npf_rule_t *);
 
 npf_hook_t *	npf_hook_register(npf_rule_t *,
-		    void (*)(const npf_cache_t *, void *), void *);
+		    void (*)(npf_cache_t *, nbuf_t *, void *), void *);
 void		npf_hook_unregister(npf_rule_t *, npf_hook_t *);
 
 #endif	/* _KERNEL */
 
 /* Rule attributes. */
 #define	NPF_RULE_PASS			0x0001
-#define	NPF_RULE_COUNT			0x0002
+#define	NPF_RULE_DEFAULT		0x0002
 #define	NPF_RULE_FINAL			0x0004
-#define	NPF_RULE_LOG			0x0008
-#define	NPF_RULE_DEFAULT		0x0010
-#define	NPF_RULE_KEEPSTATE		0x0020
+#define	NPF_RULE_KEEPSTATE		0x0008
+#define	NPF_RULE_COUNT			0x0010
+#define	NPF_RULE_LOG			0x0020
 #define	NPF_RULE_RETRST			0x0040
 #define	NPF_RULE_RETICMP		0x0080
+#define	NPF_RULE_NORMALIZE		0x0100
 
-#define	NPF_RULE_IN			0x1000
-#define	NPF_RULE_OUT			0x2000
-#define	NPF_RULE_DIMASK			0x3000
+#define	NPF_RULE_IN			0x10000000
+#define	NPF_RULE_OUT			0x20000000
+#define	NPF_RULE_DIMASK			(NPF_RULE_IN | NPF_RULE_OUT)
 
 /* Address translation types and flags. */
 #define	NPF_NATIN			1
--- a/sys/net/npf/npf_alg.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_alg.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_alg.c,v 1.2 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -31,16 +31,15 @@
 
 /*
  * NPF interface for application level gateways (ALGs).
+ *
+ * XXX: locking
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.2 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
-#endif
-
 #include <sys/kmem.h>
 #include <sys/pool.h>
 #include <net/pfil.h>
@@ -50,14 +49,14 @@
 /* NAT ALG structure for registration. */
 struct npf_alg {
 	LIST_ENTRY(npf_alg)		na_entry;
-	void *				na_ptr;
+	npf_alg_t *			na_bptr;
 	npf_algfunc_t			na_match_func;
 	npf_algfunc_t			na_out_func;
 	npf_algfunc_t			na_in_func;
 	npf_algfunc_t			na_seid_func;
 };
 
-static LIST_HEAD(, npf_alg)		nat_alg_list;
+static LIST_HEAD(, npf_alg)		nat_alg_list	__read_mostly;
 
 void
 npf_alg_sysinit(void)
@@ -85,7 +84,7 @@
 	npf_alg_t *alg;
 
 	alg = kmem_alloc(sizeof(npf_alg_t), KM_SLEEP);
-	alg->na_ptr = alg;
+	alg->na_bptr = alg;
 	alg->na_match_func = match;
 	alg->na_out_func = out;
 	alg->na_in_func = in;
@@ -114,7 +113,10 @@
 	return 0;
 }
 
-void
+/*
+ * npf_alg_match: call ALG matching inspectors, determine if any ALG matches.
+ */
+bool
 npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt)
 {
 	npf_alg_t *alg;
@@ -122,15 +124,15 @@
 
 	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
 		func = alg->na_match_func;
-		if (__predict_true(func != NULL)) {
-			func(npc, nbuf, nt);
-			return;
+		if (func && func(npc, nbuf, nt)) {
+			return true;
 		}
 	}
+	return false;
 }
 
 /*
- * npf_alg_exec: execute in/out inspection hooks of each ALG.
+ * npf_alg_exec: execute ALG hooks for translation.
  */
 void
 npf_alg_exec(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, const int di)
@@ -157,10 +159,7 @@
 
 	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
 		func = alg->na_seid_func;
-		if (__predict_true(func == NULL)) {
-			continue;
-		}
-		if (func(npc, nbuf, key)) {
+		if (func && func(npc, nbuf, (npf_nat_t *)key)) {
 			return true;
 		}
 	}
--- a/sys/net/npf/npf_alg_icmp.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.3 2010/09/25 00:25:31 rmind Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -33,13 +33,11 @@
  * NPF ALG for ICMP and traceroute translations.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
-#endif
 #include <sys/module.h>
 #include <sys/pool.h>
 
@@ -111,33 +109,37 @@
 }
 
 /*
- * npfa_icmp_match: ALG matching inspector, determines ALG case and
- * establishes a session for "backwards" stream.
+ * npfa_icmp_match: ALG matching inspector - determines ALG case and
+ * associates ALG with NAT entry.
  */
 static bool
 npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
 {
-	const int proto = npc->npc_proto;
-	void *n_ptr = nbuf_dataptr(nbuf);
-	u_int offby;
-	uint8_t ttl;
+	const int proto = npf_cache_ipproto(npc);
+	struct ip *ip = &npc->npc_ip.v4;
+	in_port_t dport;
+
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
+
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &npc->npc_l4.tcp;
+		dport = ntohs(th->th_dport);
+	} else if (proto == IPPROTO_UDP) {
+		struct udphdr *uh = &npc->npc_l4.udp;
+		dport = ntohs(uh->uh_dport);
+	} else {
+		return false;
+	}
 
 	/* Handle TCP/UDP traceroute - check for port range. */
-	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
-		return false;
-	}
-	KASSERT(npf_iscached(npc, NPC_PORTS));
-	in_port_t dport = ntohs(npc->npc_dport);
 	if (dport < TR_BASE_PORT || dport > TR_PORT_RANGE) {
 		return false;
 	}
 
 	/* Check for low TTL. */
-	offby = offsetof(struct ip, ip_ttl);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &ttl))
+	if (ip->ip_ttl > TR_MAX_TTL) {
 		return false;
-	if (ttl > TR_MAX_TTL)
-		return false;
+	}
 
 	/* Associate ALG with translation entry. */
 	npf_nat_t *nt = ntptr;
@@ -152,6 +154,7 @@
 static inline bool
 npf_icmp_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
 {
+	struct icmp *ic;
 	u_int offby;
 
 	/* Per RFC 792. */
@@ -167,17 +170,15 @@
 			return false;
 		}
 		/* Fetch into the cache. */
-		if (!npf_ip4_proto(npc, nbuf, n_ptr)) {
+		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
 			return false;
 		}
-		const int proto = npc->npc_proto;
-		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
-			return false;
-		}
-		if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
-			return false;
-		}
-		if (!npf_fetch_ports(npc, nbuf, n_ptr, proto)) {
+		switch (npf_cache_ipproto(npc)) {
+		case IPPROTO_TCP:
+			return npf_fetch_tcp(npc, nbuf, n_ptr);
+		case IPPROTO_UDP:
+			return npf_fetch_udp(npc, nbuf, n_ptr);
+		default:
 			return false;
 		}
 		return true;
@@ -189,9 +190,10 @@
 	case ICMP_IREQ:
 	case ICMP_IREQREPLY:
 		/* Should contain ICMP query ID. */
+		ic = &npc->npc_l4.icmp;
 		offby = offsetof(struct icmp, icmp_id);
-		if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t),
-		    &npc->npc_icmp_id)) {
+		if (nbuf_advfetch(&nbuf, &n_ptr, offby,
+		    sizeof(uint16_t), &ic->icmp_id)) {
 			return false;
 		}
 		npc->npc_info |= NPC_ICMP_ID;
@@ -210,52 +212,48 @@
 npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
 {
 	npf_cache_t *key = keyptr;
-	void *n_ptr;
 
 	/* ICMP? Get unique identifiers from ICMP packet. */
-	if (npc->npc_proto != IPPROTO_ICMP) {
+	if (npf_cache_ipproto(npc) != IPPROTO_ICMP) {
 		return false;
 	}
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ICMP));
+	KASSERT(npf_iscached(npc, NPC_IP46));
+	KASSERT(npf_iscached(npc, NPC_ICMP));
 	key->npc_info = NPC_ICMP;
 
 	/* Advance to ICMP header. */
-	n_ptr = nbuf_dataptr(nbuf);
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen)) == NULL) {
+	struct ip *ip = &npc->npc_ip.v4;
+	void *n_ptr = nbuf_dataptr(nbuf);
+
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, ip->ip_hl << 2)) == NULL) {
 		return false;
 	}
 
-	/* Fetch into the separate (key) cache. */
-	if (!npf_icmp_uniqid(npc->npc_icmp_type, key, nbuf, n_ptr)) {
+	/* Fetch relevant data into the separate ("key") cache. */
+	struct icmp *ic = &npc->npc_l4.icmp;
+	if (!npf_icmp_uniqid(ic->icmp_type, key, nbuf, n_ptr)) {
 		return false;
 	}
 
 	if (npf_iscached(key, NPC_ICMP_ID)) {
-		/* Construct the key. */
-		key->npc_proto = npc->npc_proto;
-		key->npc_dir = npc->npc_dir;
-		/* Save IP addresses. */
-		key->npc_srcip = npc->npc_srcip;
-		key->npc_dstip = npc->npc_dstip;
-		key->npc_info |= NPC_IP46 | NPC_ADDRS | NPC_PORTS;
-		/* Fake ports with ICMP query IDs. */
-		key->npc_sport = key->npc_icmp_id;
-		key->npc_dport = key->npc_icmp_id;
-	} else {
-		in_addr_t addr;
-		in_port_t port;
-		/*
-		 * Embedded IP packet is the original of "forwards" stream.
-		 * We should imitate the "backwards" stream for inspection.
-		 */
-		KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
-		addr = key->npc_srcip;
-		port = key->npc_sport;
-		key->npc_srcip = key->npc_dstip;
-		key->npc_dstip = addr;
-		key->npc_sport = key->npc_dport;
-		key->npc_dport = port;
+		struct icmp *keyic = &key->npc_l4.icmp;
+
+		/* Copy ICMP ID to the cache and flag it. */
+		npc->npc_info |= NPC_ICMP_ID;
+		ic->icmp_id = keyic->icmp_id;
+
+		/* Note: return 'false', since key is the original cache. */
+		return false;
 	}
+
+	/*
+	 * Embedded IP packet is the original of "forwards" stream.
+	 * We should imitate the "backwards" stream for inspection.
+	 */
+	KASSERT(npf_iscached(key, NPC_IP46));
+	KASSERT(npf_iscached(key, NPC_LAYER4));
+	key->npc_di = (npc->npc_di == PFIL_IN) ? PFIL_OUT : PFIL_IN;
+
 	return true;
 }
 
@@ -266,61 +264,73 @@
 static bool
 npfa_icmp_natin(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
 {
-	void *n_ptr = nbuf_dataptr(nbuf);
 	npf_cache_t enpc;
-	u_int offby;
-	uint16_t cksum;
 
 	/* XXX: Duplicated work. */
 	if (!npfa_icmp_session(npc, nbuf, &enpc)) {
 		return false;
 	}
-	KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
+	KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_LAYER4));
+
+	const int proto = npf_cache_ipproto(&enpc);
+	void *n_ptr = nbuf_dataptr(nbuf);
+	void *cnbuf = nbuf, *cnptr = n_ptr;
+	struct icmp *ic = &npc->npc_l4.icmp;
+	uint16_t cksum = ic->icmp_cksum;
+	struct ip *ip = &enpc.npc_ip.v4;
+	uint16_t ecksum = ip->ip_sum, l4cksum;
 
-	/* Advance to ICMP checksum and fetch it. */
-	offby = npc->npc_hlen + offsetof(struct icmp, icmp_cksum);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t), &cksum)) {
+	/* Save TCP/UDP checksum for update. */
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &enpc.npc_l4.tcp;
+		l4cksum = th->th_sum;
+	} else {
+		struct udphdr *uh = &enpc.npc_l4.udp;
+		l4cksum = uh->uh_sum;
+	}
+
+	/* Advance to the original IP header, which is embedded after ICMP. */
+	u_int offby = offsetof(struct icmp, icmp_ip);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
 		return false;
 	}
 
-	/* Save the data for checksum update later. */
-	void *cnbuf = nbuf, *cnptr = n_ptr;
-	uint16_t ecksum = enpc.npc_ipsum;
+	npf_nat_t *nt = ntptr;
+	npf_addr_t *addr;
+	in_port_t port;
 
-	/* Advance to the original IP header, which is embedded after ICMP. */
-	offby = offsetof(struct icmp, icmp_ip) -
-	    offsetof(struct icmp, icmp_cksum);
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
-		return false;
-	}
+	npf_nat_getorig(nt, &addr, &port);
 
 	/*
 	 * Rewrite source IP address and port of the embedded IP header,
 	 * which represents original packet - therefore passing PFIL_OUT.
+	 * Note: checksum is first, since it uses values from the cache.
 	 */
-	npf_nat_t *nt = ntptr;
-	in_addr_t addr;
-	in_port_t port;
-
-	npf_nat_getorig(nt, &addr, &port);
-
+	if (!npf_rwrcksum(&enpc, nbuf, n_ptr, PFIL_OUT, addr, port)) {
+		return false;
+	}
 	if (!npf_rwrip(&enpc, nbuf, n_ptr, PFIL_OUT, addr)) {
 		return false;
 	}
-	if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port, addr)) {
+	if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port)) {
 		return false;
 	}
 
 	/*
-	 * Fixup and update ICMP checksum.
-	 * Note: npf_rwrip() has updated the IP checksum.
+	 * Calculate ICMP checksum.
 	 */
-	cksum = npf_fixup32_cksum(cksum, enpc.npc_srcip, addr);
-	cksum = npf_fixup16_cksum(cksum, enpc.npc_sport, port);
-	cksum = npf_fixup16_cksum(cksum, ecksum, enpc.npc_ipsum);
-	/* FIXME: Updated UDP/TCP checksum joins-in too., when != 0, sigh. */
-	if (nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum)){
-		return false;
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &enpc.npc_l4.tcp;
+		cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
+		cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum);
+	} else {
+		struct udphdr *uh = &enpc.npc_l4.udp;
+		cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
+		cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
 	}
-	return true;
+	cksum = npf_addr_cksum(cksum, enpc.npc_ipsz, enpc.npc_srcip, addr);
+	cksum = npf_fixup16_cksum(cksum, ecksum, ip->ip_sum);
+
+	/* Rewrite ICMP checksum. */
+	return nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum);
 }
--- a/sys/net/npf/npf_ctl.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_ctl.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_ctl.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -39,14 +39,12 @@
  * - Consider implementing 'sync' functionality.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
-#endif
 
 #include <prop/proplib.h>
 
@@ -190,8 +188,9 @@
 {
 	npf_rule_t *rl;
 	prop_object_t obj;
-	int attr, ifidx;
+	int attr, ifidx, minttl, maxmss;
 	pri_t pri;
+	bool rnd_ipid;
 	size_t nc_size;
 	void *nc;
 
@@ -211,6 +210,18 @@
 	obj = prop_dictionary_get(rldict, "interface");
 	ifidx = prop_number_integer_value(obj);
 
+	/* Randomize IP ID (bool). */
+	obj = prop_dictionary_get(rldict, "randomize-id");
+	rnd_ipid = prop_bool_true(obj);
+
+	/* Minimum IP TTL (integer). */
+	obj = prop_dictionary_get(rldict, "min-ttl");
+	minttl = prop_number_integer_value(obj);
+
+	/* Maximum TCP MSS (integer). */
+	obj = prop_dictionary_get(rldict, "max-mss");
+	maxmss = prop_number_integer_value(obj);
+
 	/* N-code (binary data). */
 	obj = prop_dictionary_get(rldict, "ncode");
 	if (obj) {
@@ -233,7 +244,8 @@
 	}
 
 	/* Allocate and setup NPF rule. */
-	rl = npf_rule_alloc(attr, pri, ifidx, nc, nc_size);
+	rl = npf_rule_alloc(attr, pri, ifidx, nc, nc_size,
+	    rnd_ipid, minttl, maxmss);
 	if (rl == NULL) {
 		if (nc) {
 			npf_ncode_free(nc, nc_size);	/* XXX */
@@ -328,7 +340,8 @@
 		prop_object_t obj;
 		npf_natpolicy_t *np;
 		npf_rule_t *rl;
-		in_addr_t taddr;
+		const npf_addr_t *taddr;
+		size_t taddr_sz;
 		in_port_t tport;
 		int type, flags;
 
@@ -347,12 +360,13 @@
 		flags = prop_number_integer_value(obj);
 
 		/* Translation IP. */
-		obj = prop_dictionary_get(natdict, "translation_ip");
-		taddr = (in_addr_t)prop_number_integer_value(obj);
+		obj = prop_dictionary_get(natdict, "translation-ip");
+		taddr_sz = prop_data_size(obj);
+		taddr = (const npf_addr_t *)prop_data_data_nocopy(obj);
 
 		/* Translation port (for redirect case). */
-		obj = prop_dictionary_get(natdict, "translation_port");
-		tport = (in_addr_t)prop_number_integer_value(obj);
+		obj = prop_dictionary_get(natdict, "translation-port");
+		tport = (in_port_t)prop_number_integer_value(obj);
 
 		/*
 		 * NAT policies are standard rules, plus additional
@@ -363,7 +377,7 @@
 			break;
 
 		/* Allocate a new NAT policy and assign to the rule. */
-		np = npf_nat_newpolicy(type, flags, taddr, tport);
+		np = npf_nat_newpolicy(type, flags, taddr, taddr_sz, tport);
 		if (np == NULL) {
 			error = ENOMEM;
 			break;
--- a/sys/net/npf/npf_handler.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_handler.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_handler.c,v 1.3 2010/10/10 15:29:01 rmind Exp $	*/
+/*	$NetBSD: npf_handler.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.3 2010/10/10 15:29:01 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -45,6 +45,10 @@
 #include <net/pfil.h>
 #include <sys/socketvar.h>
 
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+
 #include "npf_impl.h"
 
 /*
@@ -91,8 +95,28 @@
 	error = 0;
 	retfl = 0;
 
+	/* Cache everything.  Determine whether it is an IPv4 fragment. */
+	if (npf_cache_all(&npc, nbuf) && npf_iscached(&npc, NPC_IPFRAG)) {
+		struct ip *ip = nbuf_dataptr(*mp);
+		/*
+		 * Pass to IPv4 reassembly mechanism.
+		 */
+		if (ip_reass_packet(mp, ip) != 0) {
+			/* Failed; invalid fragment(s) or packet. */
+			error = EINVAL;
+			se = NULL;
+			goto out;
+		}
+		if (*mp == NULL) {
+			/* More fragments should come; return. */
+			return 0;
+		}
+		/* Reassembly is complete, we have the final packet. */
+		nbuf = (nbuf_t *)*mp;
+	}
+
 	/* Inspect the list of sessions. */
-	se = npf_session_inspect(&npc, nbuf, ifp, di);
+	se = npf_session_inspect(&npc, nbuf, di);
 
 	/* If "passing" session found - skip the ruleset inspection. */
 	if (se && npf_session_pass(se)) {
@@ -110,14 +134,14 @@
 	}
 
 	/* Apply the rule. */
-	error = npf_rule_apply(&npc, rl, &keepstate, &retfl);
+	error = npf_rule_apply(&npc, nbuf, rl, &keepstate, &retfl);
 	if (error) {
 		goto out;
 	}
 
 	/* Establish a "pass" session, if required. */
 	if (keepstate && !se) {
-		se = npf_session_establish(&npc, NULL, di);
+		se = npf_session_establish(&npc, nbuf, NULL, di);
 		if (se == NULL) {
 			error = ENOMEM;
 			goto out;
--- a/sys/net/npf/npf_impl.h	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_impl.h	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.3 2010/09/25 01:42:39 matt Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -37,10 +37,10 @@
 #ifndef _NPF_IMPL_H_
 #define _NPF_IMPL_H_
 
-#include <sys/rbtree.h>
+#include <sys/types.h>
+#include <sys/queue.h>
 #include <sys/hash.h>
-#include <sys/queue.h>
-#include <sys/types.h>
+#include <sys/rbtree.h>
 #include <sys/rwlock.h>
 
 #include "npf.h"
@@ -76,12 +76,34 @@
  * DEFINITIONS.
  */
 
-typedef bool	(*npf_algfunc_t)(npf_cache_t *, void *, void *);
+typedef bool	(*npf_algfunc_t)(npf_cache_t *, nbuf_t *, void *);
 
 #define	NPF_NCODE_LIMIT		1024
 #define	NPF_TABLE_SLOTS		32
 
 /*
+ * SESSION STATE STRUCTURES
+ */
+
+#define	ST_OPENING		1	/* SYN has been sent. */
+#define	ST_ACKNOWLEDGE		2	/* SYN-ACK received, wait for ACK. */
+#define	ST_ESTABLISHED		3	/* ACK seen, connection established. */
+#define	ST_CLOSING		4
+
+typedef struct {
+	uint32_t	nst_seqend;	/* SEQ number + length. */
+	uint32_t	nst_ackend;	/* ACK sequence number + window. */
+	uint32_t	nst_maxwin;	/* Maximum window seen. */
+	int		nst_wscale;	/* Window Scale. */
+} npf_tcpstate_t;
+
+typedef struct {
+	kmutex_t	nst_lock;
+	int		nst_state;
+	npf_tcpstate_t	nst_tcpst[2];
+} npf_state_t;
+
+/*
  * INTERFACES.
  */
 
@@ -95,20 +117,27 @@
 void		npf_unregister_pfil(void);
 
 /* Protocol helpers. */
-bool		npf_ip4_proto(npf_cache_t *, nbuf_t *, void *);
-bool		npf_fetch_ip4addrs(npf_cache_t *, nbuf_t *, void *);
-bool		npf_fetch_ports(npf_cache_t *, nbuf_t *, void *, const int);
-bool		npf_fetch_tcpfl(npf_cache_t *, nbuf_t *, void *);
+bool		npf_fetch_ip(npf_cache_t *, nbuf_t *, void *);
+bool		npf_fetch_tcp(npf_cache_t *, nbuf_t *, void *);
+bool		npf_fetch_udp(npf_cache_t *, nbuf_t *, void *);
 bool		npf_fetch_icmp(npf_cache_t *, nbuf_t *, void *);
 bool		npf_cache_all(npf_cache_t *, nbuf_t *);
 
+bool		npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int,
+		    npf_addr_t *);
 bool		npf_rwrport(npf_cache_t *, nbuf_t *, void *, const int,
-		    in_port_t, in_addr_t);
-bool		npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int, in_addr_t);
+		    in_port_t);
+bool		npf_rwrcksum(npf_cache_t *, nbuf_t *, void *, const int,
+		    npf_addr_t *, in_port_t);
 
 uint16_t	npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
 uint16_t	npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
-
+uint16_t	npf_addr_cksum(uint16_t, int, npf_addr_t *, npf_addr_t *);
+uint32_t	npf_addr_sum(const int, const npf_addr_t *, const npf_addr_t *);
+int		npf_tcpsaw(npf_cache_t *, tcp_seq *, tcp_seq *, uint32_t *);
+bool		npf_fetch_tcpopts(const npf_cache_t *, nbuf_t *,
+		    uint16_t *, int *);
+bool		npf_normalize(npf_cache_t *, nbuf_t *, bool, u_int, u_int);
 void		npf_return_block(npf_cache_t *, nbuf_t *, const int);
 
 /* Complex instructions. */
@@ -121,8 +150,8 @@
 		    const int, const uint32_t);
 int		npf_match_udp_ports(npf_cache_t *, nbuf_t *, void *,
 		    const int, const uint32_t);
-int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, const uint32_t);
-int		npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, const uint32_t);
+int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, uint32_t);
+int		npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, uint32_t);
 
 /* Tableset interface. */
 int		npf_tableset_sysinit(void);
@@ -160,20 +189,20 @@
 		    struct ifnet *, const int, const int);
 npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
 		    struct ifnet *, const int, const int);
-int		npf_rule_apply(const npf_cache_t *, npf_rule_t *, bool *, int *);
+int		npf_rule_apply(npf_cache_t *, nbuf_t *, npf_rule_t *,
+		    bool *, int *);
 npf_ruleset_t *	npf_rule_subset(npf_rule_t *);
 
 npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
 void		npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *);
 
-/* State handling interface. */
+/* Session handling interface. */
 int		npf_session_sysinit(void);
 void		npf_session_sysfini(void);
 int		npf_session_tracking(bool);
 
-npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *,
-		    struct ifnet *, const int);
-npf_session_t *	npf_session_establish(const npf_cache_t *,
+npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *, const int);
+npf_session_t *	npf_session_establish(const npf_cache_t *, nbuf_t *,
 		    npf_nat_t *, const int);
 void		npf_session_release(npf_session_t *);
 bool		npf_session_pass(const npf_session_t *);
@@ -181,10 +210,18 @@
 void		npf_session_link(npf_session_t *, npf_session_t *);
 npf_nat_t *	npf_session_retnat(npf_session_t *, const int, bool *);
 
+/* State handling. */
+bool		npf_state_init(const npf_cache_t *, nbuf_t *, npf_state_t *);
+bool		npf_state_inspect(const npf_cache_t *, nbuf_t *, npf_state_t *,
+		    const bool);
+int		npf_state_etime(const npf_state_t *, const int);
+void		npf_state_destroy(npf_state_t *);
+
 /* NAT. */
 void		npf_nat_sysinit(void);
 void		npf_nat_sysfini(void);
-npf_natpolicy_t *npf_nat_newpolicy(int, int, in_addr_t, in_port_t);
+npf_natpolicy_t *npf_nat_newpolicy(int, int, const npf_addr_t *, size_t,
+		    in_port_t);
 void		npf_nat_freepolicy(npf_natpolicy_t *);
 void		npf_nat_flush(void);
 void		npf_nat_reload(npf_ruleset_t *);
@@ -192,7 +229,7 @@
 int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *,
 		    struct ifnet *, const int);
 void		npf_nat_expire(npf_nat_t *);
-void		npf_nat_getorig(npf_nat_t *, in_addr_t *, in_port_t *);
+void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
 
 /* ALG interface. */
@@ -201,13 +238,14 @@
 npf_alg_t *	npf_alg_register(npf_algfunc_t, npf_algfunc_t,
 		    npf_algfunc_t, npf_algfunc_t);
 int		npf_alg_unregister(npf_alg_t *);
-void		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
+bool		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
 void		npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, const int );
 bool		npf_alg_sessionid(npf_cache_t *, nbuf_t *, npf_cache_t *);
 
 /* Debugging routines. */
 void		npf_rulenc_dump(npf_rule_t *);
 void		npf_sessions_dump(void);
+void		npf_state_dump(npf_state_t *);
 void		npf_nat_dump(npf_nat_t *);
 
 #endif
--- a/sys/net/npf/npf_inet.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_inet.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_inet.c,v 1.3 2010/09/25 00:25:31 rmind Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -33,26 +33,26 @@
  * Various procotol related helper routines.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 
+#include <net/pfil.h>
+#include <net/if.h>
+#include <net/ethertypes.h>
+#include <net/if_ether.h>
+
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
+#include <netinet/in_var.h>
 #include <netinet/ip.h>
+#include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet/ip_icmp.h>
 
-#include <net/if.h>
-#include <net/ethertypes.h>
-#include <net/if_ether.h>
-#endif
-#include <net/pfil.h>
-
 #include "npf_impl.h"
 
 /*
@@ -86,117 +86,234 @@
 }
 
 /*
- * npf_ip4_proto: check IPv4 header length and match protocol number.
- *
- * => Returns pointer to protocol header or NULL on failure.
- * => Stores protocol number in the cache.
- * => Updates nbuf pointer to header's nbuf.
+ * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
  */
-bool
-npf_ip4_proto(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+uint16_t
+npf_addr_cksum(uint16_t cksum, int sz, npf_addr_t *oaddr, npf_addr_t *naddr)
 {
-	u_int hlen, offby;
-	uint8_t val8;
-	int error;
+	uint32_t *oip32 = (uint32_t *)oaddr, *nip32 = (uint32_t *)naddr;
 
-	/* IPv4 header: check IP version and header length. */
-	error = nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &val8);
-	if (error || (val8 >> 4) != IPVERSION)
-		return false;
-	hlen = (val8 & 0xf) << 2;
-	if (hlen < sizeof(struct ip))
-		return false;
+	KASSERT(sz % sizeof(uint32_t) == 0);
+	do {
+		cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
+		sz -= sizeof(uint32_t);
+	} while (sz);
+
+	return cksum;
+}
 
-	/* IPv4 header: check fragment offset. */
-	offby = offsetof(struct ip, ip_off);
-	error = nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &val8);
-	if (error || (val8 & ~htons(IP_DF | IP_RF)))
-		return false;
+/*
+ * npf_addr_sum: provide IP address as a summed (if needed) 32-bit integer.
+ * Note: used for hash function.
+ */
+uint32_t
+npf_addr_sum(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
+{
+	uint32_t mix = 0;
+	int i;
 
-	/* Get and match protocol. */
-	KASSERT(offsetof(struct ip, ip_p) > offby);
-	offby = offsetof(struct ip, ip_p) - offby;
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &val8))
-		return false;
+	for (i = 0; i < (sz >> 2); i++) {
+		mix += a1->s6_addr32[i];
+		mix += a2->s6_addr32[i];
+	}
+	return mix;
+}
 
-	/* IP checksum. */
-	offby = offsetof(struct ip, ip_sum) - offsetof(struct ip, ip_p);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby,
-	    sizeof(uint16_t), &npc->npc_ipsum))
-		return false;
+/*
+ * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
+ * Returns all values in host byte-order.
+ */
+int
+npf_tcpsaw(npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
+{
+	struct ip *ip = &npc->npc_ip.v4;
+	struct tcphdr *th = &npc->npc_l4.tcp;
 
-	/* Cache: IPv4, protocol, header length. */
-	npc->npc_info |= NPC_IP46;
-	npc->npc_proto = val8;
-	npc->npc_hlen = hlen;
-	return true;
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));
+
+	*seq = ntohl(th->th_seq);
+	*ack = ntohl(th->th_ack);
+	*win = (uint32_t)ntohs(th->th_win);
+
+	return ntohs(ip->ip_len) - (ip->ip_hl << 2) - (th->th_off << 2);
 }
 
 /*
- * npf_fetch_ip4addrs: fetch source and destination address from IPv4 header.
- *
- * => Stores both source and destination addresses into the cache.
+ * npf_fetch_tcpopts: parse and return TCP options.
  */
 bool
-npf_fetch_ip4addrs(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+npf_fetch_tcpopts(const npf_cache_t *npc, nbuf_t *nbuf,
+    uint16_t *mss, int *wscale)
 {
-	in_addr_t *src = &npc->npc_srcip, *dst = &npc->npc_dstip;
-	u_int offby;
+	void *n_ptr = nbuf_dataptr(nbuf);
+	const struct ip *ip = &npc->npc_ip.v4;
+	const struct tcphdr *th = &npc->npc_l4.tcp;
+	int topts_len, step;
+	uint16_t val16;
+	uint8_t val;
+
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));
 
-	/* Source address. */
-	offby = offsetof(struct ip, ip_src);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(in_addr_t), src))
+	/* Determine if there are any TCP options, get their length. */
+	topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
+	if (topts_len <= 0) {
+		/* No options. */
+		return false;
+	}
+	KASSERT(topts_len <= MAX_TCPOPTLEN);
+
+	/* First step: IP and TCP header up to options. */
+	step = (ip->ip_hl << 2) + sizeof(struct tcphdr);
+next:
+	if (nbuf_advfetch(&nbuf, &n_ptr, step, sizeof(val), &val)) {
 		return false;
-
-	/* Destination address. */
-	offby = offsetof(struct ip, ip_dst) - offby;
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(in_addr_t), dst))
-		return false;
-
-	/* Both addresses are cached. */
-	npc->npc_info |= NPC_ADDRS;
-	return true;
+	}
+	switch (val) {
+	case TCPOPT_EOL:
+		/* Done. */
+		return true;
+	case TCPOPT_NOP:
+		topts_len--;
+		step = 1;
+		break;
+	case TCPOPT_MAXSEG:
+		/*
+		 * XXX: clean this mess.
+		 */
+		if (mss && *mss) {
+			val16 = *mss;
+			if (nbuf_advstore(&nbuf, &n_ptr, 2,
+			    sizeof(val16), &val16))
+				return false;
+		} else if (nbuf_advfetch(&nbuf, &n_ptr, 2,
+		    sizeof(val16), &val16)) {
+			return false;
+		}
+		if (mss) {
+			*mss = val16;
+		}
+		topts_len -= TCPOLEN_MAXSEG;
+		step = sizeof(val16);
+		break;
+	case TCPOPT_WINDOW:
+		if (nbuf_advfetch(&nbuf, &n_ptr, 2, sizeof(val), &val)) {
+			return false;
+		}
+		*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
+		topts_len -= TCPOLEN_WINDOW;
+		step = sizeof(val);
+		break;
+	default:
+		if (nbuf_advfetch(&nbuf, &n_ptr, 1, sizeof(val), &val)) {
+			return false;
+		}
+		if (val < 2 || val >= topts_len) {
+			return false;
+		}
+		topts_len -= val;
+		step = val - 1;
+	}
+	/* Soft limit, in a case of invalid packet. */
+	if (__predict_true(topts_len > 0)) {
+		goto next;
+	}
+	return false;
 }
 
 /*
- * npf_fetch_ports: fetch ports from either TCP or UDP header.
- *
- * => Stores both source and destination ports into the cache.
+ * npf_fetch_ip: fetch, check and cache IP header.
  */
 bool
-npf_fetch_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int proto)
+npf_fetch_ip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
 {
-	u_int dst_off;
-
-	/* Perform checks, advance to TCP/UDP header. */
-	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr))
-		return false;
-	n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
-	if (n_ptr == NULL || npc->npc_proto != proto)
-		return false;
+	struct ip *ip;
+	uint8_t ver;
 
-	/*
-	 * TCP/UDP header: fetch source and destination ports.  For both
-	 * protocols offset of the source port offset is 0.
-	 */
-	CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
-	CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
-	if (proto == IPPROTO_TCP) {
-		dst_off = offsetof(struct tcphdr, th_dport);
-	} else {
-		KASSERT(proto == IPPROTO_UDP);
-		dst_off = offsetof(struct udphdr, uh_dport);
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &ver)) {
+		return false;
+	}
+	switch (ver >> 4) {
+	case IPVERSION:
+		/* IPv4 */
+		ip = &npc->npc_ip.v4;
+		/* Fetch the header. */
+		if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(struct ip), ip)) {
+			return false;
+		}
+		/* Check header length and fragment offset. */
+		if ((ip->ip_hl << 2) < sizeof(struct ip)) {
+			return false;
+		}
+		if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
+			/* Note fragmentation. */
+			npc->npc_info |= NPC_IPFRAG;
+		}
+		/* Cache: layer 3 - IPv4. */
+		npc->npc_ipsz = sizeof(struct in_addr);
+		npc->npc_srcip = (npf_addr_t *)&ip->ip_src;
+		npc->npc_dstip = (npf_addr_t *)&ip->ip_dst;
+		npc->npc_info |= NPC_IP4;
+		break;
+
+	case (IPV6_VERSION >> 4):
+		/* TODO */
+	default:
+		return false;
+	}
+	return true;
+}
+
+bool
+npf_fetch_tcp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	struct ip *ip = &npc->npc_ip.v4;
+	struct tcphdr *th;
+	u_int hlen;
+
+	/* Must have IP header processed for its length and protocol. */
+	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
+		return false;
+	}
+	if (ip->ip_p != IPPROTO_TCP) {
+		return false;
+	}
+	hlen = ip->ip_hl << 2;
+	th = &npc->npc_l4.tcp;
+
+	/* Fetch TCP header. */
+	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct tcphdr), th)) {
+		return false;
 	}
 
-	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_sport))
-		return false;
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, dst_off)) == NULL)
+	/* Cache: layer 4 - TCP. */
+	npc->npc_info |= (NPC_LAYER4 | NPC_TCP);
+	return true;
+}
+
+bool
+npf_fetch_udp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	struct ip *ip = &npc->npc_ip.v4;
+	struct udphdr *uh;
+	u_int hlen;
+
+	/* Must have IP header processed for its length and protocol. */
+	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
 		return false;
-	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_dport))
+	}
+	if (ip->ip_p != IPPROTO_UDP) {
 		return false;
+	}
+	hlen = ip->ip_hl << 2;
+	uh = &npc->npc_l4.udp;
 
-	/* Both ports are cached. */
-	npc->npc_info |= NPC_PORTS;
+	/* Fetch UDP header. */
+	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct udphdr), uh)) {
+		return false;
+	}
+
+	/* Cache: layer 4 - UDP. */
+	npc->npc_info |= (NPC_LAYER4 | NPC_UDP);
 	return true;
 }
 
@@ -208,179 +325,309 @@
 bool
 npf_fetch_icmp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
 {
-	uint8_t *type = &npc->npc_icmp_type, *code = &npc->npc_icmp_code;
-	u_int offby;
-
-	KASSERT(npf_iscached(npc, NPC_IP46));
+	struct ip *ip = &npc->npc_ip.v4;
+	struct icmp *ic;
+	u_int hlen, offby;
 
-	/* ICMP type. */
-	offby = npc->npc_hlen;
-	CTASSERT(offsetof(struct icmp, icmp_type) == 0);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), type))
+	/* Must have IP header processed for its length and protocol. */
+	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
 		return false;
+	}
+	if (ip->ip_p != IPPROTO_ICMP) {
+		return false;
+	}
+	hlen = ip->ip_hl << 2;
+	ic = &npc->npc_l4.icmp;
 
-	/* ICMP code. */
-	offby = offsetof(struct icmp, icmp_code);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), code))
+	/* Fetch basic ICMP header, up to the "data" point. */
+	offby = offsetof(struct icmp, icmp_data);
+	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, offby, ic)) {
 		return false;
+	}
 
-	/* Mark as cached. */
-	npc->npc_info |= NPC_ICMP;
+	/* Cache: layer 4 - ICMP. */
+	npc->npc_info |= (NPC_LAYER4 | NPC_ICMP);
 	return true;
 }
 
 /*
- * npf_fetch_tcpfl: fetch TCP flags and store into the cache.
- */
-bool
-npf_fetch_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
-{
-	const u_int offby = npc->npc_hlen + offsetof(struct tcphdr, th_flags);
-	uint8_t *tcpfl = &npc->npc_tcp_flags;
-
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), tcpfl)) {
-		return false;
-	}
-	return true;
-}
-
-/*
- * npf_cache_all: general routine to cache all relevant IPv4 and
- * TCP, UDP or ICMP data.
+ * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
+ * and TCP, UDP or ICMP data.
  */
 bool
 npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
 {
 	void *n_ptr = nbuf_dataptr(nbuf);
 
-	/* IPv4: get protocol, source and destination addresses. */
-	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
-		return false;
-	}
-	if (!npf_iscached(npc, NPC_ADDRS) &&
-	    !npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
 		return false;
 	}
-	switch (npc->npc_proto) {
+	if (npf_iscached(npc, NPC_IPFRAG)) {
+		return true;
+	}
+	switch (npf_cache_ipproto(npc)) {
 	case IPPROTO_TCP:
-		/* TCP flags. */
-		if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
-			return false;
-		}
-		/* FALLTHROUGH */
-
+		return npf_fetch_tcp(npc, nbuf, n_ptr);
 	case IPPROTO_UDP:
-		/* Fetch TCP/UDP ports. */
-		return npf_fetch_ports(npc, nbuf, n_ptr, npc->npc_proto);
-
+		return npf_fetch_udp(npc, nbuf, n_ptr);
 	case IPPROTO_ICMP:
-		/* Fetch ICMP data. */
 		return npf_fetch_icmp(npc, nbuf, n_ptr);
 	}
 	return false;
 }
 
 /*
- * npf_rwrport: rewrite required TCP/UDP port and update checksum.
+ * npf_rwrip: rewrite required IP address, update the cache.
+ */
+bool
+npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
+    npf_addr_t *addr)
+{
+	const bool outgoing = (di == PFIL_OUT);
+	npf_addr_t *cached;
+	u_int addr_off;
+
+	KASSERT(npf_iscached(npc, NPC_IP46));
+
+	/*
+	 * Select the address by direction: source for outgoing packets,
+	 * destination otherwise.
+	 */
+	cached = outgoing ? npc->npc_srcip : npc->npc_dstip;
+	addr_off = outgoing ?
+	    offsetof(struct ip, ip_src) : offsetof(struct ip, ip_dst);
+
+	/* Write the new address into the packet at that offset. */
+	if (nbuf_advstore(&nbuf, &n_ptr, addr_off, npc->npc_ipsz, addr)) {
+		return false;
+	}
+
+	/* Update the cached address to match the packet. */
+	memcpy(cached, addr, npc->npc_ipsz);
+	return true;
+}
+
+/*
+ * npf_rwrport: rewrite required TCP/UDP port, update the cache.
  */
 bool
 npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
-    in_port_t port, in_addr_t naddr)
+    in_port_t port)
 {
-	const int proto = npc->npc_proto;
-	u_int offby, toff;
-	in_addr_t oaddr;
-	in_port_t oport;
-	uint16_t cksum;
+	const int proto = npf_cache_ipproto(npc);
+	struct ip *ip = &npc->npc_ip.v4;
+	u_int offby = ip->ip_hl << 2;
+	in_port_t *oport;
 
-	KASSERT(npf_iscached(npc, NPC_PORTS));
+	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
 	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
 
-	offby = npc->npc_hlen;
-
-	if (di == PFIL_OUT) {
-		/* Offset to the source port is zero. */
-		CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
-		CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
-		if (proto == IPPROTO_TCP) {
-			toff = offsetof(struct tcphdr, th_sum);
+	/* Offset to the port and pointer in the cache. */
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &npc->npc_l4.tcp;
+		if (di == PFIL_OUT) {
+			CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
+			oport = &th->th_sport;
 		} else {
-			toff = offsetof(struct udphdr, uh_sum);
+			offby += offsetof(struct tcphdr, th_dport);
+			oport = &th->th_dport;
 		}
-		oaddr = npc->npc_srcip;
-		oport = npc->npc_sport;
 	} else {
-		/* Calculate offset to destination port and checksum. */
-		u_int poff;
-		if (proto == IPPROTO_TCP) {
-			poff = offsetof(struct tcphdr, th_dport);
-			toff = offsetof(struct tcphdr, th_sum) - poff;
+		struct udphdr *uh = &npc->npc_l4.udp;
+		if (di == PFIL_OUT) {
+			CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
+			oport = &uh->uh_sport;
 		} else {
-			poff = offsetof(struct udphdr, uh_dport);
-			toff = offsetof(struct udphdr, uh_sum) - poff;
+			offby += offsetof(struct udphdr, uh_dport);
+			oport = &uh->uh_dport;
 		}
-		oaddr = npc->npc_dstip;
-		oport = npc->npc_dport;
-		offby += poff;
 	}
 
-	/* Advance and rewrite port. */
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
-		return false;
-	if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_port_t), &port))
+	/* Advance and rewrite the port. */
+	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(in_port_t), &port))
 		return false;
 
-	/* Advance and update TCP/UDP checksum. */
-	if (nbuf_advfetch(&nbuf, &n_ptr, toff, sizeof(uint16_t), &cksum)) {
-		return false;
+	/* Cache: TCP/UDP port. */
+	*oport = port;
+	return true;
+}
+
+/*
+ * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update cache.
+ */
+bool
+npf_rwrcksum(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
+    npf_addr_t *addr, in_port_t port)
+{
+	const int proto = npf_cache_ipproto(npc);
+	npf_addr_t *oaddr;
+	in_port_t *oport;
+	uint16_t *cksum;
+	u_int offby;
+
+	/* Checksum update for IPv4 header. */
+	if (npf_iscached(npc, NPC_IP4)) {
+		struct ip *ip = &npc->npc_ip.v4;
+		uint16_t ipsum;
+
+		oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip;
+		ipsum = npf_addr_cksum(ip->ip_sum, npc->npc_ipsz, oaddr, addr);
+
+		/* Advance to the IPv4 checksum and rewrite it. */
+		offby = offsetof(struct ip, ip_sum);
+		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(ipsum), &ipsum))
+			return false;
+
+		ip->ip_sum = ipsum;
+		offby = (ip->ip_hl << 2) - offby;
+	} else {
+		/* No checksum for IPv6. */
+		KASSERT(npf_iscached(npc, NPC_IP6));
+		KASSERT(false);	/* XXX: Not yet supported. */
+		oaddr = NULL;
+		offby = 0;
 	}
-	if (__predict_true(cksum || proto == IPPROTO_TCP)) {
-		cksum = npf_fixup32_cksum(cksum, oaddr, naddr);
-		cksum = npf_fixup16_cksum(cksum, oport, port);
-		if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum))
-			return false;
+
+	/* Determine whether TCP/UDP checksum update is needed. */
+	if (port == 0) {
+		return true;
+	}
+	KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));
+
+	/* Calculate TCP/UDP checksum. */
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &npc->npc_l4.tcp;
+
+		cksum = &th->th_sum;
+		offby += offsetof(struct tcphdr, th_sum);
+		oport = (di == PFIL_OUT) ? &th->th_sport : &th->th_dport;
+	} else {
+		struct udphdr *uh = &npc->npc_l4.udp;
+
+		KASSERT(proto == IPPROTO_UDP);
+		cksum = &uh->uh_sum;
+		if (*cksum == 0) {
+			/* No need to update. */
+			return true;
+		}
+		offby += offsetof(struct udphdr, uh_sum);
+		oport = (di == PFIL_OUT) ? &uh->uh_sport : &uh->uh_dport;
+	}
+	*cksum = npf_addr_cksum(*cksum, npc->npc_ipsz, oaddr, addr);
+	*cksum = npf_fixup16_cksum(*cksum, *oport, port);
+
+	/* Advance to TCP/UDP checksum and rewrite it. */
+	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(uint16_t), cksum)) {
+		return false;
 	}
 	return true;
 }
 
-/*
- * npf_rwrip: rewrite required IP address and update checksum.
- */
-bool
-npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
-    in_addr_t addr)
+static inline bool
+npf_normalize_ip4(npf_cache_t *npc, nbuf_t *nbuf, bool rnd, int minttl)
 {
-	u_int offby;
-	in_addr_t oaddr;
+	void *n_ptr = nbuf_dataptr(nbuf);
+	struct ip *ip = &npc->npc_ip.v4;
+	uint16_t cksum = ip->ip_sum;
+	uint8_t ttl = ip->ip_ttl;
+	u_int offby = 0;
 
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+	KASSERT(rnd || minttl);
+
+	/* Randomize IPv4 ID. */
+	if (rnd) {
+		uint16_t oid = ip->ip_id, nid;
 
-	/* Advance to the checksum in IP header and fetch it. */
-	offby = offsetof(struct ip, ip_sum);
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
-		return false;
+		nid = htons(ip_randomid(ip_ids, 0));
+		offby = offsetof(struct ip, ip_id);
+		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(nid), &nid)) {
+			return false;
+		}
+		cksum = npf_fixup16_cksum(cksum, oid, nid);
+		ip->ip_id = nid;
+	}
 
-	if (di == PFIL_OUT) {
-		/* Rewrite source address, if outgoing. */
-		offby = offsetof(struct ip, ip_src) - offby;
-		oaddr = npc->npc_srcip;
-	} else {
-		/* Rewrite destination, if incoming. */
-		offby = offsetof(struct ip, ip_dst) - offby;
-		oaddr = npc->npc_dstip;
+	/* Enforce minimum TTL. */
+	if (minttl && ttl < minttl) {
+		if (nbuf_advstore(&nbuf, &n_ptr,
+		    offsetof(struct ip, ip_ttl) - offby,
+		    sizeof(uint8_t), &minttl)) {
+			return false;
+		}
+		cksum = npf_fixup16_cksum(cksum, ttl, minttl);
+		ip->ip_ttl = minttl;
+		offby = offsetof(struct ip, ip_ttl);
 	}
 
-	/* Write new IP checksum (it is acceptable to do this earlier). */
-	uint16_t cksum = npf_fixup32_cksum(npc->npc_ipsum, oaddr, addr);
-	if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum))
+	/* Update IP checksum. */
+	offby = offsetof(struct ip, ip_sum) - offby;
+	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
 		return false;
-
-	/* Advance to address and rewrite it. */
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
-		return false;
-	if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_addr_t), &addr))
-		return false;
-
-	npc->npc_ipsum = cksum;
+	}
+	ip->ip_sum = cksum;
 	return true;
 }
+
+/*
+ * npf_normalize: optionally randomize the IPv4 ID, enforce the minimum
+ * TTL and clamp the TCP MSS of a SYN packet to 'maxmss'.
+ *
+ * => Returns true on success or if nothing is to be done, false otherwise.
+ */
+bool
+npf_normalize(npf_cache_t *npc, nbuf_t *nbuf,
+    bool rnd, u_int minttl, u_int maxmss)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+	struct ip *ip = &npc->npc_ip.v4;
+	struct tcphdr *th = &npc->npc_l4.tcp;
+	uint16_t cksum, mss;
+	int offby, wscale;
+
+	/* Normalize IPv4. */
+	if (npf_iscached(npc, NPC_IP4) && (rnd || minttl)) {
+		if (!npf_normalize_ip4(npc, nbuf, rnd, minttl)) {
+			return false;
+		}
+	}
+
+	/*
+	 * TCP Maximum Segment Size (MSS) "clamping".  Only if SYN packet.
+	 */
+	if (maxmss == 0 || !npf_iscached(npc, NPC_TCP) ||
+	    (th->th_flags & TH_SYN) == 0) {
+		/* Not required; done. */
+		return true;
+	}
+	/* A zero MSS makes npf_fetch_tcpopts() fetch rather than store. */
+	mss = 0;
+	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
+		return false;
+	}
+	if (ntohs(mss) <= maxmss) {
+		return true;
+	}
+	if (!npf_iscached(npc, NPC_IP4)) { /* XXX: IPv6 */
+		return false;
+	}
+
+	/*
+	 * Adjust the TCP checksum only: the MSS option is in the TCP header,
+	 * while the IPv4 header checksum covers just the IP header.
+	 */
+	maxmss = htons(maxmss);
+	cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
+	/* Rewrite MSS: a non-zero value is stored into the option. */
+	mss = maxmss;
+	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
+		return false;
+	}
+
+	/* Store the new TCP checksum in the packet, then update the cache. */
+	offby = (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
+	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
+		return false;
+	}
+	th->th_sum = cksum;
+	return true;
+}
--- a/sys/net/npf/npf_instr.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_instr.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_instr.c,v 1.3 2010/09/25 00:25:31 rmind Exp $	*/
+/*	$NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -98,15 +98,16 @@
 npf_match_ip4table(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
     const int sd, const u_int tid)
 {
+	struct ip *ip = &npc->npc_ip.v4;
 	in_addr_t ip4addr;
 
-	if (!npf_iscached(npc, NPC_ADDRS)) {
-		if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+	if (!npf_iscached(npc, NPC_IP46)) {
+		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
 			return -1;
 		}
-		KASSERT(npf_iscached(npc, NPC_ADDRS));
+		KASSERT(npf_iscached(npc, NPC_IP46));
 	}
-	ip4addr = sd ? npc->npc_srcip : npc->npc_dstip;
+	ip4addr = sd ? ip->ip_src.s_addr : ip->ip_dst.s_addr;
 
 	/* Match address against NPF table. */
 	return npf_table_match_v4addr(tid, ip4addr);
@@ -119,15 +120,16 @@
 npf_match_ip4mask(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
     const int sd, in_addr_t netaddr, in_addr_t subnet)
 {
+	struct ip *ip = &npc->npc_ip.v4;
 	in_addr_t ip4addr;
 
-	if (!npf_iscached(npc, NPC_ADDRS)) {
-		if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+	if (!npf_iscached(npc, NPC_IP46)) {
+		if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
 			return -1;
 		}
-		KASSERT(npf_iscached(npc, NPC_ADDRS));
+		KASSERT(npf_iscached(npc, NPC_IP46));
 	}
-	ip4addr = sd ? npc->npc_srcip : npc->npc_dstip;
+	ip4addr = sd ? ip->ip_src.s_addr : ip->ip_dst.s_addr;
 
 	return (ip4addr & subnet) == netaddr ? 0 : -1;
 }
@@ -139,15 +141,16 @@
 npf_match_tcp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
     const int sd, const uint32_t prange)
 {
+	struct tcphdr *th = &npc->npc_l4.tcp;
 	in_port_t p;
 
-	if (!npf_iscached(npc, NPC_PORTS)) {
-		if (!npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_TCP)) {
+	if (!npf_iscached(npc, NPC_TCP)) {
+		if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
 			return -1;
 		}
-		KASSERT(npf_iscached(npc, NPC_PORTS));
+		KASSERT(npf_iscached(npc, NPC_TCP));
 	}
-	p = sd ? npc->npc_sport : npc->npc_dport;
+	p = sd ? th->th_sport : th->th_dport;
 
 	/* Match against the port range. */
 	return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@@ -160,15 +163,16 @@
 npf_match_udp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
     const int sd, const uint32_t prange)
 {
+	struct udphdr *uh = &npc->npc_l4.udp;
 	in_port_t p;
 
-	if (!npf_iscached(npc, NPC_PORTS)) {
-		if (!npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_UDP)) {
+	if (!npf_iscached(npc, NPC_UDP)) {
+		if (!npf_fetch_udp(npc, nbuf, n_ptr)) {
 			return -1;
 		}
-		KASSERT(npf_iscached(npc, NPC_PORTS));
+		KASSERT(npf_iscached(npc, NPC_UDP));
 	}
-	p = sd ? npc->npc_sport : npc->npc_dport;
+	p = sd ? uh->uh_sport : uh->uh_dport;
 
 	/* Match against the port range. */
 	return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@@ -178,34 +182,27 @@
  * npf_match_icmp4: match ICMPv4 packet.
  */
 int
-npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t tc)
+npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t tc)
 {
+	struct icmp *ic = &npc->npc_l4.icmp;
 
 	if (!npf_iscached(npc, NPC_ICMP)) {
-		/* Perform checks, advance to ICMP header. */
-		if (!npf_iscached(npc, NPC_IP46) &&
-		    !npf_ip4_proto(npc, nbuf, n_ptr)) {
-			return -1;
-		}
-		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
-		if (n_ptr == NULL || npc->npc_proto != IPPROTO_ICMP) {
-			return -1;
-		}
 		if (!npf_fetch_icmp(npc, nbuf, n_ptr)) {
 			return -1;
 		}
 		KASSERT(npf_iscached(npc, NPC_ICMP));
 	}
+
 	/* Match code/type, if required. */
 	if ((1 << 31) & tc) {
 		const uint8_t type = (tc >> 8) & 0xff;
-		if (type != npc->npc_icmp_type) {
+		if (type != ic->icmp_type) {
 			return -1;
 		}
 	}
 	if ((1 << 30) & tc) {
 		const uint8_t code = tc & 0xff;
-		if (code != npc->npc_icmp_code) {
+		if (code != ic->icmp_code) {
 			return -1;
 		}
 	}
@@ -216,15 +213,16 @@
  * npf_match_tcpfl: match TCP flags.
  */
 int
-npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t fl)
+npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t fl)
 {
 	const uint8_t tcpfl = (fl >> 8) & 0xff, mask = fl & 0xff;
+	struct tcphdr *th = &npc->npc_l4.tcp;
 
-	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
-		return -1;
+	if (!npf_iscached(npc, NPC_TCP)) {
+		if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
+			return -1;
+		}
+		KASSERT(npf_iscached(npc, NPC_TCP));
 	}
-	if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
-		return -1;
-	}
-	return ((npc->npc_tcp_flags & mask) == tcpfl) ? 0 : -1;
+	return ((th->th_flags & mask) == tcpfl) ? 0 : -1;
 }
--- a/sys/net/npf/npf_mbuf.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_mbuf.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_mbuf.c,v 1.4 2010/10/03 19:30:22 rmind Exp $	*/
+/*	$NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -36,10 +36,8 @@
  * abstracted within this source.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.4 2010/10/03 19:30:22 rmind Exp $");
-#endif
+__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -211,6 +209,29 @@
 }
 
 /*
+ * nbuf_advstore: advance by 'n' bytes and store 'len' bytes from 'buf'.
+ * => Returns zero on success, otherwise the error (EINVAL if advancing fails).
+ * => On failure, *nbuf and *n_ptr are reset to their original values.
+ */
+int
+nbuf_advstore(nbuf_t **nbuf, void **n_ptr, u_int n, size_t len, void *buf)
+{
+	nbuf_t *orig_nbuf = *nbuf;
+	void *orig_nptr = *n_ptr;
+	int error = EINVAL;
+
+	*n_ptr = nbuf_advance(nbuf, *n_ptr, n);
+	/* A successful advance is the expected (common) case. */
+	if (__predict_true(*n_ptr != NULL)) {
+		error = nbuf_store_datum(*nbuf, *n_ptr, len, buf);
+	}
+	if (__predict_false(error)) {
+		*nbuf = orig_nbuf;
+		*n_ptr = orig_nptr;
+	}
+	return error;
+}
+
+/*
  * nbuf_add_tag: add a tag to specified network buffer.
  *
  * => Returns 0 on success, or errno on failure.
--- a/sys/net/npf/npf_nat.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_nat.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -75,13 +75,11 @@
  *	"NAT" session expires.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
-#endif
 
 #include <sys/atomic.h>
 #include <sys/bitops.h>
@@ -112,16 +110,17 @@
 	LIST_ENTRY(npf_natpolicy)	n_entry;
 	int				n_type;
 	int				n_flags;
-	in_addr_t			n_taddr;
+	npf_portmap_t *			n_portmap;
+	size_t				n_addr_sz;
+	npf_addr_t			n_taddr;
 	in_port_t			n_tport;
-	npf_portmap_t *			n_portmap;
 };
 
 /* NAT translation entry for a session. */ 
 struct npf_nat {
 	npf_natpolicy_t *		nt_natpolicy;
 	/* Original address and port (for backwards translation). */
-	in_addr_t			nt_oaddr;
+	npf_addr_t			nt_oaddr;
 	in_port_t			nt_oport;
 	/* Translation port (for redirects). */
 	in_port_t			nt_tport;
@@ -166,7 +165,8 @@
  * => XXX: serialise at upper layer.
  */
 npf_natpolicy_t *
-npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
+npf_nat_newpolicy(int type, int flags, const npf_addr_t *taddr,
+    size_t addr_sz, in_port_t tport)
 {
 	npf_natpolicy_t *np, *it;
 	npf_portmap_t *pm;
@@ -178,7 +178,8 @@
 	KASSERT(type == NPF_NATIN || type == NPF_NATOUT);
 	np->n_type = type;
 	np->n_flags = flags;
-	np->n_taddr = taddr;
+	np->n_addr_sz = addr_sz;
+	memcpy(&np->n_taddr, taddr, sizeof(npf_addr_t));
 	np->n_tport = tport;
 
 	pm = NULL;
@@ -188,8 +189,9 @@
 
 	/* Search for a NAT policy using the same translation address. */
 	LIST_FOREACH(it, &nat_policy_list, n_entry) {
-		if (it->n_taddr != np->n_taddr)
+		if (memcmp(&it->n_taddr, &np->n_taddr, sizeof(npf_addr_t))) {
 			continue;
+		}
 		pm = it->n_portmap;
 		break;
 	}
@@ -247,9 +249,8 @@
 	npf_ruleset_t *oldnset;
 
 	oldnset = atomic_swap_ptr(&nat_ruleset, nset);
-	if (oldnset) {
-		npf_ruleset_destroy(oldnset);
-	}
+	KASSERT(oldnset != NULL);
+	npf_ruleset_destroy(oldnset);
 }
 
 /*
@@ -329,9 +330,11 @@
 static npf_nat_t *
 npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
 {
-	const int proto = npc->npc_proto;
+	const int proto = npf_cache_ipproto(npc);
 	npf_nat_t *nt;
 
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
+
 	/* New NAT association. */
 	nt = pool_cache_get(nat_cache, PR_NOWAIT);
 	if (nt == NULL){
@@ -343,11 +346,11 @@
 	/* Save the original address which may be rewritten. */
 	if (np->n_type == NPF_NATOUT) {
 		/* Source (local) for Outbound NAT. */
-		nt->nt_oaddr = npc->npc_srcip;
+		memcpy(&nt->nt_oaddr, npc->npc_srcip, npc->npc_ipsz);
 	} else {
 		/* Destination (external) for Inbound NAT. */
 		KASSERT(np->n_type == NPF_NATIN);
-		nt->nt_oaddr = npc->npc_dstip;
+		memcpy(&nt->nt_oaddr, npc->npc_dstip, npc->npc_ipsz);
 	}
 
 	/*
@@ -359,13 +362,17 @@
 		nt->nt_tport = 0;
 		return nt;
 	}
-	/* Save a relevant TCP/UDP port. */
-	KASSERT(npf_iscached(npc, NPC_PORTS));
-	if (np->n_type == NPF_NATOUT) {
-		nt->nt_oport = npc->npc_sport;
+	/* Save the relevant TCP/UDP port. */
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &npc->npc_l4.tcp;
+		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
+		    th->th_sport : th->th_dport;
 	} else {
-		nt->nt_oport = npc->npc_dport;
+		struct udphdr *uh = &npc->npc_l4.udp;
+		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
+		    uh->uh_sport : uh->uh_dport;
 	}
+
 	/* Get a new port for translation. */
 	if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
 		nt->nt_tport = npf_nat_getport(np);
@@ -382,12 +389,12 @@
 npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
     const bool forw, const int di)
 {
-	const npf_natpolicy_t *np = nt->nt_natpolicy;
 	void *n_ptr = nbuf_dataptr(nbuf);
-	in_addr_t addr;
+	npf_natpolicy_t *np = nt->nt_natpolicy;
+	npf_addr_t *addr;
 	in_port_t port;
 
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+	KASSERT(npf_iscached(npc, NPC_IP46));
 
 	if (forw) {
 		/* "Forwards" stream: use translation address/port. */
@@ -395,7 +402,7 @@
 		    (np->n_type == NPF_NATIN && di == PFIL_IN) ^
 		    (np->n_type == NPF_NATOUT && di == PFIL_OUT)
 		);
-		addr = np->n_taddr;
+		addr = &np->n_taddr;
 		port = nt->nt_tport;
 	} else {
 		/* "Backwards" stream: use original address/port. */
@@ -403,53 +410,47 @@
 		    (np->n_type == NPF_NATIN && di == PFIL_OUT) ^
 		    (np->n_type == NPF_NATOUT && di == PFIL_IN)
 		);
-		addr = nt->nt_oaddr;
+		addr = &nt->nt_oaddr;
 		port = nt->nt_oport;
 	}
 
-	/* Execute ALG hooks first. */
+	/* Execute ALG hook first. */
 	npf_alg_exec(npc, nbuf, nt, di);
 
 	/*
+	 * Rewrite the IP and/or TCP/UDP checksums first, since npf_rwrcksum()
+	 * uses the cache, which still holds the original values it needs.
+	 */
+	if (!npf_rwrcksum(npc, nbuf, n_ptr, di, addr, port)) {
+		return EINVAL;
+	}
+	/*
 	 * Address translation: rewrite source/destination address, depending
 	 * on direction (PFIL_OUT - for source, PFIL_IN - for destination).
-	 * Note: cache will be used in npf_rwrport(), update only in the end.
 	 */
 	if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) {
 		return EINVAL;
 	}
 	if ((np->n_flags & NPF_NAT_PORTS) == 0) {
-		/* Cache new address. */
-		if (di == PFIL_OUT) {
-			npc->npc_srcip = addr;
-		} else {
-			npc->npc_dstip = addr;
-		}
+		/* Done. */
 		return 0;
 	}
-	switch (npc->npc_proto) {
+	switch (npf_cache_ipproto(npc)) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
-		KASSERT(npf_iscached(npc, NPC_PORTS));
+		KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));
 		/* Rewrite source/destination port. */
-		if (!npf_rwrport(npc, nbuf, n_ptr, di, port, addr)) {
+		if (!npf_rwrport(npc, nbuf, n_ptr, di, port)) {
 			return EINVAL;
 		}
 		break;
 	case IPPROTO_ICMP:
-		/* None. */
+		KASSERT(npf_iscached(npc, NPC_ICMP));
+		/* Nothing. */
 		break;
 	default:
 		return ENOTSUP;
 	}
-	/* Cache new address and port. */
-	if (di == PFIL_OUT) {
-		npc->npc_srcip = addr;
-		npc->npc_sport = port;
-	} else {
-		npc->npc_dstip = addr;
-		npc->npc_dport = port;
-	}
 	return 0;
 }
 
@@ -473,15 +474,13 @@
 	bool forw, new;
 
 	/* All relevant IPv4 data should be already cached. */
-	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
+	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
 		return 0;
 	}
 
 	/*
 	 * Return the NAT entry associated with the session, if any.
-	 * Assumptions:
-	 * - If associated via linked session, then "forwards" stream.
-	 * - If associated directly, then "backwards" stream.
+	 * Determines whether the stream is "forwards" or "backwards".
 	 */
 	if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
 		np = nt->nt_natpolicy;
@@ -504,6 +503,11 @@
 	}
 	new = true;
 
+	/* Determine whether any ALG matches. */
+	if (npf_alg_match(npc, nbuf, nt)) {
+		KASSERT(nt->nt_alg != NULL);
+	}
+
 	/*
 	 * If there is no local session (no "keep state" rule - unusual, but
 	 * possible configuration), establish one before translation.  Note
@@ -511,7 +515,7 @@
 	 * stream depends on other, stateless filtering rules.
 	 */
 	if (se == NULL) {
-		nse = npf_session_establish(npc, NULL, di);
+		nse = npf_session_establish(npc, nbuf, NULL, di);
 		if (nse == NULL) {
 			error = ENOMEM;
 			goto out;
@@ -533,7 +537,7 @@
 		 *
 		 * Note: packet now has a translated address in the cache.
 		 */
-		natse = npf_session_establish(npc, nt, di);
+		natse = npf_session_establish(npc, nbuf, nt, di);
 		if (natse == NULL) {
 			error = ENOMEM;
 			goto out;
@@ -562,13 +566,16 @@
  * npf_nat_getorig: return original IP address and port from translation entry.
  */
 void
-npf_nat_getorig(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
+npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
 {
 
-	*addr = nt->nt_oaddr;
+	*addr = &nt->nt_oaddr;
 	*port = nt->nt_oport;
 }
 
+/*
+ * npf_nat_setalg: associate an ALG with the NAT entry.
+ */
 void
 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
 {
@@ -606,13 +613,13 @@
 	}
 	LIST_FOREACH(np, &nat_policy_list, n_entry) {
 skip:
-		ip.s_addr = np->n_taddr;
-		printf("\tNAT policy: type = %d, flags = %d, taddr = %s\n",
-		    np->n_type, np->n_flags, inet_ntoa(ip));
+		memcpy(&ip, &np->n_taddr, sizeof(ip));
+		printf("\tNAT policy: type %d, flags 0x%x, taddr %s, tport = %d\n",
+		    np->n_type, np->n_flags, inet_ntoa(ip), np->n_tport);
 		if (nt == NULL) {
 			continue;
 		}
-		ip.s_addr = nt->nt_oaddr;
+		memcpy(&ip, &nt->nt_oaddr, sizeof(ip));
 		printf("\tNAT: original address %s, oport %d, tport = %d\n",
 		    inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport));
 		if (nt->nt_alg) {
--- a/sys/net/npf/npf_ncode.h	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_ncode.h	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ncode.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_ncode.h,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -41,12 +41,17 @@
 
 #include "npf.h"
 
-/* N-code processing, validation & building. */
+#if defined(_KERNEL) || defined(_NPF_TESTING)
+/*
+ * N-code processing, validation & building.
+ */
+void *	npf_ncode_alloc(size_t);
+void	npf_ncode_free(void *, size_t);
+
 int	npf_ncode_process(npf_cache_t *, const void *, nbuf_t *, const int);
 int	npf_ncode_validate(const void *, size_t, int *);
 
-void *	npf_ncode_alloc(size_t);
-void	npf_ncode_free(void *, size_t);
+#endif
 
 /* Error codes. */
 #define	NPF_ERR_OPCODE		-1	/* Invalid instruction. */
--- a/sys/net/npf/npf_processor.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_processor.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_processor.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -53,10 +53,8 @@
  *   maintenance in npf_ncode_process() and nc_insn_check() might be avoided.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
-#endif
+__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
--- a/sys/net/npf/npf_ruleset.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_ruleset.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -39,11 +39,10 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
-#endif
 
 #include <sys/atomic.h>
 #include <sys/kmem.h>
@@ -52,22 +51,23 @@
 #include <sys/rwlock.h>
 #include <sys/types.h>
 
+#include <net/pfil.h>
 #include <net/if.h>
-#include <net/pfil.h>
+#endif
 
 #include "npf_ncode.h"
 #include "npf_impl.h"
 
 struct npf_hook {
-	void				(*hk_fn)(const npf_cache_t *, void *);
-	void *				hk_arg;
-	LIST_ENTRY(npf_hook)		hk_entry;
+	void			(*hk_fn)(npf_cache_t *, nbuf_t *, void *);
+	void *			hk_arg;
+	LIST_ENTRY(npf_hook)	hk_entry;
 };
 
 struct npf_ruleset {
-	TAILQ_HEAD(, npf_rule)		rs_queue;
-	npf_rule_t *			rs_default;
-	int				_reserved;
+	TAILQ_HEAD(, npf_rule)	rs_queue;
+	npf_rule_t *		rs_default;
+	int			_reserved;
 };
 
 /* Rule structure. */
@@ -83,11 +83,15 @@
 	void *				r_ncode;
 	size_t				r_nc_size;
 	/* Attributes of this rule. */
-	int				r_attr;
+	uint32_t			r_attr;
 	/* Interface. */
 	u_int				r_ifid;
 	/* Hit counter. */
 	u_long				r_hitcount;
+	/* Normalization options (XXX - abstract). */
+	bool				rl_rnd_ipid;
+	u_int				rl_minttl;
+	u_int				rl_maxmss;
 	/* List of hooks to process on match. */
 	LIST_HEAD(, npf_hook)		r_hooks;
 };
@@ -187,6 +191,7 @@
 	 */
 	rw_enter(&ruleset_lock, RW_WRITER);
 	oldrlset = atomic_swap_ptr(&ruleset, nrlset);
+	KASSERT(oldrlset != NULL);
 
 	/*
 	 * Setup a new tableset.  It will lock the global tableset lock,
@@ -205,7 +210,8 @@
  * npf_rule_alloc: allocate a rule and copy ncode from user-space.
  */
 npf_rule_t *
-npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz)
+npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz,
+    bool rnd_ipid, int minttl, int maxmss)
 {
 	npf_rule_t *rl;
 	int errat;
@@ -228,6 +234,11 @@
 	rl->r_nc_size = sz;
 	rl->r_hitcount = 0;
 	rl->r_nat = NULL;
+
+	rl->rl_rnd_ipid = rnd_ipid;
+	rl->rl_minttl = minttl;
+	rl->rl_maxmss = maxmss;
+
 	return rl;
 }
 
@@ -296,6 +307,8 @@
 void
 npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
 {
+
+	KASSERT(rl->r_nat == NULL);
 	rl->r_nat = np;
 }
 
@@ -304,7 +317,7 @@
  */
 npf_hook_t *
 npf_hook_register(npf_rule_t *rl,
-    void (*fn)(const npf_cache_t *, void *), void *arg)
+    void (*fn)(npf_cache_t *, nbuf_t *, void *), void *arg)
 {
 	npf_hook_t *hk;
 
@@ -420,7 +433,7 @@
  * => Releases the ruleset lock.
  */
 int
-npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl,
+npf_rule_apply(npf_cache_t *npc, nbuf_t *nbuf, npf_rule_t *rl,
     bool *keepstate, int *retfl)
 {
 	npf_hook_t *hk;
@@ -443,11 +456,17 @@
 	/* Passing.  Run the hooks. */
 	LIST_FOREACH(hk, &rl->r_hooks, hk_entry) {
 		KASSERT(hk->hk_fn != NULL);
-		(*hk->hk_fn)(npc, hk->hk_arg);
+		(*hk->hk_fn)(npc, nbuf, hk->hk_arg);
 	}
+
+	/* Normalize the packet, if required. */
+	if (rl->r_attr & NPF_RULE_NORMALIZE) {
+		(void)npf_normalize(npc, nbuf,
+		    rl->rl_rnd_ipid, rl->rl_minttl, rl->rl_maxmss);
+	}
+
 	*keepstate = (rl->r_attr & NPF_RULE_KEEPSTATE) != 0;
 	rw_exit(&ruleset_lock);
-
 	return 0;
 }
 
--- a/sys/net/npf/npf_sendpkt.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_sendpkt.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_sendpkt.c,v 1.2 2010/09/25 00:25:31 rmind Exp $	*/
+/*	$NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.2 2010/09/25 00:25:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -54,60 +54,25 @@
 #define	DEFAULT_IP_TTL		(ip_defttl)
 
 /*
- * npf_fetch_seqack: fetch TCP data length, SEQ and ACK numbers.
- *
- * NOTE: Returns in host byte-order.
- */
-static inline bool
-npf_fetch_seqack(nbuf_t *nbuf, npf_cache_t *npc,
-    tcp_seq *seq, tcp_seq *ack, size_t *tcpdlen)
-{
-	void *n_ptr = nbuf_dataptr(nbuf);
-	u_int offby;
-	tcp_seq seqack[2];
-	uint16_t iplen;
-	uint8_t toff;
-
-	/* Fetch total length of IP. */
-	offby = offsetof(struct ip, ip_len);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t), &iplen))
-		return false;
-
-	/* Fetch SEQ and ACK numbers. */
-	offby = (npc->npc_hlen - offby) + offsetof(struct tcphdr, th_seq);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(seqack), seqack))
-		return false;
-
-	/* Fetch TCP data offset (header length) value. */
-	offby = sizeof(seqack);
-	if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &toff))
-		return false;
-	toff >>= 4;
-
-	*seq = ntohl(seqack[0]);
-	*ack = ntohl(seqack[1]);
-	*tcpdlen = ntohs(iplen) - npc->npc_hlen - (toff << 2);
-	return true;
-}
-
-/*
  * npf_return_tcp: return a TCP reset (RST) packet.
  */
 static int
 npf_return_tcp(npf_cache_t *npc, nbuf_t *nbuf)
 {
 	struct mbuf *m;
-	struct ip *ip;
-	struct tcphdr *th;
+	struct ip *oip, *ip;
+	struct tcphdr *oth, *th;
 	tcp_seq seq, ack;
-	size_t tcpdlen, len;
+	int tcpdlen, len;
+	uint32_t win;
 
 	/* Fetch relevant data. */
-	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS | NPC_PORTS) ||
-	    !npf_fetch_seqack(nbuf, npc, &seq, &ack, &tcpdlen)) {
-		return EBADMSG;
-	}
-	if (npc->npc_tcp_flags & TH_RST) {
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
+	tcpdlen = npf_tcpsaw(npc, &seq, &ack, &win);
+	oip = &npc->npc_ip.v4;
+	oth = &npc->npc_l4.tcp;
+
+	if (oth->th_flags & TH_RST) {
 		return 0;
 	}
 
@@ -129,16 +94,16 @@
 	 * Note: IP length contains TCP header length.
 	 */
 	ip->ip_p = IPPROTO_TCP;
-	ip->ip_src.s_addr = npc->npc_dstip;
-	ip->ip_dst.s_addr = npc->npc_srcip;
+	ip->ip_src.s_addr = oip->ip_dst.s_addr;
+	ip->ip_dst.s_addr = oip->ip_src.s_addr;
 	ip->ip_len = htons(sizeof(struct tcphdr));
 
 	/* Construct TCP header and compute the checksum. */
 	th = (struct tcphdr *)(ip + 1);
-	th->th_sport = npc->npc_dport;
-	th->th_dport = npc->npc_sport;
+	th->th_sport = oth->th_dport;
+	th->th_dport = oth->th_sport;
 	th->th_seq = htonl(ack);
-	if (npc->npc_tcp_flags & TH_SYN) {
+	if (oth->th_flags & TH_SYN) {
 		tcpdlen++;
 	}
 	th->th_ack = htonl(seq + tcpdlen);
@@ -151,7 +116,6 @@
 	ip->ip_hl = sizeof(struct ip) >> 2;
 	ip->ip_tos = IPTOS_LOWDELAY;
 	ip->ip_len = htons(len);
-	ip->ip_off = htons(IP_DF);
 	ip->ip_ttl = DEFAULT_IP_TTL;
 
 	/* Pass to IP layer. */
@@ -177,20 +141,23 @@
 npf_return_block(npf_cache_t *npc, nbuf_t *nbuf, const int retfl)
 {
 	void *n_ptr = nbuf_dataptr(nbuf);
-	const int proto = npc->npc_proto;
 
-	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr))
-		return;
-	if ((proto == IPPROTO_TCP && (retfl & NPF_RULE_RETRST) == 0) ||
-	    (proto == IPPROTO_UDP && (retfl & NPF_RULE_RETICMP) == 0)) {
+	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
 		return;
 	}
-	switch (proto) {
+	switch (npf_cache_ipproto(npc)) {
 	case IPPROTO_TCP:
-		(void)npf_return_tcp(npc, nbuf);
+		if (retfl & NPF_RULE_RETRST) {
+			if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
+				return;
+			}
+			(void)npf_return_tcp(npc, nbuf);
+		}
 		break;
 	case IPPROTO_UDP:
-		(void)npf_return_icmp(nbuf);
+		if (retfl & NPF_RULE_RETICMP) {
+			(void)npf_return_icmp(nbuf);
+		}
 		break;
 	}
 }
--- a/sys/net/npf/npf_session.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_session.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_session.c,v 1.4 2010/10/03 19:36:38 rmind Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.5 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -65,8 +65,10 @@
  *
  *	Often NAT policies have overlapping stateful filtering rules.  In
  *	order to avoid unnecessary lookups, "pass" session can be linked
- *	with a "NAT" session (npf_session_t::s_nat_se pointer).  Such link
- *	is used to detect translation on "forwards" stream.
+ *	with a "NAT" session (npf_session_t::s_linked pointer).  Such a link
+ *	is used to detect translation on the "forwards" stream.  The "NAT"
+ *	session also contains a link back to the "pass" session; therefore,
+ *	both sessions point to each other.
  *
  *	Additional reference is held on linked "NAT" sessions to prevent
  *	them from destruction while linked.  Link is broken and reference
@@ -83,16 +85,15 @@
  * - Session monitoring via descriptor.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.4 2010/10/03 19:36:38 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.5 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
-#endif
+
 #include <sys/atomic.h>
 #include <sys/condvar.h>
 #include <sys/hash.h>
@@ -118,14 +119,15 @@
 	/* Session type.  Supported: TCP, UDP, ICMP. */
 	int				s_type;
 	int				s_direction;
-	uint16_t			s_state;
-	uint16_t			s_flags;
-	/* NAT data associated with this session (if any). */
+	int				s_flags;
+	npf_state_t			s_state;
+	/* NAT associated with this session (if any) and linked session. */
 	npf_nat_t *			s_nat;
-	npf_session_t *			s_nat_se;
+	npf_session_t *			s_linked;
 	/* Source and destination addresses. */
-	in_addr_t			s_src_addr;
-	in_addr_t			s_dst_addr;
+	npf_addr_t			s_src_addr;
+	npf_addr_t			s_dst_addr;
+	int				s_addr_sz;
 	/* Source and destination ports (TCP / UDP) or generic IDs. */
 	union {
 		in_port_t		port;
@@ -139,6 +141,8 @@
 	struct timespec 		s_atime;
 };
 
+#define	SE_PASSSING			0x01
+
 LIST_HEAD(npf_sesslist, npf_session);
 
 #define	SESS_HASH_BUCKETS		1024	/* XXX tune + make tunable */
@@ -162,28 +166,13 @@
 
 #define	SESS_GC_INTERVAL		5		/* 5 sec */
 
-/* Session expiration table.  XXX: TCP close: 2 * tcp_msl (e.g. 120)?  Maybe. */
-static const u_int sess_expire_table[ ] = {
-	[IPPROTO_TCP]		= 600,		/* 10 min */
-	[IPPROTO_UDP]		= 300,		/*  5 min */
-	[IPPROTO_ICMP]		= 30		/*  1 min */
-};
-
-/* Session states and flags. */
-#define	SE_OPENING		1
-#define	SE_ACKNOWLEDGE		2
-#define	SE_ESTABLISHED		3
-#define	SE_CLOSING		4
-
-#define	SE_PASSSING		0x01
-
 static void	sess_tracking_stop(void);
 static void	npf_session_worker(void *);
 
 #ifdef SE_DEBUG
-#define	DPRINTF(x)	printf x
+#define	SEPRINTF(x)	printf x
 #else
-#define	DPRINTF(x)
+#define	SEPRINTF(x)
 #endif
 
 /*
@@ -229,39 +218,32 @@
 {
 	const npf_session_t * const se1 = n1;
 	const npf_session_t * const se2 = n2;
+	const npf_addr_t *se2_addr1, *se2_addr2;
+	uint32_t se2_id1, se2_id2;
+	int ret;
 
 	/*
 	 * Note: must compare equivalent streams.
 	 * See sess_rbtree_cmp_key() below.
 	 */
 	if (se1->s_direction == se2->s_direction) {
-		/*
-		 * Direction "forwards".
-		 */
-		if (se1->s_src.id != se2->s_src.id)
-			return (se1->s_src.id < se2->s_src.id) ? -1 : 1;
-		if (se1->s_dst.id != se2->s_dst.id)
-			return (se1->s_dst.id < se2->s_dst.id) ? -1 : 1;
-
-		if (__predict_false(se1->s_src_addr != se2->s_src_addr))
-			return (se1->s_src_addr < se2->s_src_addr) ? -1 : 1;
-		if (__predict_false(se1->s_dst_addr != se2->s_dst_addr))
-			return (se1->s_dst_addr < se2->s_dst_addr) ? -1 : 1;
+		/* Direction "forwards". */
+		se2_id1 = se2->s_src.id; se2_addr1 = &se2->s_src_addr;
+		se2_id2 = se2->s_dst.id; se2_addr2 = &se2->s_dst_addr;
 	} else {
-		/*
-		 * Direction "backwards".
-		 */
-		if (se1->s_src.id != se2->s_dst.id)
-			return (se1->s_src.id < se2->s_dst.id) ? -1 : 1;
-		if (se1->s_dst.id != se2->s_src.id)
-			return (se1->s_dst.id < se2->s_src.id) ? -1 : 1;
-
-		if (__predict_false(se1->s_src_addr != se2->s_dst_addr))
-			return (se1->s_src_addr < se2->s_dst_addr) ? -1 : 1;
-		if (__predict_false(se1->s_dst_addr != se2->s_src_addr))
-			return (se1->s_dst_addr < se2->s_src_addr) ? -1 : 1;
+		/* Direction "backwards". */
+		se2_id1 = se2->s_dst.id; se2_addr1 = &se2->s_dst_addr;
+		se2_id2 = se2->s_src.id; se2_addr2 = &se2->s_src_addr;
 	}
-	return 0;
+	if (se1->s_src.id != se2_id1)
+		return (se1->s_src.id < se2_id1) ? -1 : 1;
+	if (se1->s_dst.id != se2_id2)
+		return (se1->s_dst.id < se2_id2) ? -1 : 1;
+	if (se1->s_addr_sz != se2->s_addr_sz)
+		return (se1->s_addr_sz < se2->s_addr_sz) ? -1 : 1;
+	if ((ret = memcmp(&se1->s_src_addr, se2_addr1, se1->s_addr_sz)) != 0)
+		return ret;
+	return memcmp(&se1->s_dst_addr, se2_addr2, se1->s_addr_sz);
 }
 
 static signed int
@@ -269,33 +251,42 @@
 {
 	const npf_session_t * const se = n1;
 	const npf_cache_t * const npc = key;
+	const npf_addr_t *addr1, *addr2;
 	in_port_t sport, dport;
-	in_addr_t src, dst;
+	uint32_t id1, id2;
+	int ret;
 
-	if (se->s_direction == npc->npc_dir) {
+	if (npf_cache_ipproto(npc) == IPPROTO_TCP) {
+		const struct tcphdr *th = &npc->npc_l4.tcp;
+		sport = th->th_sport;
+		dport = th->th_dport;
+	} else {
+		const struct udphdr *uh = &npc->npc_l4.udp;
+		sport = uh->uh_sport;
+		dport = uh->uh_dport;
+	}
+	if (se->s_direction == npc->npc_di) {
 		/* Direction "forwards". */
-		src = npc->npc_srcip; sport = npc->npc_sport;
-		dst = npc->npc_dstip; dport = npc->npc_dport;
+		addr1 = npc->npc_srcip; id1 = sport;
+		addr2 = npc->npc_dstip; id2 = dport;
 	} else {
 		/* Direction "backwards". */
-		src = npc->npc_dstip; sport = npc->npc_dport;
-		dst = npc->npc_srcip; dport = npc->npc_sport;
+		addr1 = npc->npc_dstip; id1 = dport;
+		addr2 = npc->npc_srcip; id2 = sport;
 	}
 
 	/* Ports are the main criteria and are first. */
-	if (se->s_src.id != sport)
-		return (se->s_src.id < sport) ? -1 : 1;
-
-	if (se->s_dst.id != dport)
-		return (se->s_dst.id < dport) ? -1 : 1;
+	if (se->s_src.id != id1)
+		return (se->s_src.id < id1) ? -1 : 1;
+	if (se->s_dst.id != id2)
+		return (se->s_dst.id < id2) ? -1 : 1;
 
 	/* Note that hash should minimise differentiation on these. */
-	if (__predict_false(se->s_src_addr != src))
-		return (se->s_src_addr < src) ? -1 : 1;
-	if (__predict_false(se->s_dst_addr < dst))
-		return (se->s_dst_addr < dst) ? -1 : 1;
-
-	return 0;
+	if (se->s_addr_sz != npc->npc_ipsz)
+		return (se->s_addr_sz < npc->npc_ipsz) ? -1 : 1;
+	if ((ret = memcmp(&se->s_src_addr, addr1, se->s_addr_sz)) != 0)
+		return ret;
+	return memcmp(&se->s_dst_addr, addr2, se->s_addr_sz);
 }
 
 static const rb_tree_ops_t sess_rbtree_ops = {
@@ -308,12 +299,12 @@
 static inline npf_sess_hash_t *
 sess_hash_bucket(const npf_cache_t *key)
 {
-	uint32_t hash, mix;
+	uint32_t hash, mix = npf_cache_ipproto(key);
 
-	KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS));
+	KASSERT(npf_iscached(key, NPC_IP46));
 
-	/* Sum addresses for both directions and mix in protocol. */
-	mix = key->npc_srcip + key->npc_dstip + key->npc_proto;
+	/* Sum protocol and both addresses (for both directions). */
+	mix += npf_addr_sum(key->npc_ipsz, key->npc_srcip, key->npc_dstip);
 	hash = hash32_buf(&mix, sizeof(uint32_t), HASH32_BUF_INIT);
 	return &sess_hashtbl[hash & SESS_HASH_MASK];
 }
@@ -428,58 +419,12 @@
 }
 
 /*
- * npf_session_pstate: handle session state according to protocol data.
- */
-static inline bool
-npf_session_pstate(const npf_cache_t *npc, npf_session_t *se, const int dir)
-{
-	const bool backwards = (se->s_direction != dir);
-	const int proto = npc->npc_proto;
-
-	if (proto != IPPROTO_TCP) {
-		/* Handle UDP or ICMP response for opening session. */
-		if (se->s_state == SE_OPENING && backwards) {
-			se->s_state = SE_ESTABLISHED;
-		}
-		return true;
-	}
-
-	const int tcpfl = npc->npc_tcp_flags & (TH_SYN|TH_ACK|TH_RST|TH_FIN);
-
-	switch (tcpfl) {
-	case TH_ACK:
-		/* Common case. */
-		if (__predict_true(se->s_state == SE_ESTABLISHED)) {
-			return true;
-		}
-		/* ACK seen after SYN-ACK: session fully established. */
-		if (se->s_state == SE_ACKNOWLEDGE && !backwards) {
-			se->s_state = SE_ESTABLISHED;
-		}
-		break;
-	case TH_SYN | TH_ACK:
-		/* SYN-ACK seen, wait for ACK. */
-		if (se->s_state == SE_OPENING && backwards) {
-			se->s_state = SE_ACKNOWLEDGE;
-		}
-		break;
-	case TH_RST:
-	case TH_FIN:
-		/* XXX/TODO: Handle TCP reset attacks; later. */
-		se->s_state = SE_CLOSING;
-		break;
-	}
-	return true;
-}
-
-/*
  * npf_session_inspect: look if there is an established session (connection).
  *
  * => If found, we will hold a reference for caller.
  */
 npf_session_t *
-npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
-    struct ifnet *ifp, const int di)
+npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di)
 {
 	npf_sess_hash_t *sh;
 	npf_session_t *se;
@@ -488,8 +433,8 @@
 	if (!sess_tracking || !npf_cache_all(npc, nbuf)) {
 		return NULL;
 	}
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
-	KASSERT(npf_iscached(npc, NPC_PORTS) || npf_iscached(npc, NPC_ICMP));
+	KASSERT(npf_iscached(npc, NPC_IP46));
+	KASSERT(npf_iscached(npc, NPC_LAYER4));
 
 	/*
 	 * Execute ALG session helpers.
@@ -503,7 +448,7 @@
 		/* Default: original packet, pass its cache. */
 		key = npc;
 	}
-	key->npc_dir = di;
+	key->npc_di = di;
 
 	/*
 	 * Get a hash bucket from the cached key data.
@@ -523,7 +468,17 @@
 	}
 
 	/* Inspect the protocol data and handle state changes. */
-	if (npf_session_pstate(npc, se, di)) {
+	const bool forw = (se->s_direction == di);
+	npf_state_t *nst;
+
+	if (se->s_nat) {
+		npf_session_t *lse = se->s_linked;
+		nst = &lse->s_state;
+	} else {
+		nst = &se->s_state;
+	}
+
+	if (npf_state_inspect(npc, nbuf, nst, forw)) {
 		/* Must update the last activity time. */
 		getnanouptime(&se->s_atime);
 		/* Hold a reference. */
@@ -542,14 +497,20 @@
  * => Sessions is created with the held reference (for caller).
  */
 npf_session_t *
-npf_session_establish(const npf_cache_t *npc, npf_nat_t *nt, const int di)
+npf_session_establish(const npf_cache_t *npc, nbuf_t *nbuf,
+    npf_nat_t *nt, const int di)
 {
+	const struct tcphdr *th;
+	const struct udphdr *uh;
 	npf_sess_hash_t *sh;
 	npf_session_t *se;
+	int proto, sz;
 	bool ok;
 
-	if (!sess_tracking)	/* XXX */
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
+	if (!sess_tracking) {	/* XXX */
 		return NULL;
+	}
 
 	/* Allocate and initialise new state. */
 	se = pool_cache_get(sess_cache, PR_NOWAIT);
@@ -563,53 +524,68 @@
 
 	/* NAT and backwards session. */
 	se->s_nat = nt;
-	se->s_nat_se = NULL;
+	se->s_linked = NULL;
 
 	/* Unique IDs: IP addresses. */
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
-	se->s_src_addr = npc->npc_srcip;
-	se->s_dst_addr = npc->npc_dstip;
+	KASSERT(npf_iscached(npc, NPC_IP46));
+	sz = npc->npc_ipsz;
+	memcpy(&se->s_src_addr, npc->npc_srcip, sz);
+	memcpy(&se->s_dst_addr, npc->npc_dstip, sz);
+	se->s_addr_sz = sz;
 
 	/* Procotol. */
-	se->s_type = npc->npc_proto;
+	proto = npf_cache_ipproto(npc);
+	se->s_type = proto;
 
-	switch (npc->npc_proto) {
+	switch (proto) {
 	case IPPROTO_TCP:
+		KASSERT(npf_iscached(npc, NPC_TCP));
+		th = &npc->npc_l4.tcp;
+		/* Additional IDs: ports. */
+		se->s_src.id = th->th_sport;
+		se->s_dst.id = th->th_dport;
+		break;
 	case IPPROTO_UDP:
-		KASSERT(npf_iscached(npc, NPC_PORTS));
+		KASSERT(npf_iscached(npc, NPC_UDP));
 		/* Additional IDs: ports. */
-		se->s_src.id = npc->npc_sport;
-		se->s_dst.id = npc->npc_dport;
+		uh = &npc->npc_l4.udp;
+		se->s_src.id = uh->uh_sport;
+		se->s_dst.id = uh->uh_dport;
 		break;
 	case IPPROTO_ICMP:
 		if (npf_iscached(npc, NPC_ICMP_ID)) {
 			/* ICMP query ID. (XXX) */
-			se->s_src.id = npc->npc_icmp_id;
-			se->s_dst.id = npc->npc_icmp_id;
+			const struct icmp *ic = &npc->npc_l4.icmp;
+			se->s_src.id = ic->icmp_id;
+			se->s_dst.id = ic->icmp_id;
 			break;
 		}
 		/* FALLTHROUGH */
 	default:
 		/* Unsupported. */
-		pool_cache_put(sess_cache, se);
-		return NULL;
+		ok = false;
+		goto out;
 	}
 
+	/* Initialize protocol state, but not for NAT sessions. */
+	if (nt == NULL && !npf_state_init(npc, nbuf, &se->s_state)) {
+		ok = false;
+		goto out;
+	}
 	/* Set last activity time for a new session. */
-	se->s_state = SE_OPENING;
 	getnanouptime(&se->s_atime);
 
 	/* Find the hash bucket and insert the state into the tree. */
 	sh = sess_hash_bucket(npc);
 	rw_enter(&sh->sh_lock, RW_WRITER);
-	ok = rb_tree_insert_node(&sh->sh_tree, se) == se;
+	ok = (rb_tree_insert_node(&sh->sh_tree, se) == se);
 	if (__predict_true(ok)) {
 		sh->sh_count++;
-		DPRINTF(("NPF: new se %p (link %p, nat %p)\n",
-		    se, se->s_nat_se, se->s_nat));
+		SEPRINTF(("NPF: new se %p (link %p, nat %p)\n",
+		    se, se->s_linked, se->s_nat));
 	}
 	rw_exit(&sh->sh_lock);
-
+out:
 	if (__predict_false(!ok)) {
 		/* Race with duplicate packet. */
 		pool_cache_put(sess_cache, se);
@@ -630,19 +606,15 @@
 }
 
 /*
- * npf_session_setpass: mark session as a "pass" one, also mark the
- * linked session if there is one.
+ * npf_session_setpass: mark session as a "pass" one.
  */
 void
 npf_session_setpass(npf_session_t *se)
 {
 
 	KASSERT(se->s_refcnt > 0);
+	KASSERT(se->s_linked == NULL);
 	se->s_flags |= SE_PASSSING;		/* XXXSMP */
-	if (se->s_nat_se) {
-		se = se->s_nat_se;
-		se->s_flags |= SE_PASSSING;	/* XXXSMP */
-	}
 }
 
 /*
@@ -665,13 +637,17 @@
 npf_session_link(npf_session_t *se, npf_session_t *natse)
 {
 
-	/* Hold a reference on the session we link.  Inherit the flags. */
+	/* Hold a reference on the "NAT" session.  Inherit the flags. */
+	KASSERT(se->s_nat == NULL && natse->s_nat != NULL);
 	KASSERT(se->s_refcnt > 0 && natse->s_refcnt > 0);
 	atomic_inc_uint(&natse->s_refcnt);
 	natse->s_flags = se->s_flags;
 
-	KASSERT(se->s_nat_se == NULL);
-	se->s_nat_se = natse;
+	/* Link both sessions (point to each other). */
+	KASSERT(se->s_linked == NULL && natse->s_linked == NULL);
+	se->s_linked = natse;
+	natse->s_linked = se;
+	SEPRINTF(("NPF: linked se %p -> %p\n", se, se->s_linked));
 }
 
 /*
@@ -683,9 +659,12 @@
 {
 
 	KASSERT(se->s_refcnt > 0);
+	if (se->s_linked == NULL) {
+		return NULL;
+	}
 	*forw = (se->s_direction == di);
-	if (se->s_nat_se) {
-		se = se->s_nat_se;
+	if (se->s_nat == NULL) {
+		se = se->s_linked;
 		KASSERT(se->s_refcnt > 0);
 	}
 	return se->s_nat;
@@ -697,21 +676,9 @@
 static inline bool
 npf_session_expired(const npf_session_t *se, const struct timespec *tsnow)
 {
+	const int etime = npf_state_etime(&se->s_state, se->s_type);
 	struct timespec tsdiff;
-	int etime = 0;
 
-	switch (se->s_state) {
-	case SE_ESTABLISHED:
-		etime = sess_expire_table[se->s_type];
-		break;
-	case SE_OPENING:
-	case SE_ACKNOWLEDGE:
-	case SE_CLOSING:
-		etime = 10;	/* XXX: figure out reasonable time */
-		break;
-	default:
-		KASSERT(false);
-	}
 	timespecsub(tsnow, &se->s_atime, &tsdiff);
 	return (tsdiff.tv_sec > etime);
 }
@@ -752,13 +719,19 @@
 			LIST_INSERT_HEAD(gc_list, se, se_entry.gclist);
 			sh->sh_count--;
 
-			/* If linked, drop the reference. */
-			DPRINTF(("NPF: se %p expired\n", se));
-			if (se->s_nat_se) {
-				npf_session_release(se->s_nat_se);
-				DPRINTF(("NPF: se %p unlinked %p\n",
-				    se, se->s_nat_se));
-				se->s_nat_se = NULL;
+			/*
+			 * If there is a link and it is a "pass" session,
+			 * then drop the reference and unlink.
+			 */
+			SEPRINTF(("NPF: se %p expired\n", se));
+			if (se->s_linked && se->s_nat == NULL) {
+				npf_session_t *natse = se->s_linked;
+
+				SEPRINTF(("NPF: se %p unlinked %p\n",
+				    se, se->s_linked));
+				natse->s_linked = NULL;
+				npf_session_release(natse);
+				se->s_linked = NULL;
 			}
 			se = nse;
 		}
@@ -785,8 +758,11 @@
 			if (se->s_nat) {
 				/* Release any NAT related structures. */
 				npf_nat_expire(se->s_nat);
+			} else {
+				/* Destroy the state. */
+				npf_state_destroy(&se->s_state);
 			}
-			DPRINTF(("NPF: se %p destroyed\n", se));
+			SEPRINTF(("NPF: se %p destroyed\n", se));
 			pool_cache_put(sess_cache, se);
 		}
 		se = nse;
@@ -853,7 +829,7 @@
 		sh = &sess_hashtbl[i];
 		if (sh->sh_count == 0) {
 			KASSERT(rb_tree_iterate(&sh->sh_tree,
-			    NULL, RB_DIR_RIGHT) == NULL);
+			    NULL, RB_DIR_LEFT) == NULL);
 			continue;
 		}
 		printf("s_bucket %d (count = %d)\n", i, sh->sh_count);
@@ -863,20 +839,20 @@
 			int etime;
 
 			timespecsub(&tsnow, &se->s_atime, &tsdiff);
-			etime = (se->s_state == SE_ESTABLISHED) ?
-			    sess_expire_table[se->s_type] : 10;
+			etime = npf_state_etime(&se->s_state, se->s_type);
 
 			printf("\t%p: type(%d) di %d, pass %d, tsdiff %d, "
 			    "etime %d\n", se, se->s_type, se->s_direction,
 			    se->s_flags, (int)tsdiff.tv_sec, etime);
-			ip.s_addr = se->s_src_addr;
+			memcpy(&ip, &se->s_src_addr, sizeof(ip));
 			printf("\tsrc (%s, %d) ",
 			    inet_ntoa(ip), ntohs(se->s_src.port));
-			ip.s_addr = se->s_dst_addr;
-			printf("dst (%s, %d)\n", 
+			memcpy(&ip, &se->s_dst_addr, sizeof(ip));
+			printf("dst (%s, %d)\n",
 			    inet_ntoa(ip), ntohs(se->s_dst.port));
-			if (se->s_nat_se != NULL) {
-				printf("\tlinked with %p\n", se->s_nat_se);
+			npf_state_dump(&se->s_state);
+			if (se->s_linked != NULL) {
+				printf("\tlinked with %p\n", se->s_linked);
 			}
 			if (se->s_nat != NULL) {
 				npf_nat_dump(se->s_nat);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_state.c	Thu Nov 11 06:30:39 2010 +0000
@@ -0,0 +1,316 @@
+/*	$NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF state engine to track connections.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <sys/mutex.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+
+#include "npf_impl.h"
+
+#define	MAXACKWINDOW		66000	/* max. tolerated ACK skew */
+
+/* Session expiration table (in seconds, by protocol).  XXX revisit later */
+static const u_int expire_table[ ] = {
+	[IPPROTO_TCP]		= 86400,	/* 24 hours */
+	[IPPROTO_UDP]		= 120,		/* 2 min */
+	[IPPROTO_ICMP]		= 30		/* 30 sec */
+};
+
+/*
+ * npf_tcp_inwindow: check whether a TCP segment fits into the sequence
+ * and acknowledgment windows tracked for the connection and, if so,
+ * update the tracked boundaries.
+ *
+ * => npc: packet information cache; the TCP header must be cached.
+ * => nbuf: network buffer, passed on when fetching the TCP options.
+ * => nst: connection state; nst_tcpst[0] holds the data of the
+ *    forwards sender and nst_tcpst[1] of the backwards one.
+ * => forw: true if the segment was sent in the forwards direction.
+ *
+ * Returns true if the segment is accepted and false otherwise.  A pure
+ * SYN (re-)initializes the tracking data and is always accepted.
+ */
+static bool
+npf_tcp_inwindow(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
+    const bool forw)
+{
+	const struct tcphdr *th = &npc->npc_l4.tcp;
+	const int tcpfl = th->th_flags;
+	npf_tcpstate_t *fstate, *tstate;
+	int tcpdlen, wscale, ackskew;
+	tcp_seq seq, ack, end;
+	uint32_t win;
+
+	KASSERT(npf_iscached(npc, NPC_TCP));
+	tcpdlen = npf_tcpsaw(__UNCONST(npc), &seq, &ack, &win);
+
+	/* Sequence end of the segment: SYN and FIN take one number each. */
+	end = seq + tcpdlen;
+	if (tcpfl & TH_SYN) {
+		end++;
+	}
+	if (tcpfl & TH_FIN) {
+		end++;
+	}
+
+	/*
+	 * Perform SEQ/ACK numbers check against boundaries.  Reference:
+	 *
+	 *	Rooij G., "Real stateful TCP packet filtering in IP Filter",
+	 *	10th USENIX Security Symposium invited talk, Aug. 2001.
+	 */
+
+	/* fstate describes the sender of this segment, tstate - its peer. */
+	fstate = &nst->nst_tcpst[forw ? 0 : 1];
+	tstate = &nst->nst_tcpst[forw ? 1 : 0];
+
+	/* Apply the sender's window scale; a zero window counts as one. */
+	win = win ? (win << fstate->nst_wscale) : 1;
+
+	if (tcpfl == TH_SYN) {
+		/*
+		 * First SYN or re-transmission of SYN.  Initialize all
+		 * values.  State of other side will get set with a SYN-ACK
+		 * reply (see below).
+		 */
+		fstate->nst_seqend = end;
+		fstate->nst_ackend = end;
+		fstate->nst_maxwin = win;
+		/*
+		 * Reset the peer completely: a zero nst_seqend is what
+		 * marks its first segment as not yet seen (see below).
+		 */
+		tstate->nst_seqend = 0;
+		tstate->nst_ackend = 0;
+		tstate->nst_maxwin = 0;
+		/*
+		 * Handle TCP Window Scaling (RFC 1323).  Both sides may
+		 * send this option in their SYN packets.
+		 */
+		if (npf_fetch_tcpopts(npc, nbuf, NULL, &wscale)) {
+			fstate->nst_wscale = wscale;
+		} else {
+			fstate->nst_wscale = 0;
+		}
+		tstate->nst_wscale = 0;
+		/* Done. */
+		return true;
+	}
+	if (fstate->nst_seqend == 0) {
+		/*
+		 * Should be a SYN-ACK reply to SYN.  If SYN is not set,
+		 * then we are in the middle of a connection and lost
+		 * tracking.
+		 */
+		fstate->nst_seqend = end;
+		fstate->nst_ackend = end + 1;
+		fstate->nst_maxwin = 1;
+
+		/* Handle TCP Window Scaling (must be ignored if no SYN). */
+		if (tcpfl & TH_SYN) {
+			fstate->nst_wscale =
+			    npf_fetch_tcpopts(npc, nbuf, NULL, &wscale) ?
+			    wscale : 0;
+		}
+	}
+	if ((tcpfl & TH_ACK) == 0) {
+		/* Pretend that an ACK was sent. */
+		ack = tstate->nst_seqend;
+	} else if ((tcpfl & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST) && ack == 0) {
+		/* Workaround for some TCP stacks. */
+		ack = tstate->nst_seqend;
+	}
+	if (seq == end) {
+		/* If packet contains no data - assume it is valid. */
+		end = fstate->nst_seqend;
+		seq = end;
+	}
+
+	/*
+	 * Determine whether the data is within previously noted window,
+	 * that is, upper boundary for valid data (I).
+	 */
+	if (!SEQ_GEQ(fstate->nst_ackend, end)) {
+		return false;
+	}
+	/* Lower boundary (II), which is no more than one window back. */
+	if (!SEQ_GEQ(seq, fstate->nst_seqend - tstate->nst_maxwin)) {
+		return false;
+	}
+	/*
+	 * Boundaries for valid acknowledgments (III, IV) - on predicted
+	 * window up or down, since packets may be fragmented.
+	 */
+	ackskew = tstate->nst_seqend - ack;
+	if (ackskew < -MAXACKWINDOW || ackskew > MAXACKWINDOW) {
+		return false;
+	}
+
+	/*
+	 * Negative ackskew might be due to fragmented packets.  Since the
+	 * total length of the packet is unknown - bump the boundary.  The
+	 * ACK number belongs to the peer's sequence space, therefore it
+	 * (and not the end of this segment) is the new boundary.
+	 */
+	if (ackskew < 0) {
+		tstate->nst_seqend = ack;
+	}
+	/* Keep track of the maximum window seen. */
+	if (fstate->nst_maxwin < win) {
+		fstate->nst_maxwin = win;
+	}
+	/* Advance the highest sequence number seen from the sender. */
+	if (SEQ_GT(end, fstate->nst_seqend)) {
+		fstate->nst_seqend = end;
+	}
+	/* Note the window for upper boundary. */
+	if (SEQ_GEQ(ack + win, tstate->nst_ackend)) {
+		tstate->nst_ackend = ack + win;
+	}
+	return true;
+}
+
+/*
+ * npf_state_tcp: advance the TCP connection state according to the
+ * flags of the inspected segment, then validate the segment against
+ * the tracked windows.
+ *
+ * => forw is true for a segment sent in the forwards direction.
+ * => Returns false if the segment does not fit the 3-way handshake
+ *    (SYN -> SYN,ACK -> ACK) or fails the window check.
+ */
+static inline bool
+npf_state_tcp(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
+    const bool forw)
+{
+	const int flags = npc->npc_l4.tcp.th_flags;
+
+	switch (nst->nst_state) {
+	case ST_OPENING:
+		/* SYN has been sent: tolerate its re-transmission. */
+		if (flags == TH_SYN && forw) {
+			break;
+		}
+		/* Otherwise, only a backwards SYN-ACK is expected. */
+		if (flags != (TH_SYN | TH_ACK) || forw) {
+			return false;
+		}
+		nst->nst_state = ST_ACKNOWLEDGE;
+		break;
+	case ST_ACKNOWLEDGE:
+		/* SYN-ACK was seen: a forwards ACK completes the handshake. */
+		if (flags != TH_ACK || !forw) {
+			return false;
+		}
+		nst->nst_state = ST_ESTABLISHED;
+		break;
+	case ST_ESTABLISHED:
+		/*
+		 * Common case - connection established.  Neither data
+		 * transmission (ACK) nor FIN changes the state yet.
+		 * XXX TODO
+		 */
+		break;
+	case ST_CLOSING:
+		/* XXX TODO */
+		break;
+	default:
+		/* Unknown state: dump it and trip the assertion. */
+		npf_state_dump(nst);
+		KASSERT(false);
+	}
+
+	/* State transition is fine - verify SEQ/ACK numbers. */
+	return npf_tcp_inwindow(npc, nbuf, nst, forw);
+}
+
+/*
+ * npf_state_init: initialize the state structure for a new connection.
+ *
+ * => For TCP, the packet must be a pure SYN; its values seed the
+ *    window and sequence tracking.
+ * => Returns false, without initializing the lock, if the packet may
+ *    not open a connection.  Otherwise, returns true and the state
+ *    is set to ST_OPENING.
+ */
+bool
+npf_state_init(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst)
+{
+	const int proto = npf_cache_ipproto(npc);
+
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
+
+	if (proto == IPPROTO_TCP) {
+		KASSERT(npf_iscached(npc, NPC_TCP));
+
+		/*
+		 * TCP case: nothing but SYN may open the connection.
+		 * Also, set the initial values for TCP window and
+		 * sequence tracking.
+		 */
+		if (npc->npc_l4.tcp.th_flags != TH_SYN ||
+		    !npf_tcp_inwindow(npc, nbuf, nst, true)) {
+			return false;
+		}
+	}
+	nst->nst_state = ST_OPENING;
+	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
+	return true;
+}
+
+/*
+ * npf_state_destroy: destroy the lock of the state structure.
+ */
+void
+npf_state_destroy(npf_state_t *nst)
+{
+
+	/* The state value is expected to be set, i.e. non-zero. */
+	KASSERT(nst->nst_state != 0);
+	mutex_destroy(&nst->nst_lock);
+}
+
+/*
+ * npf_state_inspect: inspect the packet against the connection state
+ * and update the state, while holding the state lock.
+ *
+ * => forw is true if the packet travels in the forwards direction.
+ * => Returns true if the packet is valid for the connection; only
+ *    the TCP tracking may reject a packet.
+ */
+bool
+npf_state_inspect(const npf_cache_t *npc, nbuf_t *nbuf,
+    npf_state_t *nst, const bool forw)
+{
+	const int proto = npf_cache_ipproto(npc);
+	bool ret;
+
+	mutex_enter(&nst->nst_lock);
+	if (proto == IPPROTO_TCP) {
+		/* Handle TCP. */
+		ret = npf_state_tcp(npc, nbuf, nst, forw);
+	} else {
+		/*
+		 * Handle UDP or ICMP: the first backwards packet, i.e.
+		 * response for opening session, establishes it.
+		 */
+		if (!forw && nst->nst_state == ST_OPENING) {
+			nst->nst_state = ST_ESTABLISHED;
+		}
+		ret = true;
+	}
+	mutex_exit(&nst->nst_lock);
+
+	return ret;
+}
+
+/*
+ * npf_state_etime: return the expiration time (in seconds) to apply to
+ * a session in the given state.
+ *
+ * => proto is the IP protocol number; for an established session it
+ *    is used as the index into expire_table[].
+ * => Any other state, as well as a protocol number outside of the
+ *    table, gets the short default time-out.
+ */
+int
+npf_state_etime(const npf_state_t *nst, const int proto)
+{
+	const size_t nitems = sizeof(expire_table) / sizeof(expire_table[0]);
+
+	/*
+	 * The table only extends up to its highest designated index,
+	 * therefore never index it with an unchecked protocol number.
+	 */
+	if (nst->nst_state == ST_ESTABLISHED &&
+	    proto >= 0 && (size_t)proto < nitems) {
+		return expire_table[proto];
+	}
+	return 10;	/* XXX TODO */
+}
+
+#if defined(DDB) || defined(_NPF_TESTING)
+
+/*
+ * npf_state_dump: print the state value and the TCP tracking data kept
+ * in both nst_tcpst[] slots ("F" is slot 0, "T" is slot 1).
+ */
+void
+npf_state_dump(npf_state_t *nst)
+{
+	const npf_tcpstate_t *f = &nst->nst_tcpst[0];
+	const npf_tcpstate_t *t = &nst->nst_tcpst[1];
+
+	printf("\tstate (%p) %d:\n\t\t"
+	    "F { seqend %u ackend %u mwin %u wscale %u }\n\t\t"
+	    "T { seqend %u, ackend %u mwin %u wscale %u }\n",
+	    nst, nst->nst_state,
+	    f->nst_seqend, f->nst_ackend, f->nst_maxwin, f->nst_wscale,
+	    t->nst_seqend, t->nst_ackend, t->nst_maxwin, t->nst_wscale);
+}
+
+#endif
--- a/sys/net/npf/npf_tableset.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/sys/net/npf/npf_tableset.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $	*/
+/*	$NetBSD: npf_tableset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -41,10 +41,8 @@
  * - Dynamic array resize.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $");
-#endif
+__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
--- a/usr.sbin/npf/npfctl/npf_data.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_data.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_data.c,v 1.3 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_data.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -32,6 +32,9 @@
  * XXX: Needs some clean-up.
  */
 
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: npf_data.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
+
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
@@ -389,7 +392,8 @@
 }
 
 void
-npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface)
+npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface,
+    bool ipid_rnd, int minttl, int maxmss)
 {
 	prop_number_t attrnum;
 
@@ -405,6 +409,14 @@
 		ifnum = prop_number_create_integer(if_idx);
 		prop_dictionary_set(rl, "interface", ifnum);
 	}
+	if (attr & NPF_RULE_NORMALIZE) {
+		prop_dictionary_set(rl, "randomize-id",
+		    prop_bool_create(ipid_rnd));
+		prop_dictionary_set(rl, "min-ttl",
+		    prop_number_create_integer(minttl));
+		prop_dictionary_set(rl, "max-mss",
+		    prop_number_create_integer(maxmss));
+	}
 }
 
 /*
@@ -649,6 +661,7 @@
 {
 	int attr = NPF_RULE_PASS | NPF_RULE_FINAL;
 	in_addr_t addr, mask;
+	void *addrptr;
 
 	/* Translation type and flags. */
 	prop_dictionary_set(rl, "type",
@@ -658,12 +671,15 @@
 
 	/* Interface and attributes. */
 	attr |= (type == NPF_NATOUT) ? NPF_RULE_OUT : NPF_RULE_IN;
-	npfctl_rule_setattr(rl, attr, iface);
+	npfctl_rule_setattr(rl, attr, iface, false, 0, 0);
 
 	/* Translation IP, XXX should be no mask. */
 	npfctl_parse_cidr(taddr, &addr, &mask);
-	prop_dictionary_set(rl, "translation_ip",
-	    prop_number_create_integer(addr));
+	addrptr = prop_data_create_data(&addr, sizeof(in_addr_t));
+	if (addrptr == NULL) {
+		err(EXIT_FAILURE, "prop_data_create_data");
+	}
+	prop_dictionary_set(rl, "translation-ip", addrptr);
 
 	/* Translation port (for redirect case). */
 	if (rport) {
@@ -676,7 +692,7 @@
 		if (range) {
 			errx(EXIT_FAILURE, "range is not supported for 'rdr'");
 		}
-		prop_dictionary_set(rl, "translation_port",
+		prop_dictionary_set(rl, "translation-port",
 		    prop_number_create_integer(port));
 	}
 }
--- a/usr.sbin/npf/npfctl/npf_ncgen.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_ncgen.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ncgen.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_ncgen.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -36,6 +36,9 @@
  * calculations, when changing generation routines.
  */
 
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: npf_ncgen.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
+
 #include <sys/types.h>
 
 #include "npfctl.h"
@@ -77,7 +80,6 @@
 }
 
 #if 0
-
 /*
  * npfctl_gennc_ether: initial n-code fragment to check Ethernet frame.
  */
@@ -111,7 +113,6 @@
 	/* + 13 words. */
 	*ncptr = (void *)nc;
 }
-
 #endif
 
 /*
--- a/usr.sbin/npf/npfctl/npf_parser.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_parser.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_parser.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npf_parser.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -30,6 +30,9 @@
  * XXX: This needs clean-up!
  */
 
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: npf_parser.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -140,6 +143,34 @@
 	return vr;
 }
 
+/*
+ * npfctl_parsenorm: parse the option list of "normalize ( .. )".
+ *
+ * Options are separated by commas, spaces or tabs:
+ *	random-id	sets *rnd to true
+ *	min-ttl <n>	stores <n> in *minttl
+ *	max-mss <n>	stores <n> in *maxmss
+ *
+ * => buf is modified by the tokenizer.
+ * => Returns 0 on success; -1 if the list is empty, contains an
+ *    unknown option, or an option is missing its value.
+ */
+static inline int
+npfctl_parsenorm(char *buf, bool *rnd, int *minttl, int *maxmss)
+{
+	char *p = buf, *sptr;
+
+	DPRINTF(("norm\t|%s|\n", p));
+
+	p = strtok_r(buf, ", \t", &sptr);
+	if (p == NULL) {
+		return -1;
+	}
+	do {
+		if (strcmp(p, "random-id") == 0) {
+			*rnd = true;
+		} else if (strcmp(p, "min-ttl") == 0) {
+			/* The value is mandatory: never pass NULL to atoi(). */
+			p = strtok_r(NULL, ", \t", &sptr);
+			if (p == NULL) {
+				return -1;
+			}
+			*minttl = atoi(p);
+		} else if (strcmp(p, "max-mss") == 0) {
+			/* Likewise, the value must follow the keyword. */
+			p = strtok_r(NULL, ", \t", &sptr);
+			if (p == NULL) {
+				return -1;
+			}
+			*maxmss = atoi(p);
+		} else {
+			return -1;
+		}
+	} while ((p = strtok_r(NULL, ", \t", &sptr)) != NULL);
+
+	return 0;
+}
+
 /*
  * npfctl_parserule: main routine to parse a rule.  Syntax:
  *
@@ -154,10 +185,9 @@
 {
 	var_t *from_cidr = NULL, *fports = NULL;
 	var_t *to_cidr = NULL, *tports = NULL;
-	char *proto = NULL, *tcp_flags = NULL;
-	char *p, *sptr, *iface;
-	bool icmp = false, tcp = false;
-	int icmp_type = -1, icmp_code = -1;
+	char *p, *sptr, *iface, *proto = NULL, *tcp_flags = NULL;
+	int icmp_type = -1, icmp_code = -1, minttl = 0, maxmss = 0;
+	bool icmp = false, tcp = false, rnd = false;
 	int ret, attr = 0;
 
 	DPRINTF(("rule\t|%s|\n", buf));
@@ -337,10 +367,32 @@
 	if (p && strcmp(p, "keep") == 0) {
 		attr |= NPF_RULE_KEEPSTATE;
 		PARSE_NEXT_TOKEN();
+		if (p == NULL || strcmp(p, "state") != 0) {
+			return PARSE_ERR();
+		}
+		PARSE_NEXT_TOKEN_NOCHECK();
+	}
+
+	/* normalize ( .. ) */
+	if (p && strcmp(p, "normalize") == 0) {
+		p = strtok_r(NULL, "()", &sptr);
+		if (p == NULL) {
+			return PARSE_ERR();
+		}
+		if (npfctl_parsenorm(p, &rnd, &minttl, &maxmss)) {
+			return PARSE_ERR();
+		}
+		attr |= NPF_RULE_NORMALIZE;
+		PARSE_NEXT_TOKEN_NOCHECK();
+	}
+
+	/* Should have nothing more. */
+	if (p != NULL) {
+		return PARSE_ERR();
 	}
 
 	/* Set the rule attributes and interface, if any. */
-	npfctl_rule_setattr(rl, attr, iface);
+	npfctl_rule_setattr(rl, attr, iface, rnd, minttl, maxmss);
 
 	/*
 	 * Generate all protocol data.
@@ -386,7 +438,8 @@
 	if (strcmp(p, "default") == 0) {
 		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
 		npfctl_rule_setattr(rl,
-		    GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL);
+		    GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL,
+		    false, 0, 0);
 		return 0;
 	}
 
@@ -433,7 +486,7 @@
 		else
 			return -1;
 	}
-	npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface);
+	npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface, false, 0, 0);
 	return 0;
 }
 
--- a/usr.sbin/npf/npfctl/npfctl.c	Thu Nov 11 04:51:18 2010 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.c	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npfctl.c,v 1.1 2010/08/22 18:56:24 rmind Exp $	*/
+/*	$NetBSD: npfctl.c,v 1.2 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -29,6 +29,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: npfctl.c,v 1.2 2010/11/11 06:30:39 rmind Exp $");
+
 #include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -145,12 +148,6 @@
 	npf_ioctl_table_t tbl;
 	char *arg;
 
-#ifdef DEBUG
-	npfctl_init_data();
-	npfctl_parsecfg("npf.conf");
-	ret = npfctl_ioctl_send(fd);
-	return;
-#endif
 	fd = open(NPF_DEV_PATH, O_RDONLY);
 	if (fd == -1) {
 		err(EXIT_FAILURE, "cannot open " NPF_DEV_PATH);
@@ -218,6 +215,12 @@
 	}
 	cmd = argv[1];
 
+#ifdef DEBUG
+	npfctl_init_data();
+	npfctl_parsecfg("npf.conf");
+	return npfctl_ioctl_send(0);
+#endif
+
 	/* Find and call the subroutine */
 	for (n = 0; operations[n].cmd != NULL; n++) {
 		if (strcmp(cmd, operations[n].cmd) != 0)
--- a/usr.sbin/npf/npfctl/npfctl.h	Thu Nov 11 04:51:18 2010 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.h	Thu Nov 11 06:30:39 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npfctl.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
+/*	$NetBSD: npfctl.h,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -77,7 +77,8 @@
 
 prop_dictionary_t npfctl_mk_rule(bool);
 void		npfctl_add_rule(prop_dictionary_t, prop_dictionary_t);
-void		npfctl_rule_setattr(prop_dictionary_t, int, char *);
+void		npfctl_rule_setattr(prop_dictionary_t, int, char *,
+		    bool, int, int);
 void		npfctl_rule_protodata(prop_dictionary_t, char *, char *,
 		    int, int, var_t *, var_t *, var_t *, var_t *);
 void		npfctl_rule_icmpdata(prop_dictionary_t, var_t *, var_t *);