NPF checkpoint: trunk
author rmind <rmind@NetBSD.org>
Tue, 18 Jan 2011 20:33:45 +0000
branch trunk
changeset 196924 6ab7fbf8ee49
parent 196923 f61feb41542e
child 196925 a2a3da194316
NPF checkpoint:
- Add the concept of rule procedure: separate normalization, logging and potentially other functions from the rule structure. Rule procedure can be shared amongst the rules. Separation is both at kernel level (npf_rproc_t) and configuration ("procedure" + "apply").
- Fix portmap sharing for NAT policy.
- Update TCP state tracking logic. Use TCP FSM definitions.
- Add if_byindex(), OK by matt@. Use in logging for the lookup.
- Fix traceroute ALG and many other bugs; misc clean-up.
sys/net/if.c
sys/net/if.h
sys/net/npf/npf.c
sys/net/npf/npf.h
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_ctl.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_instr.c
sys/net/npf/npf_log.c
sys/net/npf/npf_mbuf.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_sendpkt.c
sys/net/npf/npf_session.c
sys/net/npf/npf_state.c
usr.sbin/npf/npfctl/npf.conf.5
usr.sbin/npf/npfctl/npf_data.c
usr.sbin/npf/npfctl/npf_parser.c
usr.sbin/npf/npfctl/npfctl.8
usr.sbin/npf/npfctl/npfctl.c
usr.sbin/npf/npfctl/npfctl.h
--- a/sys/net/if.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/if.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if.c,v 1.249 2010/11/15 22:42:36 pooka Exp $	*/
+/*	$NetBSD: if.c,v 1.250 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
@@ -90,7 +90,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.249 2010/11/15 22:42:36 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.250 2011/01/18 20:33:45 rmind Exp $");
 
 #include "opt_inet.h"
 
@@ -1477,6 +1477,13 @@
 	return NULL;
 }
 
+ifnet_t *
+if_byindex(u_int idx)
+{
+
+	return (idx < if_indexlim) ? ifindex2ifnet[idx] : NULL;
+}
+
 /* common */
 int
 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data)
--- a/sys/net/if.h	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/if.h	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if.h,v 1.148 2010/11/15 22:42:36 pooka Exp $	*/
+/*	$NetBSD: if.h,v 1.149 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
@@ -206,7 +206,7 @@
  */
 TAILQ_HEAD(ifnet_head, ifnet);		/* the actual queue head */
 
-struct ifnet {				/* and the entries */
+typedef struct ifnet {
 	void	*if_softc;		/* lower-level data for this if */
 	TAILQ_ENTRY(ifnet) if_list;	/* all struct ifnets are chained */
 	TAILQ_HEAD(, ifaddr) if_addrlist; /* linked list of addresses per if */
@@ -296,7 +296,8 @@
 					 * same, they are the same ifnet.
 					 */
 	struct sysctllog	*if_sysctl_log;
-};
+} ifnet_t;
+
 #define	if_mtu		if_data.ifi_mtu
 #define	if_type		if_data.ifi_type
 #define	if_addrlen	if_data.ifi_addrlen
@@ -897,6 +898,9 @@
 #endif /* _KERNEL */ /* XXX really ALTQ? */
 
 #ifdef _KERNEL
+
+ifnet_t *	if_byindex(u_int);
+
 /*
  * ifq sysctl support
  */
--- a/sys/net/npf/npf.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.c,v 1.2 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf.c,v 1.3 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf.c,v 1.2 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf.c,v 1.3 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -250,9 +250,9 @@
 npf_core_destroy(npf_core_t *nc)
 {
 
-	npf_tableset_destroy(nc->n_tables);
 	npf_ruleset_destroy(nc->n_rules);
 	npf_ruleset_destroy(nc->n_nat_rules);
+	npf_tableset_destroy(nc->n_tables);
 	kmem_free(nc, sizeof(npf_core_t));
 }
 
--- a/sys/net/npf/npf.h	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf.h	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf.h,v 1.5 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf.h,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -56,6 +56,7 @@
 struct npf_rule;
 struct npf_hook;
 
+typedef struct npf_rproc	npf_rproc_t;
 typedef struct npf_ruleset	npf_ruleset_t;
 typedef struct npf_rule		npf_rule_t;
 typedef struct npf_hook		npf_hook_t;
@@ -146,7 +147,7 @@
 int		nbuf_find_tag(nbuf_t *, uint32_t, void **);
 
 /* Ruleset interface. */
-npf_rule_t *	npf_rule_alloc(prop_dictionary_t, void *, size_t);
+npf_rule_t *	npf_rule_alloc(prop_dictionary_t, npf_rproc_t *, void *, size_t);
 void		npf_rule_free(npf_rule_t *);
 void		npf_activate_rule(npf_rule_t *);
 void		npf_deactivate_rule(npf_rule_t *);
@@ -162,16 +163,17 @@
 #define	NPF_RULE_DEFAULT		0x0002
 #define	NPF_RULE_FINAL			0x0004
 #define	NPF_RULE_KEEPSTATE		0x0008
-#define	NPF_RULE_COUNT			0x0010
-#define	NPF_RULE_LOG			0x0020
-#define	NPF_RULE_RETRST			0x0040
-#define	NPF_RULE_RETICMP		0x0080
-#define	NPF_RULE_NORMALIZE		0x0100
+#define	NPF_RULE_RETRST			0x0010
+#define	NPF_RULE_RETICMP		0x0020
 
 #define	NPF_RULE_IN			0x10000000
 #define	NPF_RULE_OUT			0x20000000
 #define	NPF_RULE_DIMASK			(NPF_RULE_IN | NPF_RULE_OUT)
 
+/* Rule procedure flags. */
+#define	NPF_RPROC_LOG			0x0001
+#define	NPF_RPROC_NORMALIZE		0x0002
+
 /* Address translation types and flags. */
 #define	NPF_NATIN			1
 #define	NPF_NATOUT			2
@@ -226,6 +228,11 @@
 	/* Raced packets. */
 	NPF_STAT_RACE_SESSION,
 	NPF_STAT_RACE_NAT,
+	/* Rule procedure cases. */
+	NPF_STAT_RPROC_LOG,
+	NPF_STAT_RPROC_NORM,
+	/* Other errors. */
+	NPF_STAT_ERROR,
 	/* Count (last). */
 	NPF_STATS_COUNT
 } npf_stats_t;
--- a/sys/net/npf/npf_alg_icmp.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.5 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.5 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.6 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -64,7 +64,7 @@
 #define	TR_PORT_RANGE	33484
 #define	TR_MAX_TTL	50
 
-static npf_alg_t *	alg_icmp;
+static npf_alg_t *	alg_icmp	__read_mostly;
 
 static bool		npfa_icmp_match(npf_cache_t *, nbuf_t *, void *);
 static bool		npfa_icmp_natin(npf_cache_t *, nbuf_t *, void *);
@@ -121,6 +121,11 @@
 
 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
 
+	/* Check for low TTL. */
+	if (ip->ip_ttl > TR_MAX_TTL) {
+		return false;
+	}
+
 	if (proto == IPPROTO_TCP) {
 		struct tcphdr *th = &npc->npc_l4.tcp;
 		dport = ntohs(th->th_dport);
@@ -136,11 +141,6 @@
 		return false;
 	}
 
-	/* Check for low TTL. */
-	if (ip->ip_ttl > TR_MAX_TTL) {
-		return false;
-	}
-
 	/* Associate ALG with translation entry. */
 	npf_nat_t *nt = ntptr;
 	npf_nat_setalg(nt, alg_icmp, 0);
@@ -205,21 +205,46 @@
 	return false;
 }
 
+static void
+npfa_srcdst_invert(npf_cache_t *npc)
+{
+	const int proto = npf_cache_ipproto(npc);
+	npf_addr_t *tmp_ip;
+
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &npc->npc_l4.tcp;
+		in_port_t tmp_sport = th->th_sport;
+		th->th_sport = th->th_dport;
+		th->th_dport = tmp_sport;
+
+	} else if (proto == IPPROTO_UDP) {
+		struct udphdr *uh = &npc->npc_l4.udp;
+		in_port_t tmp_sport = uh->uh_sport;
+		uh->uh_sport = uh->uh_dport;
+		uh->uh_dport = tmp_sport;
+	}
+	tmp_ip = npc->npc_srcip;
+	npc->npc_srcip = npc->npc_dstip;
+	npc->npc_dstip = tmp_ip;
+}
+
 /*
- * npfa_icmp_session: ALG session inspector, determines unique identifiers.
+ * npfa_icmp_session: ALG session inspector, returns unique identifiers.
  */
 static bool
 npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
 {
 	npf_cache_t *key = keyptr;
+	KASSERT(key->npc_info == 0);
 
-	/* ICMP? Get unique identifiers from ICMP packet. */
+	/* IP + ICMP?  Get unique identifiers from ICMP packet. */
+	if (!npf_iscached(npc, NPC_IP4)) {
+		return false;
+	}
 	if (npf_cache_ipproto(npc) != IPPROTO_ICMP) {
 		return false;
 	}
-	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_ICMP));
-	key->npc_info = NPC_ICMP;
 
 	/* Advance to ICMP header. */
 	struct ip *ip = &npc->npc_ip.v4;
@@ -242,7 +267,7 @@
 		npc->npc_info |= NPC_ICMP_ID;
 		ic->icmp_id = keyic->icmp_id;
 
-		/* Note: return 'false', since key is the original cache. */
+		/* Note: return False, since key is the original cache. */
 		return false;
 	}
 
@@ -252,6 +277,7 @@
 	 */
 	KASSERT(npf_iscached(key, NPC_IP46));
 	KASSERT(npf_iscached(key, NPC_LAYER4));
+	npfa_srcdst_invert(key);
 	key->npc_ipsz = npc->npc_ipsz;
 
 	return true;
@@ -264,47 +290,57 @@
 static bool
 npfa_icmp_natin(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
 {
-	npf_cache_t enpc;
+	npf_cache_t enpc = { .npc_info = 0 };
 
-	/* XXX: Duplicated work. */
+	/* XXX: Duplicated work (done at session inspection). */
 	if (!npfa_icmp_session(npc, nbuf, &enpc)) {
 		return false;
 	}
+	/* XXX: Restore inversion (inefficient). */
 	KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_LAYER4));
-
-	const int proto = npf_cache_ipproto(&enpc);
-	void *n_ptr = nbuf_dataptr(nbuf);
-	void *cnbuf = nbuf, *cnptr = n_ptr;
-	struct icmp *ic = &npc->npc_l4.icmp;
-	uint16_t cksum = ic->icmp_cksum;
-	struct ip *ip = &enpc.npc_ip.v4;
-	uint16_t ecksum = ip->ip_sum, l4cksum;
+	npfa_srcdst_invert(&enpc);
 
-	/* Save TCP/UDP checksum for update. */
-	if (proto == IPPROTO_TCP) {
-		struct tcphdr *th = &enpc.npc_l4.tcp;
-		l4cksum = th->th_sum;
-	} else {
-		struct udphdr *uh = &enpc.npc_l4.udp;
-		l4cksum = uh->uh_sum;
-	}
-
-	/* Advance to the original IP header, which is embedded after ICMP. */
-	u_int offby = offsetof(struct icmp, icmp_ip);
-	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
-		return false;
-	}
-
+	/*
+	 * Save ICMP and embedded IP with TCP/UDP header checksums, retrieve
+	 * the original address and port, and calculate ICMP checksum for
+	 * embedded packet changes, while data is not rewritten in the cache.
+	 */
+	const int proto = npf_cache_ipproto(&enpc);
+	const struct ip * const ip = &npc->npc_ip.v4, *eip = &enpc.npc_ip.v4;
+	const struct icmp * const ic = &npc->npc_l4.icmp;
+	uint16_t cksum = ic->icmp_cksum, ecksum = eip->ip_sum, l4cksum;
 	npf_nat_t *nt = ntptr;
 	npf_addr_t *addr;
 	in_port_t port;
 
 	npf_nat_getorig(nt, &addr, &port);
 
+	if (proto == IPPROTO_TCP) {
+		struct tcphdr *th = &enpc.npc_l4.tcp;
+		cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
+		l4cksum = th->th_sum;
+	} else {
+		struct udphdr *uh = &enpc.npc_l4.udp;
+		cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
+		l4cksum = uh->uh_sum;
+	}
+	cksum = npf_addr_cksum(cksum, enpc.npc_ipsz, enpc.npc_srcip, addr);
+
+	/*
+	 * Save the original pointers to the main IP header and then advance
+	 * to the embedded IP header after ICMP header.
+	 */
+	void *n_ptr = nbuf_dataptr(nbuf), *cnbuf = nbuf, *cnptr = n_ptr;
+	u_int offby = (ip->ip_hl << 2) + offsetof(struct icmp, icmp_ip);
+
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+		return false;
+	}
+
 	/*
 	 * Rewrite source IP address and port of the embedded IP header,
 	 * which represents original packet - therefore passing PFIL_OUT.
-	 * Note: checksum is first, since it uses values from the cache.
+	 * Note: checksums are first, since it uses values from the cache.
 	 */
 	if (!npf_rwrcksum(&enpc, nbuf, n_ptr, PFIL_OUT, addr, port)) {
 		return false;
@@ -317,20 +353,21 @@
 	}
 
 	/*
-	 * Calculate ICMP checksum.
+	 * Finish calculation of the ICMP checksum.  Update for embedded IP
+	 * and TCP/UDP checksum changes.  Finally, rewrite ICMP checksum.
 	 */
 	if (proto == IPPROTO_TCP) {
 		struct tcphdr *th = &enpc.npc_l4.tcp;
-		cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
 		cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum);
-	} else {
+	} else if (l4cksum) {
 		struct udphdr *uh = &enpc.npc_l4.udp;
-		cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
 		cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
 	}
-	cksum = npf_addr_cksum(cksum, enpc.npc_ipsz, enpc.npc_srcip, addr);
-	cksum = npf_fixup16_cksum(cksum, ecksum, ip->ip_sum);
+	cksum = npf_fixup16_cksum(cksum, ecksum, eip->ip_sum);
 
-	/* Rewrite ICMP checksum. */
-	return nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum);
+	offby = (ip->ip_hl << 2) + offsetof(struct icmp, icmp_cksum);
+	if (nbuf_advstore(&cnbuf, &cnptr, offby, sizeof(uint16_t), &cksum)) {
+		return false;
+	}
+	return true;
 }
--- a/sys/net/npf/npf_ctl.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_ctl.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_ctl.c,v 1.4 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_ctl.c,v 1.5 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.4 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.5 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -146,11 +146,43 @@
 	return error;
 }
 
+static npf_rproc_t *
+npf_mk_rproc(prop_array_t rprocs, uint64_t rproc_id)
+{
+	prop_object_iterator_t it;
+	prop_dictionary_t rpdict;
+	prop_object_t obj;
+	npf_rproc_t *rp;
+	uint64_t id;
+
+	it = prop_array_iterator(rprocs);
+	while ((rpdict = prop_object_iterator_next(it)) != NULL) {
+		obj = prop_dictionary_get(rpdict, "id");
+		id = prop_number_unsigned_integer_value(obj);
+		if (id == rproc_id)
+			break;
+	}
+	if (rpdict == NULL) {
+		return NULL;
+	}
+	CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
+	obj = prop_dictionary_get(rpdict, "rproc-ptr");
+	if (obj == NULL) {
+		rp = npf_rproc_create(rpdict);
+		prop_dictionary_set(rpdict, "rproc-ptr",
+		    prop_number_create_unsigned_integer((uintptr_t)rp));
+	} else {
+		rp = (void *)(uintptr_t)prop_number_unsigned_integer_value(obj);
+	}
+	return rp;
+}
+
 static int
-npf_mk_singlerule(prop_dictionary_t rldict,
-    npf_ruleset_t *rlset, npf_rule_t **parent)
+npf_mk_singlerule(prop_dictionary_t rldict, npf_ruleset_t *rlset,
+    prop_array_t rprocs, npf_rule_t **parent)
 {
 	npf_rule_t *rl;
+	npf_rproc_t *rp;
 	prop_object_t obj;
 	size_t nc_size;
 	void *nc;
@@ -190,14 +222,23 @@
 		nc_size = 0;
 	}
 
+	/* Check for rule procedure. */
+	obj = prop_dictionary_get(rldict, "rproc-id");
+	if (obj && rprocs) {
+		uint64_t rproc_id = prop_number_unsigned_integer_value(obj);
+		rp = npf_mk_rproc(rprocs, rproc_id);
+		if (rp == NULL) {
+			if (nc) {
+				npf_ncode_free(nc, nc_size);	/* XXX */
+			}
+			return EINVAL;
+		}
+	} else {
+		rp = NULL;
+	}
+
 	/* Allocate and setup NPF rule. */
-	rl = npf_rule_alloc(rldict, nc, nc_size);
-	if (rl == NULL) {
-		if (nc) {
-			npf_ncode_free(nc, nc_size);	/* XXX */
-		}
-		return ENOMEM;
-	}
+	rl = npf_rule_alloc(rldict, rp, nc, nc_size);
 	npf_ruleset_insert(rlset, rl);
 	if (parent) {
 		*parent = rl;
@@ -206,16 +247,24 @@
 }
 
 static int
-npf_mk_rules(npf_ruleset_t *rlset, prop_array_t rules)
+npf_mk_rules(npf_ruleset_t *rlset, prop_array_t rules, prop_array_t rprocs)
 {
 	prop_object_iterator_t it;
-	prop_dictionary_t rldict;
+	prop_dictionary_t rldict, rpdict;
 	int error;
 
-	/* Ruleset - array. */
-	if (prop_object_type(rules) != PROP_TYPE_ARRAY)
+	/* Rule procedures and the ruleset - arrays. */
+	if (prop_object_type(rprocs) != PROP_TYPE_ARRAY ||
+	    prop_object_type(rules) != PROP_TYPE_ARRAY)
 		return EINVAL;
 
+	it = prop_array_iterator(rprocs);
+	while ((rpdict = prop_object_iterator_next(it)) != NULL) {
+		if (prop_dictionary_get(rpdict, "rproc-ptr"))
+			return EINVAL;
+	}
+	prop_object_iterator_release(it);
+
 	error = 0;
 	it = prop_array_iterator(rules);
 	while ((rldict = prop_object_iterator_next(it)) != NULL) {
@@ -225,7 +274,7 @@
 		npf_rule_t *myrl;
 
 		/* Generate a single rule. */
-		error = npf_mk_singlerule(rldict, rlset, &myrl);
+		error = npf_mk_singlerule(rldict, rlset, rprocs, &myrl);
 		if (error)
 			break;
 
@@ -244,7 +293,7 @@
 		while ((srldict = prop_object_iterator_next(sit)) != NULL) {
 			/* For subrule, pass ruleset pointer of parent. */
 			error = npf_mk_singlerule(srldict,
-			    npf_rule_subset(myrl), NULL);
+			    npf_rule_subset(myrl), rprocs, NULL);
 			if (error)
 				break;
 		}
@@ -286,12 +335,12 @@
 		 * NAT policies are standard rules, plus additional
 		 * information for translation.  Make a rule.
 		 */
-		error = npf_mk_singlerule(natdict, nset, &rl);
+		error = npf_mk_singlerule(natdict, nset, NULL, &rl);
 		if (error)
 			break;
 
 		/* Allocate a new NAT policy and assign to the rule. */
-		np = npf_nat_newpolicy(natdict);
+		np = npf_nat_newpolicy(natdict, nset);
 		if (np == NULL) {
 			npf_rule_free(rl);
 			error = ENOMEM;
@@ -315,11 +364,11 @@
 npfctl_reload(u_long cmd, void *data)
 {
 	const struct plistref *pref = data;
+	prop_array_t natlist, tables, rprocs, rules;
 	npf_tableset_t *tblset = NULL;
 	npf_ruleset_t *rlset = NULL;
 	npf_ruleset_t *nset = NULL;
 	prop_dictionary_t dict;
-	prop_array_t natlist, tables, rules;
 	int error;
 
 	/* Retrieve the dictionary. */
@@ -346,10 +395,11 @@
 	if (error)
 		goto fail;
 
-	/* Rules. */
+	/* Rules and rule procedures. */
 	rlset = npf_ruleset_create();
+	rprocs = prop_dictionary_get(dict, "rprocs");
 	rules = prop_dictionary_get(dict, "rules");
-	error = npf_mk_rules(rlset, rules);
+	error = npf_mk_rules(rlset, rules, rprocs);
 	if (error)
 		goto fail;
 
--- a/sys/net/npf/npf_handler.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_handler.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_handler.c,v 1.5 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_handler.c,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.5 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.6 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -60,13 +60,13 @@
 
 static bool			default_pass = true;
 
-int	npf_packet_handler(void *, struct mbuf **, struct ifnet *, int);
+int	npf_packet_handler(void *, struct mbuf **, ifnet_t *, int);
 
 /*
  * npf_ifhook: hook handling interface changes.
  */
 static int
-npf_ifhook(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
+npf_ifhook(void *arg, struct mbuf **mp, ifnet_t *ifp, int di)
 {
 
 	return 0;
@@ -78,7 +78,7 @@
  * Note: packet flow and inspection logic is in strict order.
  */
 int
-npf_packet_handler(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
+npf_packet_handler(void *arg, struct mbuf **mp, ifnet_t *ifp, int di)
 {
 	nbuf_t *nbuf = *mp;
 	npf_cache_t npc;
@@ -125,7 +125,7 @@
 		goto pass;
 	}
 
-	/* Inspect the ruleset using this packet. */
+	/* Inspect the ruleset using this packet, acquire the lock. */
 	rl = npf_ruleset_inspect(&npc, nbuf, ifp, di, NPF_LAYER_3);
 	if (rl == NULL) {
 		if (default_pass) {
@@ -134,14 +134,18 @@
 		}
 		npf_stats_inc(NPF_STAT_BLOCK_DEFAULT);
 		error = ENETUNREACH;
-		goto out;
+		goto block;
 	}
 
-	/* Apply the rule. */
+	/* Get rule procedure for assocation and/or execution. */
+	KASSERT(rp == NULL);
+	rp = npf_rproc_return(rl);
+
+	/* Apply the rule, release the lock. */
 	error = npf_rule_apply(&npc, nbuf, rl, &retfl);
 	if (error) {
 		npf_stats_inc(NPF_STAT_BLOCK_RULESET);
-		goto out;
+		goto block;
 	}
 	npf_stats_inc(NPF_STAT_PASS_RULESET);
 
@@ -152,29 +156,27 @@
 			error = ENOMEM;
 			goto out;
 		}
-		/* Associate rule processing data (XXX locking). */
-		rp = npf_rproc_return(rl);
 		npf_session_setpass(se, rp);
-	} else {
-		/* XXX: Return rule processing, needs locking. */
 	}
 pass:
 	KASSERT(error == 0);
-
 	/*
-	 * Perform rule processing, if required.
+	 * Perform NAT.
+	 */
+	error = npf_do_nat(&npc, se, nbuf, ifp, di);
+block:
+	/*
+	 * Perform rule procedure, if any.
 	 */
 	if (rp) {
 		npf_rproc_run(&npc, nbuf, rp);
 	}
-	/*
-	 * Perform NAT.
-	 */
-	error = npf_do_nat(&npc, se, nbuf, ifp, di);
 out:
-	/* Release reference on session. */
-	if (se != NULL) {
+	/* Release the reference on session, or rule procedure. */
+	if (se) {
 		npf_session_release(se);
+	} else if (rp) {
+		npf_rproc_release(rp); /* XXXkmem */
 	}
 
 	/*
@@ -189,6 +191,10 @@
 		if (retfl) {
 			npf_return_block(&npc, nbuf, retfl);
 		}
+		if (error != ENETUNREACH) {
+			NPF_PRINTF(("NPF: error in handler '%d'\n", error));
+			npf_stats_inc(NPF_STAT_ERROR);
+		}
 		m_freem(*mp);
 		*mp = NULL;
 	} else {
--- a/sys/net/npf/npf_impl.h	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_impl.h	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_impl.h,v 1.5 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -42,6 +42,7 @@
 #include <sys/hash.h>
 #include <sys/rbtree.h>
 #include <sys/rwlock.h>
+#include <net/if.h>
 
 #include "npf.h"
 #include "npf_ncode.h"
@@ -50,6 +51,12 @@
 #include "testing.h"
 #endif
 
+#ifdef _NPF_DEBUG
+#define	NPF_PRINTF(x)	printf x
+#else
+#define	NPF_PRINTF(x)
+#endif
+
 /*
  * STRUCTURE DECLARATIONS.
  *
@@ -61,7 +68,6 @@
 struct npf_session;
 
 typedef struct npf_nat		npf_nat_t;
-typedef struct npf_rproc	npf_rproc_t;
 typedef struct npf_alg		npf_alg_t;
 typedef struct npf_natpolicy	npf_natpolicy_t;
 typedef struct npf_session	npf_session_t;
@@ -85,16 +91,10 @@
 #define	NPF_NCODE_LIMIT		1024
 #define	NPF_TABLE_SLOTS		32
 
-
 /*
  * SESSION STATE STRUCTURES
  */
 
-#define	ST_OPENING		1	/* SYN has been sent. */
-#define	ST_ACKNOWLEDGE		2	/* SYN-ACK received, wait for ACK. */
-#define	ST_ESTABLISHED		3	/* ACK seen, connection established. */
-#define	ST_CLOSING		4	/* FIN or RST seen. */
-
 typedef struct {
 	uint32_t	nst_seqend;	/* SEQ number + length. */
 	uint32_t	nst_ackend;	/* ACK sequence number + window. */
@@ -203,11 +203,12 @@
 void		npf_ruleset_insert(npf_ruleset_t *, npf_rule_t *);
 void		npf_ruleset_natreload(npf_ruleset_t *, npf_ruleset_t *);
 npf_rule_t *	npf_ruleset_matchnat(npf_ruleset_t *, npf_natpolicy_t *);
+npf_rule_t *	npf_ruleset_sharepm(npf_ruleset_t *, npf_natpolicy_t *);
 
 npf_rule_t *	npf_ruleset_match(npf_ruleset_t *, npf_cache_t *, nbuf_t *,
-		    struct ifnet *, const int, const int);
+		    ifnet_t *, const int, const int);
 npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
-		    struct ifnet *, const int, const int);
+		    ifnet_t *, const int, const int);
 int		npf_rule_apply(npf_cache_t *, nbuf_t *, npf_rule_t *, int *);
 
 npf_ruleset_t *	npf_rule_subset(npf_rule_t *);
@@ -250,12 +251,13 @@
 /* NAT. */
 void		npf_nat_sysinit(void);
 void		npf_nat_sysfini(void);
-npf_natpolicy_t *npf_nat_newpolicy(prop_dictionary_t);
+npf_natpolicy_t *npf_nat_newpolicy(prop_dictionary_t, npf_ruleset_t *);
 void		npf_nat_freepolicy(npf_natpolicy_t *);
 bool		npf_nat_matchpolicy(npf_natpolicy_t *, npf_natpolicy_t *);
+bool		npf_nat_sharepm(npf_natpolicy_t *, npf_natpolicy_t *);
 
 int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *,
-		    struct ifnet *, const int);
+		    ifnet_t *, const int);
 void		npf_nat_expire(npf_nat_t *);
 void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
--- a/sys/net/npf/npf_inet.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_inet.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_inet.c,v 1.5 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.5 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.6 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -215,11 +215,11 @@
 		topts_len -= val;
 		step = val - 1;
 	}
-	/* Soft limit, in a case of invalid packet. */
+	/* Any options left? */
 	if (__predict_true(topts_len > 0)) {
 		goto next;
 	}
-	return false;
+	return true;
 }
 
 /*
@@ -329,7 +329,7 @@
 {
 	struct ip *ip = &npc->npc_ip.v4;
 	struct icmp *ic;
-	u_int hlen, offby;
+	u_int hlen, iclen;
 
 	/* Must have IP header processed for its length and protocol. */
 	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
@@ -342,8 +342,8 @@
 	ic = &npc->npc_l4.icmp;
 
 	/* Fetch basic ICMP header, up to the "data" point. */
-	offby = offsetof(struct icmp, icmp_data);
-	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, offby, ic)) {
+	iclen = offsetof(struct icmp, icmp_data);
+	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, iclen, ic)) {
 		return false;
 	}
 
@@ -455,7 +455,7 @@
 }
 
 /*
- * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update chache.
+ * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update the cache.
  */
 bool
 npf_rwrcksum(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
@@ -485,13 +485,13 @@
 	} else {
 		/* No checksum for IPv6. */
 		KASSERT(npf_iscached(npc, NPC_IP6));
-		KASSERT(false);	/* XXX: Not yet supported. */
 		oaddr = NULL;
 		offby = 0;
+		return false;	/* XXX: Not yet supported. */
 	}
 
 	/* Determine whether TCP/UDP checksum update is needed. */
-	if (port == 0) {
+	if (proto == IPPROTO_ICMP || port == 0) {
 		return true;
 	}
 	KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));
@@ -557,7 +557,7 @@
 
 		if (nbuf_advstore(&nbuf, &n_ptr,
 		    offsetof(struct ip, ip_off) - offby,
-		    sizeof(uint8_t), &nip_off)) {
+		    sizeof(uint16_t), &nip_off)) {
 			return false;
 		}
 		cksum = npf_fixup16_cksum(cksum, ip_off, nip_off);
@@ -601,10 +601,14 @@
 		if (!npf_normalize_ip4(npc, nbuf, rnd, no_df, minttl)) {
 			return false;
 		}
+	} else if (!npf_iscached(npc, NPC_IP4)) {
+		/* XXX: no IPv6 */
+		return false;
 	}
 
 	/*
 	 * TCP Maximum Segment Size (MSS) "clamping".  Only if SYN packet.
+	 * Fetch MSS and check whether rewrite to lower is needed.
 	 */
 	if (maxmss == 0 || !npf_iscached(npc, NPC_TCP) ||
 	    (th->th_flags & TH_SYN) == 0) {
@@ -618,32 +622,16 @@
 	if (ntohs(mss) <= maxmss) {
 		return true;
 	}
-	if (!npf_iscached(npc, NPC_IP4)) { /* XXX: IPv6 */
-		return false;
-	}
 
-	/* Calculate checksums. */
+	/* Calculate TCP checksum, then rewrite MSS and the checksum. */
 	maxmss = htons(maxmss);
 	cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
-	ip->ip_sum = npf_fixup16_cksum(ip->ip_sum, mss, maxmss);
-	ip->ip_sum = npf_fixup16_cksum(ip->ip_sum, th->th_sum, cksum);
 	th->th_sum = cksum;
-
-	/* Rewrite MSS. */
 	mss = maxmss;
 	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
 		return false;
 	}
-
-	/* Update checksums. */
-	cksum = ip->ip_sum;
-	offby = offsetof(struct ip, ip_sum);
-	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
-		return false;
-	}
-	cksum = th->th_sum;
-	offby = (ip->ip_hl << 2) - offsetof(struct ip, ip_sum) +
-	    offsetof(struct tcphdr, th_sum);
+	offby = (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
 	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
 		return false;
 	}
--- a/sys/net/npf/npf_instr.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_instr.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $	*/
+/*	$NetBSD: npf_instr.c,v 1.5 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -33,9 +33,8 @@
  * NPF complex instructions.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.5 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -45,7 +44,6 @@
 #include <net/if_ether.h>
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
-#endif
 
 #include "npf_impl.h"
 
--- a/sys/net/npf/npf_log.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_log.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_log.c,v 1.1 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_log.c,v 1.2 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2010-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_log.c,v 1.1 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_log.c,v 1.2 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -54,12 +54,12 @@
 typedef struct npflog_softc {
 	LIST_ENTRY(npflog_softc)	sc_entry;
 	kmutex_t			sc_lock;
-	struct ifnet			sc_if;
+	ifnet_t				sc_if;
 	int				sc_unit;
 } npflog_softc_t;
 
 static int	npflog_clone_create(struct if_clone *, int );
-static int	npflog_clone_destroy(struct ifnet *);
+static int	npflog_clone_destroy(ifnet_t *);
 
 static LIST_HEAD(, npflog_softc)	npflog_if_list	__cacheline_aligned;
 static struct if_clone			npflog_cloner =
@@ -85,7 +85,7 @@
 }
 
 static int
-npflog_ioctl(struct ifnet *ifp, u_long cmd, void *data)
+npflog_ioctl(ifnet_t *ifp, u_long cmd, void *data)
 {
 	npflog_softc_t *sc = ifp->if_softc;
 	int error = 0;
@@ -107,7 +107,7 @@
 npflog_clone_create(struct if_clone *ifc, int unit)
 {
 	npflog_softc_t *sc;
-	struct ifnet *ifp;
+	ifnet_t *ifp;
 
 	sc = kmem_zalloc(sizeof(npflog_softc_t), KM_SLEEP);
 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SOFTNET);
@@ -129,7 +129,7 @@
 }
 
 static int
-npflog_clone_destroy(struct ifnet *ifp)
+npflog_clone_destroy(ifnet_t *ifp)
 {
 	npflog_softc_t *sc = ifp->if_softc;
 
@@ -142,34 +142,32 @@
 }
 
 void
-npf_log_packet(npf_cache_t *npc, nbuf_t *nbuf, int ifidx)
+npf_log_packet(npf_cache_t *npc, nbuf_t *nbuf, int if_idx)
 {
 	struct mbuf *m = nbuf;
-	npflog_softc_t *sc;
-	struct ifnet *ifp;
+	ifnet_t *ifp;
 	int family;
 
-	KASSERT(m != NULL);
+	/* Find a pseudo-interface to log. */
+	ifp = if_byindex(if_idx);
+	if (ifp == NULL) {
+		/* No interface. */
+		return;
+	}
 
-	/* Lookup for a pseudo-interface to log. */
-	LIST_FOREACH(sc, &npflog_if_list, sc_entry) {
-		ifp = &sc->sc_if;
-		if (ifp->if_index != ifidx) {
-			continue;
-		}
-		/* Set the address family. */
-		if (npf_iscached(npc, NPC_IP4)) {
-			family = AF_INET;
-		} else if (npf_iscached(npc, NPC_IP6)) {
-			family = AF_INET6;
-		} else {
-			family = AF_UNSPEC;
-		}
-		/* Pass through BPF. */
-		KERNEL_LOCK(1, NULL);
-		ifp->if_opackets++;
-		ifp->if_obytes += m->m_pkthdr.len;
-		bpf_mtap_af(ifp, family, m);
-		KERNEL_UNLOCK_ONE(NULL);
+	/* Set the address family. */
+	if (npf_iscached(npc, NPC_IP4)) {
+		family = AF_INET;
+	} else if (npf_iscached(npc, NPC_IP6)) {
+		family = AF_INET6;
+	} else {
+		family = AF_UNSPEC;
 	}
+
+	/* Pass through BPF. */
+	KERNEL_LOCK(1, NULL);
+	ifp->if_opackets++;
+	ifp->if_obytes += m->m_pkthdr.len;
+	bpf_mtap_af(ifp, family, m);
+	KERNEL_UNLOCK_ONE(NULL);
 }
--- a/sys/net/npf/npf_mbuf.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_mbuf.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $	*/
+/*	$NetBSD: npf_mbuf.c,v 1.6 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.6 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -110,10 +110,9 @@
 #define	NBUF_DATA_WRITE		1
 
 static inline int
-nbuf_rw_datum(const int wr, nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
+nbuf_rw_datum(const int wr, struct mbuf *m, void *n_ptr, size_t len, void *buf)
 {
 	uint8_t *d = n_ptr, *b = buf;
-	struct mbuf *m = nbuf;
 	u_int off, wmark, end;
 
 	/* Current offset in mbuf. */
@@ -174,15 +173,18 @@
 int
 nbuf_fetch_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
 {
+	struct mbuf *m = nbuf;
 
-	return nbuf_rw_datum(NBUF_DATA_READ, nbuf, n_ptr, len, buf);
+	return nbuf_rw_datum(NBUF_DATA_READ, m, n_ptr, len, buf);
 }
 
 int
 nbuf_store_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
 {
+	struct mbuf *m = nbuf;
 
-	return nbuf_rw_datum(NBUF_DATA_WRITE, nbuf, n_ptr, len, buf);
+	KASSERT((m->m_flags & M_PKTHDR) != 0 || !M_READONLY(m));
+	return nbuf_rw_datum(NBUF_DATA_WRITE, m, n_ptr, len, buf);
 }
 
 /*
--- a/sys/net/npf/npf_nat.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_nat.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_nat.c,v 1.4 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.5 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2010-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.4 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.5 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -172,10 +172,10 @@
  * => XXX: serialise at upper layer.
  */
 npf_natpolicy_t *
-npf_nat_newpolicy(prop_dictionary_t natdict)
+npf_nat_newpolicy(prop_dictionary_t natdict, npf_ruleset_t *nrlset)
 {
-	npf_natpolicy_t *np/*, *it */;
 	const npf_addr_t *taddr;
+	npf_natpolicy_t *np;
 	prop_object_t obj;
 	npf_portmap_t *pm;
 
@@ -205,38 +205,26 @@
 
 	KASSERT(np->n_type == NPF_NATIN || np->n_type == NPF_NATOUT);
 
-	pm = NULL;
+	/* Determine if port map is needed. */
+	np->n_portmap = NULL;
 	if ((np->n_flags & NPF_NAT_PORTMAP) == 0) {
-		goto nopm;
+		/* No port map. */
+		return np;
 	}
 
-	/* Search for a NAT policy using the same translation address. */
-#if 0
-	LIST_FOREACH(it, &nat_policy_list, n_entry) {
-		if (memcmp(&it->n_taddr, &np->n_taddr, sizeof(npf_addr_t))) {
-			continue;
-		}
-		pm = it->n_portmap;
-		break;
-	}
-#else
-	pm = NULL;
-#endif
-	if (pm == NULL) {
+	/*
+	 * Inspect NAT policies in the ruleset for port map sharing.
+	 * Note that npf_ruleset_sharepm() will increase the reference count.
+	 */
+	if (!npf_ruleset_sharepm(nrlset, np)) {
 		/* Allocate a new port map for the NAT policy. */
 		pm = kmem_zalloc(PORTMAP_MEM_SIZE, KM_SLEEP);
-		if (pm == NULL) {
-			kmem_free(np, sizeof(npf_natpolicy_t));
-			return NULL;
-		}
 		pm->p_refcnt = 1;
 		KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
+		np->n_portmap = pm;
 	} else {
-		/* Share the port map. */
-		pm->p_refcnt++;
+		KASSERT(np->n_portmap != NULL);
 	}
-nopm:
-	np->n_portmap = pm;
 	return np;
 }
 
@@ -249,14 +237,17 @@
 npf_nat_freepolicy(npf_natpolicy_t *np)
 {
 	npf_portmap_t *pm = np->n_portmap;
+	npf_session_t *se;
 	npf_nat_t *nt;
 
 	/* De-associate all entries from the policy. */
 	mutex_enter(&np->n_lock);
 	LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
-		if (nt->nt_session == NULL) { /* XXXSMP */
-			npf_session_expire(nt->nt_session);
+		se = nt->nt_session; /* XXXSMP */
+		if (se == NULL) {
+			continue;
 		}
+		npf_session_expire(se);
 	}
 	while (!LIST_EMPTY(&np->n_nat_list)) {
 		cv_wait(&np->n_cv, &np->n_lock);
@@ -273,6 +264,11 @@
 	kmem_free(np, sizeof(npf_natpolicy_t));
 }
 
+/*
+ * npf_nat_matchpolicy: compare two NAT policies.
+ *
+ * => Return true on match, and false otherwise.
+ */
 bool
 npf_nat_matchpolicy(npf_natpolicy_t *np, npf_natpolicy_t *mnp)
 {
@@ -281,11 +277,43 @@
 	 * Compare the relevant NAT policy information (in raw form),
 	 * which is enough for matching criterion.
 	 */
+	KASSERT(np && mnp && np != mnp);
 	np_raw = (uint8_t *)np + NPF_NP_CMP_START;
 	mnp_raw = (uint8_t *)mnp + NPF_NP_CMP_START;
 	return (memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0);
 }
 
+bool
+npf_nat_sharepm(npf_natpolicy_t *np, npf_natpolicy_t *mnp)
+{
+	npf_portmap_t *pm, *mpm;
+
+	KASSERT(np && mnp && np != mnp);
+
+	/* Using port map and having equal translation address? */
+	if ((np->n_flags & mnp->n_flags & NPF_NAT_PORTMAP) == 0) {
+		return false;
+	}
+	if (np->n_addr_sz != mnp->n_addr_sz) {
+		return false;
+	}
+	if (memcmp(&np->n_taddr, &mnp->n_taddr, np->n_addr_sz) != 0) {
+		return false;
+	}
+	/* If NAT policy has an old port map - drop the reference. */
+	mpm = mnp->n_portmap;
+	if (mpm) {
+		/* Note: in such a case, we must not be the last reference. */
+		KASSERT(mpm->p_refcnt > 1);
+		mpm->p_refcnt--;
+	}
+	/* Share the port map. */
+	pm = np->n_portmap;
+	mnp->n_portmap = pm;
+	pm->p_refcnt++;
+	return true;
+}
+
 /*
  * npf_nat_getport: allocate and return a port in the NAT policy portmap.
  *
@@ -370,7 +398,7 @@
  * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
  */
 static npf_natpolicy_t *
-npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, struct ifnet *ifp, const int di)
+npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, ifnet_t *ifp, const int di)
 {
 	npf_ruleset_t *rlset;
 	npf_rule_t *rl;
@@ -397,12 +425,13 @@
 		return NULL;
 	}
 	npf_stats_inc(NPF_STAT_NAT_CREATE);
+	nt->nt_natpolicy = np;
+	nt->nt_session = NULL;
+	nt->nt_alg = NULL;
+
 	mutex_enter(&np->n_lock);
 	LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
-	nt->nt_natpolicy = np;
-	nt->nt_session = NULL;
 	mutex_exit(&np->n_lock);
-	nt->nt_alg = NULL;
 
 	/* Save the original address which may be rewritten. */
 	if (np->n_type == NPF_NATOUT) {
@@ -474,6 +503,7 @@
 		addr = &nt->nt_oaddr;
 		port = nt->nt_oport;
 	}
+	KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0);
 
 	/* Execute ALG hook first. */
 	npf_alg_exec(npc, nbuf, nt, di);
@@ -526,7 +556,7 @@
  */
 int
 npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
-    struct ifnet *ifp, const int di)
+    ifnet_t *ifp, const int di)
 {
 	npf_session_t *nse = NULL;
 	npf_natpolicy_t *np;
@@ -702,6 +732,7 @@
 	/* Find or create a NAT policy. */
 	it = prop_array_iterator(natlist);
 	while ((npdict = prop_object_iterator_next(it)) != NULL) {
+		CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
 		itnp = (uintptr_t)prop_number_unsigned_integer_value(
 		    prop_dictionary_get(npdict, "id-ptr"));
 		if (itnp == (uintptr_t)np) {
@@ -714,6 +745,7 @@
 		npd = prop_data_create_data(np, sizeof(npf_natpolicy_t));
 
 		/* Set the data, insert into the array. */
+		CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
 		prop_dictionary_set(npdict, "id-ptr",
 		    prop_number_create_unsigned_integer((uintptr_t)np));
 		prop_dictionary_set(npdict, "nat-policy-data", npd);
--- a/sys/net/npf/npf_ruleset.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_ruleset.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_ruleset.c,v 1.5 2010/12/27 14:58:55 uebayasi Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.6 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -33,9 +33,8 @@
  * NPF ruleset module.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.5 2010/12/27 14:58:55 uebayasi Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.6 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -48,7 +47,6 @@
 
 #include <net/pfil.h>
 #include <net/if.h>
-#endif
 
 #include "npf_ncode.h"
 #include "npf_impl.h"
@@ -66,10 +64,11 @@
 	LIST_ENTRY(npf_hook)	hk_entry;
 };
 
-/* Rule processing structure. */
+/* Rule procedure structure. */
 struct npf_rproc {
 	/* Reference count. */
 	u_int			rp_refcnt;
+	uint32_t		rp_flags;
 	/* Normalization options. */
 	bool			rp_rnd_ipid;
 	bool			rp_no_df;
@@ -94,9 +93,7 @@
 	uint32_t		r_attr;
 	/* Interface. */
 	u_int			r_ifid;
-	/* Hit counter. */
-	u_long			r_hitcount;
-	/* Rule processing data. */
+	/* Rule procedure data. */
 	npf_rproc_t *		r_rproc;
 	/* List of hooks to process on match. */
 	kmutex_t		r_hooks_lock;
@@ -167,9 +164,31 @@
 	return rl;
 }
 
+npf_rule_t *
+npf_ruleset_sharepm(npf_ruleset_t *rlset, npf_natpolicy_t *mnp)
+{
+	npf_natpolicy_t *np;
+	npf_rule_t *rl;
+
+	/* Find a matching NAT policy in the old ruleset. */
+	TAILQ_FOREACH(rl, &rlset->rs_queue, r_entry) {
+		/*
+		 * NAT policy might not yet be set during the creation of
+		 * the ruleset (in such case, rule is for our policy), or
+		 * policies might be equal due to rule exchange on reload.
+		 */
+		np = rl->r_natp;
+		if (np == NULL || np == mnp)
+			continue;
+		if (npf_nat_sharepm(np, mnp))
+			break;
+	}
+	return rl;
+}
+
 /*
  * npf_ruleset_natreload: minimum reload of NAT policies by maching
- * two (active  and new) NAT rulesets.
+ * two (active and new) NAT rulesets.
  *
  * => Active ruleset should be exclusively locked.
  */
@@ -192,6 +211,8 @@
 		anp = arl->r_natp;
 		rl->r_natp = anp;
 		arl->r_natp = np;
+		/* Update other NAT policies to share portmap. */
+		(void)npf_ruleset_sharepm(nrlset, anp);
 	}
 }
 
@@ -204,6 +225,10 @@
 	rp = kmem_alloc(sizeof(npf_rproc_t), KM_SLEEP);
 	rp->rp_refcnt = 1;
 
+	/* Flags. */
+	obj = prop_dictionary_get(rpdict, "flags");
+	rp->rp_flags = prop_number_integer_value(obj);
+
 	/* Logging interface ID (integer). */
 	obj = prop_dictionary_get(rpdict, "log-interface");
 	rp->rp_log_ifid = prop_number_integer_value(obj);
@@ -232,6 +257,7 @@
 {
 	npf_rproc_t *rp = rl->r_rproc;
 
+	KASSERT(npf_core_locked());
 	if (rp) {
 		atomic_inc_uint(&rp->rp_refcnt);
 	}
@@ -252,43 +278,47 @@
 void
 npf_rproc_run(npf_cache_t *npc, nbuf_t *nbuf, npf_rproc_t *rp)
 {
+	const uint32_t flags = rp->rp_flags;
 
 	KASSERT(rp->rp_refcnt > 0);
 
 	/* Normalize the packet, if required. */
-	(void)npf_normalize(npc, nbuf,
-	    rp->rp_rnd_ipid, rp->rp_no_df, rp->rp_minttl, rp->rp_maxmss);
+	if (flags & NPF_RPROC_NORMALIZE) {
+		(void)npf_normalize(npc, nbuf,
+		    rp->rp_rnd_ipid, rp->rp_no_df,
+		    rp->rp_minttl, rp->rp_maxmss);
+		npf_stats_inc(NPF_STAT_RPROC_NORM);
+	}
 
 	/* Log packet, if required. */
-	if (rp->rp_log_ifid) {
+	if (flags & NPF_RPROC_LOG) {
 		npf_log_packet(npc, nbuf, rp->rp_log_ifid);
+		npf_stats_inc(NPF_STAT_RPROC_LOG);
 	}
-
 }
 
 /*
- * npf_rule_alloc: allocate a rule and copy ncode from user-space.
+ * npf_rule_alloc: allocate a rule and copy n-code from user-space.
  *
  * => N-code should be validated by the caller.
  */
 npf_rule_t *
-npf_rule_alloc(prop_dictionary_t rldict, void *nc, size_t nc_size)
+npf_rule_alloc(prop_dictionary_t rldict, npf_rproc_t *rp,
+   void *nc, size_t nc_size)
 {
 	npf_rule_t *rl;
 	prop_object_t obj;
-#ifdef DIAGNOSTIC
 	int errat;
-#endif
 
 	/* Allocate a rule structure. */
 	rl = kmem_alloc(sizeof(npf_rule_t), KM_SLEEP);
 	TAILQ_INIT(&rl->r_subset.rs_queue);
 	mutex_init(&rl->r_hooks_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 	LIST_INIT(&rl->r_hooks);
-	rl->r_hitcount = 0;
 	rl->r_natp = NULL;
 
 	/* N-code. */
+	(void)errat;
 	KASSERT(nc == NULL || npf_ncode_validate(nc, nc_size, &errat) == 0);
 	rl->r_ncode = nc;
 	rl->r_nc_size = nc_size;
@@ -305,12 +335,12 @@
 	obj = prop_dictionary_get(rldict, "interface");
 	rl->r_ifid = prop_number_integer_value(obj);
 
-	/* Create rule processing structure, if any. */
-	if (rl->r_attr & (NPF_RULE_LOG | NPF_RULE_NORMALIZE)) {
-		rl->r_rproc = npf_rproc_create(rldict);
-	} else {
-		rl->r_rproc = NULL;
+	/* Rule procedure. */
+	if (rp) {
+		atomic_inc_uint(&rp->rp_refcnt);
 	}
+	rl->r_rproc = rp;
+
 	return rl;
 }
 
@@ -328,7 +358,7 @@
 		npf_nat_freepolicy(np);
 	}
 	if (rp) {
-		/* Release/free rule processing structure. */
+		/* Release rule procedure. */
 		npf_rproc_release(rp);
 	}
 	if (rl->r_ncode) {
@@ -411,7 +441,7 @@
  */
 npf_rule_t *
 npf_ruleset_match(npf_ruleset_t *rlset, npf_cache_t *npc, nbuf_t *nbuf,
-    struct ifnet *ifp, const int di, const int layer)
+    ifnet_t *ifp, const int di, const int layer)
 {
 	npf_rule_t *final_rl = NULL, *rl;
 
@@ -455,7 +485,7 @@
  */
 npf_rule_t *
 npf_ruleset_inspect(npf_cache_t *npc, nbuf_t *nbuf,
-    struct ifnet *ifp, const int di, const int layer)
+    ifnet_t *ifp, const int di, const int layer)
 {
 	npf_ruleset_t *rlset;
 	npf_rule_t *rl;
@@ -498,11 +528,6 @@
 
 	KASSERT(npf_core_locked());
 
-	/* Update the "hit" counter. */
-	if (rl->r_attr & NPF_RULE_COUNT) {
-		atomic_inc_ulong(&rl->r_hitcount);
-	}
-
 	/* If not passing - drop the packet. */
 	if ((rl->r_attr & NPF_RULE_PASS) == 0) {
 		error = ENETUNREACH;
--- a/sys/net/npf/npf_sendpkt.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_sendpkt.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $	*/
+/*	$NetBSD: npf_sendpkt.c,v 1.4 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -33,9 +33,8 @@
  * NPF module for packet construction routines.
  */
 
-#ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.4 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -46,7 +45,6 @@
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
-#endif
 #include <sys/mbuf.h>
 
 #include "npf_impl.h"
--- a/sys/net/npf/npf_session.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_session.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_session.c,v 1.6 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.7 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2010-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -74,7 +74,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.6 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.7 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -97,8 +97,10 @@
 
 #include "npf_impl.h"
 
-#define	SESS_HASH_BUCKETS	1024	/* XXX tune + make tunable */
-#define	SESS_HASH_MASK		(SESS_HASH_BUCKETS - 1)
+/*
+ * Session structures: entry for embedding and the main structure.
+ * WARNING: update npf_session_restore() when adding fields.
+ */
 
 typedef struct {
 	/* Session entry node and backpointer to the actual session. */
@@ -125,7 +127,7 @@
 	int			s_type;
 	int			s_flags;
 	npf_state_t		s_state;
-	/* Association rule processing data. */
+	/* Association of rule procedure data. */
 	npf_rproc_t *		s_rproc;
 	/* NAT associated with this session (if any). */
 	npf_nat_t *		s_nat;
@@ -133,6 +135,9 @@
 	struct timespec 	s_atime;
 };
 
+#define	SESS_HASH_BUCKETS	1024	/* XXX tune + make tunable */
+#define	SESS_HASH_MASK		(SESS_HASH_BUCKETS - 1)
+
 LIST_HEAD(npf_sesslist, npf_session);
 
 struct npf_sehash {
@@ -170,12 +175,6 @@
 static void	npf_session_destroy(npf_session_t *);
 static void	npf_session_worker(void *);
 
-#ifdef SE_DEBUG
-#define	SEPRINTF(x)	printf x
-#else
-#define	SEPRINTF(x)
-#endif
-
 /*
  * npf_session_sys{init,fini}: initialise/destroy session handling structures.
  *
@@ -418,8 +417,7 @@
 	 * Construct a key for hash and tree lookup.  Execute ALG session
 	 * helpers, which may construct a custom key.
 	 */
-	const int proto = npf_cache_ipproto(npc);
-	npf_cache_t algkey, *key;
+	npf_cache_t algkey = { .npc_info = 0 }, *key;
 	npf_sentry_t senkey;
 
 	if (!npf_alg_sessionid(npc, nbuf, &algkey)) {
@@ -429,14 +427,22 @@
 		/* Unique IDs filled by ALG in a separate key cache. */
 		key = &algkey;
 	}
+
+	/* Note: take protocol from the key. */
+	const int proto = npf_cache_ipproto(key);
+
 	if (proto == IPPROTO_TCP) {
 		const struct tcphdr *th = &key->npc_l4.tcp;
 		senkey.se_src_id = th->th_sport;
 		senkey.se_dst_id = th->th_dport;
-	} else {
+	} else if (proto == IPPROTO_UDP) {
 		const struct udphdr *uh = &key->npc_l4.udp;
 		senkey.se_src_id = uh->uh_sport;
 		senkey.se_dst_id = uh->uh_dport;
+	} else if (npf_iscached(key, NPC_ICMP_ID)) {
+		const struct icmp *ic = &key->npc_l4.icmp;
+		senkey.se_src_id = ic->icmp_id;
+		senkey.se_dst_id = ic->icmp_id;
 	}
 	KASSERT(key->npc_srcip && key->npc_dstip && key->npc_ipsz > 0);
 	memcpy(&senkey.se_src_addr, key->npc_srcip, key->npc_ipsz);
@@ -507,6 +513,7 @@
 	if (__predict_false(se == NULL)) {
 		return NULL;
 	}
+	NPF_PRINTF(("NPF: create se %p\n", se));
 	npf_stats_inc(NPF_STAT_SESSION_CREATE);
 
 	/* Reference count and flags (indicate direction). */
@@ -549,7 +556,7 @@
 		break;
 	case IPPROTO_ICMP:
 		if (npf_iscached(npc, NPC_ICMP_ID)) {
-			/* ICMP query ID. (XXX) */
+			/* ICMP query ID. */
 			const struct icmp *ic = &npc->npc_l4.icmp;
 			fw->se_src_id = ic->icmp_id;
 			fw->se_dst_id = ic->icmp_id;
@@ -590,7 +597,7 @@
 			/* Success: insert session, count both entries. */
 			LIST_INSERT_HEAD(&sh->sh_list, se, s_list);
 			sh->sh_count += 2;
-			SEPRINTF(("NPF: new se %p\n", se));
+			NPF_PRINTF(("NPF: establish se %p\n", se));
 		} else {
 			/* Race with duplicate packet. */
 			rb_tree_remove_node(&sh->sh_tree, fw);
@@ -615,7 +622,7 @@
 		npf_nat_expire(se->s_nat);
 	}
 	if (se->s_rproc) {
-		/* Release rule processing data. */
+		/* Release rule procedure. */
 		npf_rproc_release(se->s_rproc);
 	}
 
@@ -625,7 +632,7 @@
 	/* Free the structure, increase the counter. */
 	pool_cache_put(sess_cache, se);
 	npf_stats_inc(NPF_STAT_SESSION_DESTROY);
-	SEPRINTF(("NPF: se %p destroyed\n", se));
+	NPF_PRINTF(("NPF: se %p destroyed\n", se));
 }
 
 /*
@@ -691,7 +698,7 @@
 	ok = (rb_tree_insert_node(&sh->sh_tree, sen) == sen);
 	if (__predict_true(ok)) {
 		sh->sh_count++;
-		SEPRINTF(("NPF: se %p assoc with nat %p\n", se, se->s_nat));
+		NPF_PRINTF(("NPF: se %p assoc with nat %p\n", se, se->s_nat));
 	} else {
 		/* FIXMEgc */
 		printf("npf_session_setnat: Houston, we've had a problem.\n");
@@ -707,7 +714,7 @@
 npf_session_expire(npf_session_t *se)
 {
 
-	KASSERT(se->s_refcnt > 0);
+	/* KASSERT(se->s_refcnt > 0); XXX: npf_nat_freepolicy() */
 	se->s_flags |= SE_EXPIRE;		/* XXXSMP */
 }
 
@@ -728,7 +735,7 @@
 
 /*
  * npf_session_setpass: mark session as a "pass" one and associate rule
- * processing data with it.
+ * procedure with it.
  */
 void
 npf_session_setpass(npf_session_t *se, npf_rproc_t *rp)
@@ -922,8 +929,9 @@
 	}
 
 	/*
-	 * Note: normally, saving should be done while tracking is disabled,
-	 * so there is no point to exclusively lock the entire hash table.
+	 * Note: hold the session lock to prevent the G/C thread from
+	 * expiring and removing sessions.  Therefore, no need to
+	 * exclusively lock the entire hash table.
 	 */
 	mutex_enter(&sess_lock);
 	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
@@ -944,6 +952,7 @@
 			sedict = prop_dictionary_create();
 			sd = prop_data_create_data(se, sizeof(npf_session_t));
 			prop_dictionary_set(sedict, "data", sd);
+			CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
 			prop_dictionary_set(sedict, "id-ptr",
 			    prop_number_create_unsigned_integer((uintptr_t)se));
 			if (se->s_nat) {
@@ -990,11 +999,12 @@
 
 	/*
 	 * Copy the binary data of the structure.  Warning: must reset
-	 * reference count and state lock.
+	 * reference count, rule procedure and state lock.
 	 */
 	se = pool_cache_get(sess_cache, PR_WAITOK);
 	memcpy(se, d, sizeof(npf_session_t));
 	se->s_refcnt = 0;
+	se->s_rproc = NULL;
 
 	nst = &se->s_state;
 	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
--- a/sys/net/npf/npf_state.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/sys/net/npf/npf_state.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state.c,v 1.2 2010/12/18 01:07:25 rmind Exp $	*/
+/*	$NetBSD: npf_state.c,v 1.3 2011/01/18 20:33:46 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.2 2010/12/18 01:07:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.3 2011/01/18 20:33:46 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -43,23 +43,41 @@
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
+#include <netinet/tcp_fsm.h>
 
 #include "npf_impl.h"
 
-#define	MAXACKWINDOW		66000
+/* TCP session expiration table. */
+static const u_int tcp_expire_table[ ] __read_mostly = {
+	/* Initial synchronisation.  Timeout: 30 sec and 1 minute. */
+	[TCPS_SYN_SENT]		= 30,
+	[TCPS_SYN_RECEIVED]	= 60,
+	/* Established (synchronised).  Timeout: 24 hours. */
+	[TCPS_ESTABLISHED]	= 60 * 60 * 24,
+	[TCPS_FIN_WAIT_1]	= 60 * 60 * 24,
+	[TCPS_FIN_WAIT_2]	= 60 * 60 * 24,
+	/* UNUSED [TCPS_CLOSE_WAIT]	= 60 * 60 * 24, */
+	/* Closure.  Timeout: 4 minutes (2 * MSL). */
+	[TCPS_CLOSING]		= 60 * 4,
+	[TCPS_LAST_ACK]		= 60 * 4,
+	[TCPS_TIME_WAIT]	= 60 * 4,
+	/* Fully closed.  Timeout immediately. */
+	[TCPS_CLOSED]		= 0
+};
 
-/* Session expiration table.  XXX revisit later */
-static const u_int expire_table[ ] = {
-	[IPPROTO_TCP]		= 86400,	/* 24 hours */
-	[IPPROTO_UDP]		= 120,		/* 2 min */
-	[IPPROTO_ICMP]		= 30		/* 1 min */
+/* Session expiration table. */
+static const u_int expire_table[ ] __read_mostly = {
+	[IPPROTO_UDP]		= 60,		/* 1 min */
+	[IPPROTO_ICMP]		= 30		/* 30 sec */
 };
 
+#define	MAXACKWINDOW		66000
+
 static bool
 npf_tcp_inwindow(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
     const bool forw)
 {
-	const struct tcphdr *th = &npc->npc_l4.tcp;
+	const struct tcphdr * const th = &npc->npc_l4.tcp;
 	const int tcpfl = th->th_flags;
 	npf_tcpstate_t *fstate, *tstate;
 	int tcpdlen, wscale, ackskew;
@@ -191,56 +209,116 @@
 npf_state_tcp(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
     const bool forw)
 {
-	const struct tcphdr *th = &npc->npc_l4.tcp;
-	const int tcpfl = th->th_flags;
-	int nstate = 0;
-
+	const struct tcphdr * const th = &npc->npc_l4.tcp;
+	const int tcpfl = th->th_flags, state = nst->nst_state;
+#if 0
+	/* Determine whether TCP packet really belongs to this connection. */
+	if (!npf_tcp_inwindow(npc, nbuf, nst, forw)) {
+		return false;
+	}
+#endif
 	/*
-	 * Handle 3-way handshake (SYN -> SYN,ACK -> ACK).
+	 * Handle 3-way handshake (SYN -> SYN,ACK -> ACK), connection
+	 * reset (RST), half-open connections, connection closure, etc.
 	 */
-	switch (nst->nst_state) {
-	case ST_ESTABLISHED:
-		/* Common case - connection established. */
-		if (__predict_false(tcpfl & (TH_FIN | TH_RST))) {
-			/* Handle connection closure (FIN or RST). */
-			nstate = ST_CLOSING;
+	if (__predict_false(tcpfl & TH_RST)) {
+		nst->nst_state = TCPS_CLOSED;
+		return true;
+	}
+	switch (state) {
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_2:
+		/* Common case - connection is established. */
+		if ((tcpfl & (TH_SYN | TH_ACK | TH_FIN)) == TH_ACK) {
+			return true;
+		}
+		/* Otherwise, can only be a FIN. */
+		if ((tcpfl & TH_FIN) == 0) {
+			break;
+		}
+		/* XXX see below TCPS_CLOSE_WAIT */
+		if (state != TCPS_FIN_WAIT_2) {
+			/* First FIN: closure of one end. */
+			nst->nst_state = TCPS_FIN_WAIT_1;
+		} else {
+			/* Second FIN: connection closure, wait for ACK. */
+			nst->nst_state = TCPS_LAST_ACK;
+		}
+		return true;
+	case TCPS_SYN_SENT:
+		/* After SYN expecting SYN-ACK. */
+		if (tcpfl == (TH_SYN | TH_ACK) && !forw) {
+			/* Received backwards SYN-ACK. */
+			nst->nst_state = TCPS_SYN_RECEIVED;
+			return true;
+		}
+		if (tcpfl == TH_SYN && forw) {
+			/* Re-transmission of SYN. */
+			return true;
 		}
 		break;
-	case ST_OPENING:
-		/* SYN has been sent, expecting SYN-ACK. */
-		if (tcpfl == (TH_SYN | TH_ACK) && !forw) {
-			/* Received backwards SYN-ACK. */
-			nstate = ST_ACKNOWLEDGE;
-		} else if (tcpfl == TH_SYN && forw) {
-			/* Re-transmission of SYN. */
-		} else {
-			return false;
+	case TCPS_SYN_RECEIVED:
+		/* SYN-ACK was seen, expecting ACK. */
+		if ((tcpfl & (TH_SYN | TH_ACK | TH_FIN)) == TH_ACK) {
+			/* ACK - establish connection. */
+			nst->nst_state = TCPS_ESTABLISHED;
+			return true;
+		}
+		if (tcpfl == (TH_SYN | TH_ACK)) {
+			/* Re-transmission of SYN-ACK. */
+			return true;
 		}
 		break;
-	case ST_ACKNOWLEDGE:
-		/* SYN-ACK was seen, expecting ACK. */
-		if (tcpfl == TH_ACK && forw) {
-			nstate = ST_ESTABLISHED;
-		} else {
+	case TCPS_CLOSE_WAIT:
+		/* UNUSED */
+	case TCPS_FIN_WAIT_1:
+		/*
+		 * XXX: FIN re-transmission is not handled, use TCPS_CLOSE_WAIT.
+		 */
+		/*
+		 * First FIN was seen, expecting ACK.  However, we may receive
+		 * a simultaneous FIN or exchange of FINs with FIN-ACK.
+		 */
+		if ((tcpfl & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
+			/* Exchange of FINs with ACK.  Wait for last ACK. */
+			nst->nst_state = TCPS_LAST_ACK;
+			return true;
+		} else if (tcpfl & TH_ACK) {
+			/* ACK of first FIN. */
+			nst->nst_state = TCPS_FIN_WAIT_2;
+			return true;
+		} else if (tcpfl & TH_FIN) {
+			/* Simultaneous FIN.  Need to wait for ACKs. */
+			nst->nst_state = TCPS_CLOSING;
+			return true;
+		}
+		break;
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+		/* Expecting only ACK. */
+		if ((tcpfl & (TH_SYN | TH_ACK | TH_FIN)) != TH_ACK) {
 			return false;
 		}
-		break;
-	case ST_CLOSING:
-		/* XXX TODO */
+		switch (state) {
+		case TCPS_CLOSING:
+			/* One ACK noted, wait for last one. */
+			nst->nst_state = TCPS_LAST_ACK;
+			break;
+		case TCPS_LAST_ACK:
+			/* Last ACK received, quiet wait now. */
+			nst->nst_state = TCPS_TIME_WAIT;
+			break;
+		}
+		return true;
+	case TCPS_CLOSED:
+		/* XXX: Drop or pass? */
 		break;
 	default:
 		npf_state_dump(nst);
 		KASSERT(false);
 	}
-#if 0
-	if (!npf_tcp_inwindow(npc, nbuf, nst, forw)) {
-		return false;
-	}
-#endif
-	if (__predict_false(nstate)) {
-		nst->nst_state = nstate;
-	}
-	return true;
+	return false;
 }
 
 bool
@@ -251,10 +329,10 @@
 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
 
 	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
-	nst->nst_state = ST_OPENING;
 
 	if (proto == IPPROTO_TCP) {
 		const struct tcphdr *th = &npc->npc_l4.tcp;
+
 		/* TCP case: must be SYN. */
 		KASSERT(npf_iscached(npc, NPC_TCP));
 		if (th->th_flags != TH_SYN) {
@@ -267,6 +345,12 @@
 			return false;
 		}
 	}
+
+	/*
+	 * Initial state: SYN sent, waiting for response from the other side.
+	 * Note: for UDP or ICMP, reuse SYN-sent flag to note response.
+	 */
+	nst->nst_state = TCPS_SYN_SENT;
 	return true;
 }
 
@@ -274,7 +358,6 @@
 npf_state_destroy(npf_state_t *nst)
 {
 
-	KASSERT(nst->nst_state != 0);
 	mutex_destroy(&nst->nst_lock);
 }
 
@@ -292,9 +375,11 @@
 		ret = npf_state_tcp(npc, nbuf, nst, forw);
 		break;
 	default:
-		/* Handle UDP or ICMP response for opening session. */
-		if (nst->nst_state == ST_OPENING && !forw) {
-			nst->nst_state = ST_ESTABLISHED;
+		/*
+		 * Handle UDP or ICMP response for opening session.
+		 */
+		if (nst->nst_state == TCPS_SYN_SENT && !forw) {
+			nst->nst_state= TCPS_ESTABLISHED;
 		}
 		ret = true;
 	}
@@ -305,14 +390,18 @@
 	return ret;
 }
 
+/*
+ * npf_state_etime: return session expiration time according to the state.
+ */
 int
 npf_state_etime(const npf_state_t *nst, const int proto)
 {
+	const int state = nst->nst_state;
 
-	if (nst->nst_state == ST_ESTABLISHED) {
-		return expire_table[proto];
+	if (__predict_true(proto == IPPROTO_TCP)) {
+		return tcp_expire_table[state];
 	}
-	return 10;	/* XXX TODO */
+	return expire_table[proto];
 }
 
 #if defined(DDB) || defined(_NPF_TESTING)
@@ -324,7 +413,7 @@
 
 	printf("\tstate (%p) %d:\n\t\t"
 	    "F { seqend %u ackend %u mwin %u wscale %u }\n\t\t"
-	    "T { seqend %u, ackend %u mwin %u wscale %u }\n",
+	    "T { seqend %u ackend %u mwin %u wscale %u }\n",
 	    nst, nst->nst_state,
 	    fst->nst_seqend, fst->nst_ackend, fst->nst_maxwin, fst->nst_wscale,
 	    tst->nst_seqend, tst->nst_ackend, tst->nst_maxwin, tst->nst_wscale
--- a/usr.sbin/npf/npfctl/npf.conf.5	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npf.conf.5	Tue Jan 18 20:33:45 2011 +0000
@@ -1,6 +1,6 @@
-.\"	$NetBSD: npf.conf.5,v 1.2 2010/09/16 04:53:27 rmind Exp $
+.\"	$NetBSD: npf.conf.5,v 1.3 2011/01/18 20:33:45 rmind Exp $
 .\"
-.\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+.\" Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
 .\" All rights reserved.
 .\"
 .\" This material is based upon work partially supported by The
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd September 16, 2010
+.Dd January 18, 2011
 .Dt NPF.CONF 5
 .Os
 .Sh NAME
@@ -37,7 +37,7 @@
 .Sh DESCRIPTION
 .Nm
 is the default configuration file for NPF packet filter.
-It can contain definitions, grouped rules, and tables.
+It can contain definitions, grouped rules, rule procedures, and tables.
 .Sh DEFINITIONS
 Definitions are general purpose keywords which can be used in the
 ruleset to make it more flexible and easier to manage.
@@ -93,9 +93,9 @@
 .\" -----
 .Sh GRAMMAR
 .Bd -literal
-line		= ( def | table | nat | group )
+line		= ( def | table | nat | group | rproc )
 
-def		= ( \*[Lt]name\*[Gt] "=" "{ a, b, ... }" | "text" | "$\*[Lt]interface\*[Gt]" )
+def		= ( \*[Lt]name\*[Gt] "=" "{ a, b, ... }" | "\*[Lt]text\*[Gt]" | "$\*[Lt]interface\*[Gt]" )
 iface		= ( \*[Lt]interface\*[Gt] | def )
 
 table		= "table" \*[Lt]tid\*[Gt] "type" ( "hash" | "tree" )
@@ -105,17 +105,22 @@
 binat		= "binat" iface filt-opts "->" \*[Lt]addr\*[Gt]
 rdr		= "rdr" iface filt-opts "->" \*[Lt]addr\*[Gt] port-opts
 
-group		= "group" "(" ( "default" | group-opts ) "") ruleset
+rproc		= "procedure" \*[Lt]name\*[Gt] procs
+procs		= "{" op1 \*[Lt]newline\*[Gt], op2 \*[Lt]newline\*[Gt], ... "}"
+op		= ( "log" iface | "normalize" "(" norm-opt1 "," norm-opt2 ... ")" )
+norm-opt	= [ "random-id" | "min-ttl" \*[Lt]num\*[Gt] | "max-mss" \*[Lt]num\*[Gt] | "no-df" ]
+
+group		= "group" "(" ( "default" | group-opts ) ")" ruleset
 group-opts	= "interface" iface "," [ "in" | "out" ]
 
 ruleset		= "{" rule1 \*[Lt]newline\*[Gt], rule2 \*[Lt]newline\*[Gt], ... "}"
 
-rule		= ( "block" block-opts | "pass" ) [ "in" | out" ] rule-opts
+rule		= ( "block" block-opts | "pass" ) [ "in" | "out" ] [ "quick" ]
 		  [ "on" iface ] [ "inet" | "inet6" ] [ "proto" \*[Lt]protocol\*[Gt] ]
 		  ( "all" | filt-opts [ "flags" \*[Lt]tcp_flags> \*[Gt] )
+		  [ "keep state" ] [ "apply" rproc ]
 
 block-opts	= [ "return-rst" | "return-icmp" | "return" ]
-rule-opts	= [ "log" ] [ "count" ] [ "quick" ]
 filt-opts	= [ "from" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
 		  [ "to" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
 port-opts	= [ "port" ( \*[Lt]port-num\*[Gt] | \*[Lt]port-from\*[Gt] ":" \*[Lt]port-to\*[Gt] | def ) ]
@@ -141,13 +146,21 @@
 table "1" type "hash" file "/etc/npf_blacklist"
 table "2" type "tree" dynamic
 
-nat $ext_if from 192.168.0.0/24 to 0.0.0.0/0 -> $ext_if
+nat $ext_if from 192.168.0.0/24 to any -> $ext_if
+
+procedure "log" {
+	log npflog0
+}
+
+procedure "rid" {
+	normalize (random-id)
+}
 
 group (name "external", interface $ext_if) {
 	block in quick from \*[Lt]1\*[Gt]
-	pass out quick from $ext_if keep state
+	pass out quick from $ext_if keep state apply "rid"
 
-	pass in log quick inet proto tcp to $ext_if port ssh
+	pass in quick inet proto tcp to $ext_if port ssh apply "log"
 	pass in quick proto tcp to $ext_if port $services_tcp
 	pass in quick proto udp to $ext_if port $services_udp
 	pass in quick proto tcp to $ext_if port 49151:65535	# Passive FTP
@@ -161,7 +174,7 @@
 }
 
 group (default) {
-        block all
+	block all
 }
 .Ed
 .\" -----
--- a/usr.sbin/npf/npfctl/npf_data.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npf_data.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_data.c,v 1.5 2010/12/18 01:07:26 rmind Exp $	*/
+/*	$NetBSD: npf_data.c,v 1.6 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: npf_data.c,v 1.5 2010/12/18 01:07:26 rmind Exp $");
+__RCSID("$NetBSD: npf_data.c,v 1.6 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -58,11 +58,12 @@
 static struct ifaddrs *		ifs_list = NULL;
 
 static prop_dictionary_t	npf_dict, settings_dict;
-static prop_array_t		nat_arr, tables_arr, rules_arr;
+static prop_array_t		nat_arr, tables_arr, rproc_arr, rules_arr;
 
 static pri_t			gr_prio_counter = 1;
 static pri_t			rl_prio_counter = 1;
 static pri_t			nat_prio_counter = 1;
+static u_int			rproc_id_counter = 1;
 
 void
 npfctl_init_data(void)
@@ -82,6 +83,9 @@
 	tables_arr = prop_array_create();
 	prop_dictionary_set(npf_dict, "tables", tables_arr);
 
+	rproc_arr = prop_array_create();
+	prop_dictionary_set(npf_dict, "rprocs", rproc_arr);
+
 	rules_arr = prop_array_create();
 	prop_dictionary_set(npf_dict, "rules", rules_arr);
 }
@@ -91,13 +95,12 @@
 {
 	int ret = 0, errval;
 
-#ifdef DEBUG
+#ifdef _NPF_TESTING
 	prop_dictionary_externalize_to_file(npf_dict, "./npf.plist");
 #else
 	errval = prop_dictionary_send_ioctl(npf_dict, fd, IOC_NPF_RELOAD);
 	if (errval) {
-		errx(EXIT_FAILURE, "npf_ioctl_send: %s\n", strerror(errval));
-		ret = -1;
+		errx(EXIT_FAILURE, "npfctl_ioctl_send: %s\n", strerror(errval));
 	}
 #endif
 	prop_object_release(npf_dict);
@@ -105,6 +108,25 @@
 }
 
 int
+npfctl_ioctl_flushse(int fd)
+{
+	prop_dictionary_t sesdict;
+	prop_array_t selist;
+	int errval;
+
+	sesdict = prop_dictionary_create();
+	selist = prop_array_create();
+	prop_dictionary_set(sesdict, "session-list", selist);
+	errval = prop_dictionary_send_ioctl(sesdict, fd, IOC_NPF_SESSIONS_LOAD);
+	if (errval) {
+		errx(EXIT_FAILURE, "npfctl_ioctl_flushse: %s\n",
+		    strerror(errval));
+	}
+	prop_object_release(sesdict);
+	return errval;
+}
+
+int
 npfctl_ioctl_sendse(int fd)
 {
 	prop_dictionary_t sesdict;
@@ -149,16 +171,15 @@
  *	npfctl_parse_tcpfl() - parse TCP flags.
  */
 
-static struct ifaddrs *
-npfctl_getif(char *ifname, unsigned int *if_idx)
+struct ifaddrs *
+npfctl_getif(char *ifname, unsigned int *if_idx, bool reqaddr)
 {
 	struct ifaddrs *ifent;
 	struct sockaddr_in *sin;
 
 	for (ifent = ifs_list; ifent != NULL; ifent = ifent->ifa_next) {
 		sin = (struct sockaddr_in *)ifent->ifa_addr;
-
-		if (sin->sin_family != AF_INET)
+		if (sin->sin_family != AF_INET && reqaddr)
 			continue;
 		if (strcmp(ifent->ifa_name, ifname) == 0)
 			break;
@@ -222,12 +243,16 @@
 npfctl_parse_cidr(char *str, in_addr_t *addr, in_addr_t *mask)
 {
 
-	if (isalpha((unsigned char)*str)) {
+	if (strcmp(str, "any") == 0) {
+		*addr = 0x0;
+		*mask = 0x0;
+
+	} else if (isalpha((unsigned char)*str)) {
 		struct ifaddrs *ifa;
 		struct sockaddr_in *sin;
 		u_int idx;
 
-		if ((ifa = npfctl_getif(str, &idx)) == NULL) {
+		if ((ifa = npfctl_getif(str, &idx, true)) == NULL) {
 			errx(EXIT_FAILURE, "invalid interface '%s'", str);
 		}
 		/* Interface address. */
@@ -286,10 +311,8 @@
 	prop_object_t obj;
 	u_int tid;
 
-	if ((it = prop_array_iterator(tables_arr)) == NULL)
-		err(EXIT_FAILURE, "prop_array_iterator");
-
 	tid = atoi(tidstr);
+	it = prop_array_iterator(tables_arr);
 	while ((tl = prop_object_iterator_next(it)) != NULL) {
 		obj = prop_dictionary_get(tl, "id");
 		if (tid == prop_number_integer_value(obj))
@@ -299,40 +322,21 @@
 }
 
 prop_dictionary_t
-npfctl_mk_table(void)
+npfctl_construct_table(int id, int type)
 {
 	prop_dictionary_t tl;
-	prop_array_t tlist;
 
 	tl = prop_dictionary_create();
-	tlist = prop_array_create();
-	prop_dictionary_set(tl, "entries", tlist);
-
+	/* TODO: 1. check ID range 2. check if not a duplicate */
+	prop_dictionary_set(tl, "id", prop_number_create_integer(id));
+	prop_dictionary_set(tl, "type", prop_number_create_integer(type));
+	prop_dictionary_set(tl, "entries", prop_array_create());
+	prop_array_add(tables_arr, tl);
 	return tl;
 }
 
 void
-npfctl_table_setup(prop_dictionary_t tl, char *idstr, char *typestr)
-{
-	prop_number_t typenum;
-	unsigned int id;
-
-	id = atoi(idstr);
-	/* TODO: 1. check ID range 2. check if not a duplicate */
-	prop_dictionary_set(tl, "id", prop_number_create_integer(id));
-
-	if (strcmp(typestr, "hash")) {
-		typenum = prop_number_create_integer(NPF_TABLE_HASH);
-	} else if (strcmp(typestr, "tree")) {
-		typenum = prop_number_create_integer(NPF_TABLE_RBTREE);
-	} else {
-		errx(EXIT_FAILURE, "invalid table type '%s'\n", typestr);
-	}
-	prop_dictionary_set(tl, "type", typenum);
-}
-
-void
-npfctl_construct_table(prop_dictionary_t tl, char *fname)
+npfctl_fill_table(prop_dictionary_t tl, char *fname)
 {
 	prop_dictionary_t entdict;
 	prop_array_t tblents;
@@ -346,7 +350,7 @@
 
 	fp = fopen(fname, "r");
 	if (fp == NULL) {
-		err(EXIT_FAILURE, "fopen");
+		err(EXIT_FAILURE, "open '%s'", fname);
 	}
 	l = 1;
 	buf = NULL;
@@ -374,23 +378,16 @@
 	}
 }
 
-void
-npfctl_add_table(prop_dictionary_t tl)
-{
-
-	prop_array_add(tables_arr, tl);
-}
-
 /*
  * npfctl_mk_rule: create a rule (or group) dictionary.
  *
  * Note: group is a rule containing subrules.  It has no n-code, however.
  */
 prop_dictionary_t
-npfctl_mk_rule(bool group)
+npfctl_mk_rule(bool group, prop_dictionary_t parent)
 {
 	prop_dictionary_t rl;
-	prop_array_t subrl;
+	prop_array_t subrl, rlset;
 	pri_t pri;
 
 	rl = prop_dictionary_create();
@@ -403,16 +400,7 @@
 	} else {
 		pri = rl_prio_counter++;
 	}
-	prop_dictionary_set(rl, "priority",
-	    prop_number_create_integer(pri));
-
-	return rl;
-}
-
-void
-npfctl_add_rule(prop_dictionary_t rl, prop_dictionary_t parent)
-{
-	prop_array_t rlset;
+	prop_dictionary_set(rl, "priority", prop_number_create_integer(pri));
 
 	if (parent) {
 		rlset = prop_dictionary_get(parent, "subrules");
@@ -421,41 +409,20 @@
 		rlset = rules_arr;
 	}
 	prop_array_add(rlset, rl);
+	return rl;
 }
 
 void
-npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface,
-    char *logiface, bool ipid_rnd, int minttl, int maxmss, bool no_df)
+npfctl_rule_setattr(prop_dictionary_t rl, int attr, u_int iface)
 {
 	prop_number_t attrnum, ifnum;
-	unsigned int if_idx;
 
 	attrnum = prop_number_create_integer(attr);
 	prop_dictionary_set(rl, "attributes", attrnum);
 	if (iface) {
-		if (npfctl_getif(iface, &if_idx) == NULL) {
-			errx(EXIT_FAILURE, "invalid interface '%s'", iface);
-		}
-		ifnum = prop_number_create_integer(if_idx);
+		ifnum = prop_number_create_integer(iface);
 		prop_dictionary_set(rl, "interface", ifnum);
 	}
-	if (logiface) {
-		if (npfctl_getif(logiface, &if_idx) == NULL) {
-			errx(EXIT_FAILURE, "invalid interface '%s'", logiface);
-		}
-		ifnum = prop_number_create_integer(if_idx);
-		prop_dictionary_set(rl, "log-interface", ifnum);
-	}
-	if (attr & NPF_RULE_NORMALIZE) {
-		prop_dictionary_set(rl, "randomize-id",
-		    prop_bool_create(ipid_rnd));
-		prop_dictionary_set(rl, "min-ttl",
-		    prop_number_create_integer(minttl));
-		prop_dictionary_set(rl, "max-mss",
-		    prop_number_create_integer(maxmss));
-		prop_dictionary_set(rl, "no-df",
-		    prop_bool_create(no_df));
-	}
 }
 
 /*
@@ -569,7 +536,7 @@
 		/* Default. */
 	}
 skip_proto:
-	if (icmp_type != -1) {
+	if (icmp || icmp_type != -1) {
 		assert(tcp_flags == NULL);
 		icmp = true;
 		nblocks[2] += 1;
@@ -598,7 +565,7 @@
 	}
 
 	/* Any n-code to generate? */
-	if ((nblocks[0] + nblocks[1] + nblocks[2]) == 0) {
+	if (!icmp && (nblocks[0] + nblocks[1] + nblocks[2]) == 0) {
 		/* Done, if none. */
 		return;
 	}
@@ -673,6 +640,42 @@
 }
 
 /*
+ * Rule procedure construction routines.
+ */
+
+prop_dictionary_t
+npfctl_mk_rproc(void)
+{
+	prop_dictionary_t rp;
+
+	rp = prop_dictionary_create();
+	prop_dictionary_set(rp, "id",
+	    prop_number_create_unsigned_integer(rproc_id_counter++));
+	prop_array_add(rproc_arr, rp);
+	return rp;
+}
+
+bool
+npfctl_find_rproc(prop_dictionary_t rl, char *name)
+{
+	prop_dictionary_t rp;
+	prop_object_iterator_t it;
+
+	/* Find the procedure called 'name'; on a match, set "rproc-id" in rl. */
+	it = prop_array_iterator(rproc_arr);
+	while ((rp = prop_object_iterator_next(it)) != NULL) {
+		if (prop_string_equals_cstring(	/* in place, no heap copy */
+		    prop_dictionary_get(rp, "name"), name))
+			break;
+	}
+	prop_object_iterator_release(it);	/* the iterator is ours to free */
+	if (rp == NULL)
+		return false;			/* no such procedure */
+	prop_dictionary_set(rl, "rproc-id", prop_dictionary_get(rp, "id"));
+	return true;
+}
+
+/*
  * NAT policy construction routines.
  */
 
@@ -685,23 +688,17 @@
 	/* NAT policy is rule with extra info. */
 	rl = prop_dictionary_create();
 	pri = nat_prio_counter++;
-	prop_dictionary_set(rl, "priority",
-	    prop_number_create_integer(pri));
+	prop_dictionary_set(rl, "priority", prop_number_create_integer(pri));
+	prop_array_add(nat_arr, rl);
 	return rl;
 }
 
 void
-npfctl_add_nat(prop_dictionary_t nat)
-{
-	prop_array_add(nat_arr, nat);
-}
-
-void
 npfctl_nat_setup(prop_dictionary_t rl, int type, int flags,
-    char *iface, char *taddr, char *rport)
+    u_int iface, char *taddr, char *rport)
 {
 	int attr = NPF_RULE_PASS | NPF_RULE_FINAL;
-	in_addr_t addr, mask;
+	in_addr_t addr, _dummy;
 	prop_data_t addrdat;
 
 	/* Translation type and flags. */
@@ -712,10 +709,10 @@
 
 	/* Interface and attributes. */
 	attr |= (type == NPF_NATOUT) ? NPF_RULE_OUT : NPF_RULE_IN;
-	npfctl_rule_setattr(rl, attr, iface, NULL, false, 0, 0, false);
+	npfctl_rule_setattr(rl, attr, iface);
 
-	/* Translation IP, XXX should be no mask. */
-	npfctl_parse_cidr(taddr, &addr, &mask);
+	/* Translation IP. */
+	npfctl_parse_cidr(taddr, &addr, &_dummy);
 	addrdat = prop_data_create_data(&addr, sizeof(in_addr_t));
 	if (addrdat == NULL) {
 		err(EXIT_FAILURE, "prop_data_create_data");
--- a/usr.sbin/npf/npfctl/npf_parser.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npf_parser.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_parser.c,v 1.4 2010/12/18 01:07:26 rmind Exp $	*/
+/*	$NetBSD: npf_parser.c,v 1.5 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: npf_parser.c,v 1.4 2010/12/18 01:07:26 rmind Exp $");
+__RCSID("$NetBSD: npf_parser.c,v 1.5 2011/01/18 20:33:45 rmind Exp $");
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -143,12 +143,36 @@
 	return vr;
 }
 
-static inline int
-npfctl_parsenorm(char *buf, bool *rnd, int *minttl, int *maxmss, bool *no_df)
+static char *
+npfctl_val_single(var_t *v, char *p)
+{
+	element_t *el;
+
+	if (v->v_type != VAR_SINGLE) {
+		errx(EXIT_FAILURE, "invalid value '%s'", p);
+	}
+	el = v->v_elements;
+	return el->e_data;
+}
+
+static u_int
+npfctl_val_interface(var_t *v, char *p, bool reqaddr)
+{
+	char *iface = npfctl_val_single(v, p);
+	u_int if_idx;
+
+	if (iface == NULL || npfctl_getif(iface, &if_idx, reqaddr) == NULL) {
+		errx(EXIT_FAILURE, "invalid interface '%s'", iface);
+	}
+	return if_idx;
+}
+
+static int
+npfctl_parsenorm(char *buf, prop_dictionary_t rp)
 {
 	char *p = buf, *sptr;
-
-	DPRINTF(("norm\t|%s|\n", p));
+	int minttl = 0, maxmss = 0;
+	bool rnd = false, no_df = false;
 
 	p = strtok_r(buf, ", \t", &sptr);
 	if (p == NULL) {
@@ -156,40 +180,103 @@
 	}
 	do {
 		if (strcmp(p, "random-id") == 0) {
-			*rnd = true;
+			rnd = true;
 		} else if (strcmp(p, "min-ttl") == 0) {
 			p = strtok_r(NULL, ", \t", &sptr);
-			*minttl = atoi(p);
+			minttl = atoi(p);
 		} else if (strcmp(p, "max-mss") == 0) {
 			p = strtok_r(NULL, ", \t", &sptr);
-			*maxmss = atoi(p);
+			maxmss = atoi(p);
 		} else if (strcmp(p, "no-df") == 0) {
-			*no_df = true;
+			no_df = true;
 		} else {
 			return -1;
 		}
 	} while ((p = strtok_r(NULL, ", \t", &sptr)) != 0);
 
+	prop_dictionary_set(rp, "randomize-id", prop_bool_create(rnd));
+	prop_dictionary_set(rp, "min-ttl", prop_number_create_integer(minttl));
+	prop_dictionary_set(rp, "max-mss", prop_number_create_integer(maxmss));
+	prop_dictionary_set(rp, "no-df", prop_bool_create(no_df));
+	return 0;
+}
+
+static int
+npfctl_parserproc(char *buf, prop_dictionary_t rp)
+{
+	char *p = buf, *end;
+
+	DPRINTF(("rproc\t|%s|\n", buf));
+
+	if ((p = strchr(p, '"')) == NULL)
+		return -1;
+	if ((end = strchr(++p, '"')) == NULL)
+		return -1;
+	*end = '\0';
+	prop_dictionary_set(rp, "name", prop_string_create_cstring(p));
+	return 0;
+}
+
+static int
+npfctl_parserproc_lines(char *buf, prop_dictionary_t rp)
+{
+	char *p = buf, *sptr;
+	prop_object_t obj;
+	uint32_t attr;
+
+	DPRINTF(("rproc\t|%s|\n", p));
+	obj = prop_dictionary_get(rp, "flags");
+	attr = obj ? prop_number_integer_value(obj) : 0;
+
+	PARSE_FIRST_TOKEN();
+
+	/* log <interface> */
+	if (strcmp(p, "log") == 0) {
+		var_t *ifvar;
+		u_int if_idx;
+
+		PARSE_NEXT_TOKEN();
+		if ((ifvar = npfctl_parsevalue(p)) == NULL)
+			return PARSE_ERR();
+		if_idx = npfctl_val_interface(ifvar, p, false);
+		prop_dictionary_set(rp, "log-interface",
+		    prop_number_create_integer(if_idx));
+		attr |= NPF_RPROC_LOG;
+
+	} else if (strcmp(p, "normalize") == 0) {
+		/* normalize ( .. ) */
+		p = strtok_r(NULL, "()", &sptr);
+		if (p == NULL) {
+			return PARSE_ERR();
+		}
+		if (npfctl_parsenorm(p, rp)) {
+			return PARSE_ERR();
+		}
+		attr |= NPF_RPROC_NORMALIZE;
+		PARSE_NEXT_TOKEN_NOCHECK();
+	}
+	prop_dictionary_set(rp, "flags", prop_number_create_integer(attr));
 	return 0;
 }
 
 /*
  * npfctl_parserule: main routine to parse a rule.  Syntax:
  *
- *	{ pass | block | count } [ in | out ] [ log ] [ quick ]
+ *	{ pass | block } [ in | out ] [ quick ]
  *	    [on <if>] [inet | inet6 ] proto <array>
  *	    from <addr/mask> port <port(s)|range>
- *	    too <addr/mask> port <port(s)|range>
- *	    [ keep state ]
+ *	    to <addr/mask> port <port(s)|range>
+ *	    [ keep state ] [ apply "<rproc>" ]
  */
-static inline int
+static int
 npfctl_parserule(char *buf, prop_dictionary_t rl)
 {
 	var_t *from_cidr = NULL, *fports = NULL;
 	var_t *to_cidr = NULL, *tports = NULL;
-	char *p, *sptr, *iface, *logiface, *proto = NULL, *tcp_flags = NULL;
-	int icmp_type = -1, icmp_code = -1, minttl = 0, maxmss = 0;
-	bool icmp = false, tcp = false, rnd = false, no_df = false;
+	char *p, *sptr, *proto = NULL, *tcp_flags = NULL;
+	int icmp_type = -1, icmp_code = -1;
+	bool icmp = false, tcp = false;
+	u_int iface = 0;
 	int ret, attr = 0;
 
 	DPRINTF(("rule\t|%s|\n", buf));
@@ -199,7 +286,6 @@
 
 	/* pass or block (mandatory) */
 	if (strcmp(p, "block") == 0) {
-		attr = 0;
 		PARSE_NEXT_TOKEN();
 		/* return-rst or return-icmp */
 		if (strcmp(p, "return-rst") == 0) {
@@ -213,7 +299,7 @@
 			PARSE_NEXT_TOKEN();
 		}
 	} else if (strcmp(p, "pass") == 0) {
-		attr = NPF_RULE_PASS;
+		attr |= NPF_RULE_PASS;
 		PARSE_NEXT_TOKEN();
 	} else {
 		return PARSE_ERR();
@@ -230,32 +316,6 @@
 		attr |= (NPF_RULE_IN | NPF_RULE_OUT);
 	}
 
-	/* log <interface> */
-	if (strcmp(p, "log") == 0) {
-		var_t *ifvar;
-		element_t *el;
-
-		PARSE_NEXT_TOKEN();
-		if ((ifvar = npfctl_parsevalue(p)) == NULL)
-			return PARSE_ERR();
-		if (ifvar->v_type != VAR_SINGLE) {
-			errx(EXIT_FAILURE, "invalid interface value '%s'", p);
-		}
-		el = ifvar->v_elements;
-		logiface = el->e_data;
-
-		attr |= NPF_RULE_LOG;
-		PARSE_NEXT_TOKEN();
-	} else {
-		logiface = NULL;
-	}
-
-	/* count */
-	if (strcmp(p, "count") == 0) {
-		attr |= NPF_RULE_COUNT;
-		PARSE_NEXT_TOKEN();
-	}
-
 	/* quick */
 	if (strcmp(p, "quick") == 0) {
 		attr |= NPF_RULE_FINAL;
@@ -265,20 +325,12 @@
 	/* on <interface> */
 	if (strcmp(p, "on") == 0) {
 		var_t *ifvar;
-		element_t *el;
 
 		PARSE_NEXT_TOKEN();
 		if ((ifvar = npfctl_parsevalue(p)) == NULL)
 			return PARSE_ERR();
-		if (ifvar->v_type != VAR_SINGLE) {
-			errx(EXIT_FAILURE, "invalid interface value '%s'", p);
-		}
-		el = ifvar->v_elements;
-		iface = el->e_data;
-
+		iface = npfctl_val_interface(ifvar, p, true);
 		PARSE_NEXT_TOKEN();
-	} else {
-		iface = NULL;
 	}
 
 	/* inet, inet6 (TODO) */
@@ -355,12 +407,8 @@
 		}
 		PARSE_NEXT_TOKEN();
 		var_t *tfvar = npfctl_parsevalue(p);
+		tcp_flags = npfctl_val_single(tfvar, p);
 		PARSE_NEXT_TOKEN_NOCHECK();
-		if (tfvar->v_type != VAR_SINGLE) {
-			errx(EXIT_FAILURE, "invalid TCP flags");
-		}
-		element_t *el = tfvar->v_elements;
-		tcp_flags = el->e_data;
 	}
 
 	/* icmp-type <t> code <c> */
@@ -389,16 +437,18 @@
 		PARSE_NEXT_TOKEN_NOCHECK();
 	}
 
-	/* normalize ( .. ) */
-	if (p && strcmp(p, "normalize") == 0) {
-		p = strtok_r(NULL, "()", &sptr);
-		if (p == NULL) {
+	/* apply "<rproc>" */
+	if (p && strcmp(p, "apply") == 0) {
+		char *end;
+		PARSE_NEXT_TOKEN();
+		if ((p = strchr(p, '"')) == NULL)
 			return PARSE_ERR();
-		}
-		if (npfctl_parsenorm(p, &rnd, &minttl, &maxmss, &no_df)) {
+		if ((end = strchr(++p, '"')) == NULL)
 			return PARSE_ERR();
+		*end = '\0';
+		if (!npfctl_find_rproc(rl, p)) {
+			errx(EXIT_FAILURE, "invalid procedure '%s'", p);
 		}
-		attr |= NPF_RULE_NORMALIZE;
 		PARSE_NEXT_TOKEN_NOCHECK();
 	}
 
@@ -407,13 +457,10 @@
 		return PARSE_ERR();
 	}
 
-	/* Set the rule attributes and interface, if any. */
-	npfctl_rule_setattr(rl, attr, iface, logiface,
-	    rnd, minttl, maxmss, no_df);
-
 	/*
-	 * Generate all protocol data.
+	 * Set the rule attributes and interface.  Generate all protocol data.
 	 */
+	npfctl_rule_setattr(rl, attr, iface);
 	npfctl_rule_protodata(rl, proto, tcp_flags, icmp_type, icmp_code,
 	    from_cidr, fports, to_cidr, tports);
 	return 0;
@@ -428,10 +475,11 @@
 
 #define	GROUP_ATTRS	(NPF_RULE_PASS | NPF_RULE_FINAL)
 
-static inline int
+static int
 npfctl_parsegroup(char *buf, prop_dictionary_t rl)
 {
-	char *p = buf, *end, *sptr, *iface;
+	char *p = buf, *end, *sptr;
+	u_int iface = 0;
 	int attr_dir;
 
 	DPRINTF(("group\t|%s|\n", buf));
@@ -453,10 +501,8 @@
 	 * If default group - no other options.
 	 */
 	if (strcmp(p, "default") == 0) {
-		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
-		npfctl_rule_setattr(rl,
-		    GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL,
-		    NULL, false, 0, 0, false);
+		attr_dir = NPF_RULE_DEFAULT | (NPF_RULE_IN | NPF_RULE_OUT);
+		npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, 0);
 		return 0;
 	}
 
@@ -477,34 +523,26 @@
 	/* Interface for this group (optional). */
 	if (p && strcmp(p, "interface") == 0) {
 		var_t *ifvar;
-		element_t *el;
-
 		PARSE_NEXT_TOKEN();
 		if ((ifvar = npfctl_parsevalue(p)) == NULL)
 			return -1;
-		if (ifvar->v_type != VAR_SINGLE) {
-			errx(EXIT_FAILURE, "invalid key '%s'", ifvar->v_key);
-		}
-		el = ifvar->v_elements;
-		iface = el->e_data;
+		iface = npfctl_val_interface(ifvar, p, true);
 		PARSE_NEXT_TOKEN_NOCHECK();
-	} else {
-		iface = NULL;
 	}
 
 	/* Direction (optional). */
-	if (p == NULL) {
-		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
-	} else {
+	if (p) {
 		if (strcmp(p, "in") == 0)
 			attr_dir = NPF_RULE_IN;
 		else if (strcmp(p, "out") == 0)
 			attr_dir = NPF_RULE_OUT;
 		else
 			return -1;
+	} else {
+		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
 	}
-	npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface, NULL,
-	    false, 0, 0, false);
+
+	npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface);
 	return 0;
 }
 
@@ -513,11 +551,12 @@
  *
  *	table <num> type <t> [ dynamic | file <path> ]
  */
-static inline int
-npfctl_parsetable(char *buf, prop_dictionary_t tl)
+static int
+npfctl_parsetable(char *buf)
 {
-	char *p, *sptr;
-	char *id_ptr, *type_ptr, *fname;
+	prop_dictionary_t tl;
+	char *p, *sptr, *fname;
+	unsigned int id, type;
 
 	DPRINTF(("table\t|%s|\n", buf));
 
@@ -525,7 +564,7 @@
 	if ((p = strchr(buf, '"')) == NULL) {
 		return PARSE_ERR();
 	}
-	id_ptr = ++p;
+	id = atoi(++p);
 	p = strchr(p, '"');
 	*p++ = '\0';
 
@@ -539,7 +578,13 @@
 	if (p == NULL || *p != '"') {
 		return PARSE_ERR();
 	}
-	type_ptr = p;
+	/* 'p' is at the opening quote; strncmp() yields 0 on a match. */
+	if (strncmp(p, "\"hash\"", 6) == 0)
+		type = NPF_TABLE_HASH;
+	else if (strncmp(p, "\"tree\"", 6) == 0)
+		type = NPF_TABLE_RBTREE;
+	else
+		errx(EXIT_FAILURE, "invalid table type '%s'\n", p);
 	if ((p = strchr(++p, '"')) == NULL) {
 		return PARSE_ERR();
 	}
@@ -548,7 +593,7 @@
 	/*
 	 * Setup the table.
 	 */
-	npfctl_table_setup(tl, id_ptr, type_ptr);
+	tl = npfctl_construct_table(id, type);
 	PARSE_NEXT_TOKEN();
 
 	/* Dynamic. */
@@ -566,8 +611,8 @@
 	p = strchr(p, '"');
 	*p = '\0';
 
-	/* Construct the table. */
-	npfctl_construct_table(tl, fname);
+	/* Fill the table. */
+	npfctl_fill_table(tl, fname);
 	return 0;
 }
 
@@ -577,14 +622,16 @@
  *	[bi]nat <if> from <net> to <net/addr> -> <ip>
  *	rdr <if> from <net> to <addr> -> <ip>
  */
-static inline int
-npfctl_parse_nat(char *buf, prop_dictionary_t nat)
+static int
+npfctl_parse_nat(char *buf)
 {
+	prop_dictionary_t nat, bn;
 	var_t *ifvar, *from_cidr, *to_cidr, *ip;
 	var_t *tports = NULL, *rports = NULL;
-	element_t *iface, *cidr;
+	element_t *cidr;
 	char *p, *sptr;
 	bool binat, rdr;
+	u_int iface;
 
 	DPRINTF(("[bi]nat/rdr\t|%s|\n", buf));
 	binat = (strncmp(buf, "binat", 5) == 0);
@@ -599,11 +646,7 @@
 	if ((ifvar = npfctl_parsevalue(p)) == NULL) {
 		return PARSE_ERR();
 	}
-	if (ifvar->v_type != VAR_SINGLE) {
-		errx(EXIT_FAILURE, "invalid interface value '%s'", p);
-	} else {
-		iface = ifvar->v_elements;
-	}
+	iface = npfctl_val_interface(ifvar, p, true);
 	PARSE_NEXT_TOKEN();
 
 	/* from <addr> */
@@ -652,19 +695,21 @@
 	 *
 	 * XXX mess
 	 */
+	nat = npfctl_mk_nat();
+
 	if (!rdr) {
 		npfctl_rule_protodata(nat, NULL, NULL, -1, -1, from_cidr,
 		    NULL, to_cidr, NULL);
 		npfctl_nat_setup(nat, NPF_NATOUT,
 		    binat ? 0 : (NPF_NAT_PORTS | NPF_NAT_PORTMAP),
-		    iface->e_data, cidr->e_data, NULL);
+		    iface, cidr->e_data, NULL);
 	} else {
 		element_t *rp = rports->v_elements;
 
 		npfctl_rule_protodata(nat, NULL, NULL, -1, -1, from_cidr,
 		    NULL, to_cidr, tports);
 		npfctl_nat_setup(nat, NPF_NATIN, NPF_NAT_PORTS,
-		    iface->e_data, cidr->e_data, rp->e_data);
+		    iface, cidr->e_data, rp->e_data);
 	}
 
 	/*
@@ -675,14 +720,13 @@
 	 * XXX mess
 	 */
 	if (binat) {
-		prop_dictionary_t bn = npfctl_mk_nat();
 		element_t *taddr = from_cidr->v_elements;
 
+		bn = npfctl_mk_nat();
 		npfctl_rule_protodata(bn, NULL, NULL, -1, -1,
 		    to_cidr, NULL, ip, NULL);
-		npfctl_nat_setup(bn, NPF_NATIN, 0, iface->e_data,
+		npfctl_nat_setup(bn, NPF_NATIN, 0, iface,
 		    taddr->e_data, NULL);
-		npfctl_add_nat(bn);
 	}
 	return 0;
 }
@@ -694,7 +738,7 @@
  * => Value can be an array, use npf_parsevalue().
  * => Insert variable into the global list.
  */
-static inline int
+static int
 npfctl_parsevar(char *buf)
 {
 	char *s = buf, *p, *key;
@@ -703,27 +747,27 @@
 	DPRINTF(("def\t|%s|\n", buf));
 
 	if ((p = strpbrk(s, "= \t")) == NULL)
-		return -1;
+		return PARSE_ERR();
 
 	/* Validation of '='. */
 	if (*p != '=' && strchr(p, '=') == NULL)
-		return -1;
+		return PARSE_ERR();
 	*p = '\0';
 	key = s;
 
 	/* Check for duplicates. */
 	if (npfctl_lookup_varlist(key))
-		return -1;
+		return PARSE_ERR();
 
 	/* Parse quotes before. */
 	if ((s = strchr(p + 1, '"')) == NULL)
-		return -1;
+		return PARSE_ERR();
 	if ((p = strchr(++s, '"')) == NULL)
-		return -1;
+		return PARSE_ERR();
 	*p = '\0';
 
 	if ((vr = npfctl_parsevalue(s)) == NULL)
-		return -1;
+		return PARSE_ERR();
 	vr->v_key = xstrdup(key);
 	vr->v_next = var_list;
 	var_list = vr;
@@ -733,23 +777,25 @@
 /*
  * npf_parseline: main function parsing a single configuration line.
  *
- * => Distinguishes 'group', rule (in-group), 'table' and definitions.
- * => Tracks begin-end of the group i.e. in-group state.
+ * Distinguishes 'group', rule (in-group), 'procedure', in-procedure,
+ * 'table' and definitions.  Tracks begin-end of the group and procedure
+ * i.e. in-group or in-procedure states.
  */
 int
 npf_parseline(char *buf)
 {
 	static prop_dictionary_t curgr = NULL;
+	static prop_dictionary_t currp = NULL;
 	char *p = buf;
 	int ret;
 
-	/* Skip emptry lines and comments. */
+	/* Skip empty lines and comments. */
 	while (isspace((unsigned char)*p))
 		p++;
 	if (*p == '\0' || *p == '\n' || *p == '#')
 		return 0;
 
-	/* At first, check if inside the group. */
+	/* At first, check if inside the group or rproc. */
 	if (curgr) {
 		prop_dictionary_t rl;
 
@@ -759,41 +805,36 @@
 			return 0;
 		}
 		/* Rule. */
-		rl = npfctl_mk_rule(false);
+		rl = npfctl_mk_rule(false, curgr);
 		ret = npfctl_parserule(p, rl);
-		if (ret)
-			return ret;
-		npfctl_add_rule(rl, curgr);
+
+	} else if (currp) {
+		/* End of the procedure. */
+		if (*p == '}') {
+			currp = NULL;
+			return 0;
+		}
+		/* Procedure contents. */
+		ret = npfctl_parserproc_lines(p, currp);
 
 	} else if (strncmp(p, "group", 5) == 0) {
-
 		/* Group. */
-		curgr = npfctl_mk_rule(true);
+		curgr = npfctl_mk_rule(true, NULL);
 		ret = npfctl_parsegroup(p, curgr);
-		if (ret)
-			return ret;
-		npfctl_add_rule(curgr, NULL);
+
+	} else if (strncmp(p, "procedure", 9) == 0) {
+		/* Rule procedure. */
+		currp = npfctl_mk_rproc();
+		ret = npfctl_parserproc(p, currp);
 
 	} else if (strncmp(p, "table", 5) == 0) {
-		prop_dictionary_t tl;
-
 		/* Table. */
-		tl = npfctl_mk_table();
-		ret = npfctl_parsetable(p, tl);
-		if (ret)
-			return ret;
-		npfctl_add_table(tl);
+		ret = npfctl_parsetable(p);
 
 	} else if (strncmp(p, "nat", 3) == 0 || strncmp(p, "rdr", 3) == 0 ||
 	    strncmp(p, "binat", 5) == 0) {
-		prop_dictionary_t nat;
-
 		/* NAT policy. */
-		nat = npfctl_mk_nat();
-		ret = npfctl_parse_nat(p, nat);
-		if (ret)
-			return ret;
-		npfctl_add_nat(nat);
+		ret = npfctl_parse_nat(p);
 
 	} else {
 		/* Defined variable or syntax error. */
--- a/usr.sbin/npf/npfctl/npfctl.8	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.8	Tue Jan 18 20:33:45 2011 +0000
@@ -1,6 +1,6 @@
-.\"	$NetBSD: npfctl.8,v 1.3 2010/09/14 11:04:57 kim Exp $
+.\"	$NetBSD: npfctl.8,v 1.4 2011/01/18 20:33:45 rmind Exp $
 .\"
-.\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+.\" Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
 .\" All rights reserved.
 .\"
 .\" This material is based upon work partially supported by The
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd August 22, 2010
+.Dd January 18, 2011
 .Dt NPFCTL 8
 .Os
 .Sh NAME
@@ -52,20 +52,27 @@
 .Bl -tag -width reload
 .It start
 Enable packet inspection using the currently loaded configuration, if any.
-Note that this command does not load or reload the configuration.
+Note that this command does not load or reload the configuration,
+or affect existing sessions.
 .It stop
 Disable packet inspection.
-This command does not change the currently loaded configuration.
+This command does not change the currently loaded configuration,
+or affect existing sessions.
 .It reload Op Ar path
 Load or reload configuration from file.
 The configuration file at
 .Pa /etc/npf.conf
 will be used unless a file is specified by
 .Ar path .
-The reload operation (i.e., replacing the ruleset) is atomic.
+All sessions will be preserved during the reload, except those which
+will lose NAT policy due to removal.
+NAT policy is determined by the translation type and address.
+Note that change of filter criteria will not expire associated sessions.
+The reload operation (i.e., replacing the ruleset, NAT policies and tables)
+is atomic.
 .It flush
 Flush configuration.
-That is, remove all rules and tables.
+That is, remove all rules, tables and expire all sessions.
 This command does not disable packet inspection.
 .It table Ar tid
 List all entries in the currently loaded table specified by
@@ -84,6 +91,22 @@
 .Ar tid ,
 add or remove the IPv4 CIDR specified by
 .Aq Ar addr/mask .
+.It sess-save
+Save all active sessions.
+The data will be stored in the
+.Pa /var/db/npf_sessions.db
+file.
+The administrator may want to stop packet inspection before saving
+the sessions.
+.It sess-load
+Load saved sessions from the file.
+Note that the original configuration should be loaded before loading sessions.
+In case of NAT policy changes, sessions which lose their associated policy
+will not be loaded.
+Any sessions existing at the time of the load operation will be expired.
+The administrator may want to start packet inspection after loading sessions.
+.It stats
+Print various statistics.
 .El
 .\" -----
 .Sh PERFORMANCE
--- a/usr.sbin/npf/npfctl/npfctl.c	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.c	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npfctl.c,v 1.3 2010/12/18 01:07:26 rmind Exp $	*/
+/*	$NetBSD: npfctl.c,v 1.4 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: npfctl.c,v 1.3 2010/12/18 01:07:26 rmind Exp $");
+__RCSID("$NetBSD: npfctl.c,v 1.4 2011/01/18 20:33:45 rmind Exp $");
 
 #include <sys/ioctl.h>
 #include <sys/stat.h>
@@ -81,8 +81,7 @@
 
 	p = malloc(sz);
 	if (p == NULL) {
-		perror("zalloc");
-		exit(EXIT_FAILURE);
+		err(EXIT_FAILURE, "zalloc");
 	}
 	memset(p, 0, sz);
 	return p;
@@ -95,8 +94,7 @@
 
 	p = strdup(s);
 	if (p == NULL) {
-		perror("xstrdup");
-		exit(EXIT_FAILURE);
+		err(EXIT_FAILURE, "xstrdup");
 	}
 	return p;
 }
@@ -132,7 +130,7 @@
 
 	fp = fopen(cfg, "r");
 	if (fp == NULL) {
-		err(EXIT_FAILURE, "fopen");
+		err(EXIT_FAILURE, "open '%s'", cfg);
 	}
 	l = 0;
 	buf = NULL;
@@ -183,9 +181,15 @@
 	    st[NPF_STAT_INVALID_STATE_TCP3]);
 
 	printf("Packet race cases:\n\t%"PRIu64" NAT association race\n\t"
-	    "%"PRIu64" duplicate session race\n", st[NPF_STAT_RACE_NAT],
+	    "%"PRIu64" duplicate session race\n\n", st[NPF_STAT_RACE_NAT],
 	    st[NPF_STAT_RACE_SESSION]);
 
+	printf("Rule processing procedure cases:\n"
+	    "\t%"PRIu64" packets logged\n\t%"PRIu64" packets normalized\n\n",
+	    st[NPF_STAT_RPROC_LOG], st[NPF_STAT_RPROC_NORM]);
+
+	printf("Unexpected error cases:\n\t%"PRIu64"\n", st[NPF_STAT_ERROR]);
+
 	free(st);
 	return 0;
 }
@@ -197,17 +201,16 @@
 	npf_ioctl_table_t tbl;
 	char *arg;
 
-#ifndef DEBUG
 	fd = open(NPF_DEV_PATH, O_RDONLY);
 	if (fd == -1) {
-		err(EXIT_FAILURE, "cannot open " NPF_DEV_PATH);
+		err(EXIT_FAILURE, "cannot open '%s'", NPF_DEV_PATH);
 	}
 	ret = ioctl(fd, IOC_NPF_VERSION, &ver);
 	if (ver != NPF_VERSION) {
-		errx(EXIT_FAILURE, "incompatible npf interface version "
-		    "(%d, kernel %d)", NPF_VERSION, ver);
+		errx(EXIT_FAILURE,
+		    "incompatible NPF interface version (%d, kernel %d)",
+		    NPF_VERSION, ver);
 	}
-#endif
 	switch (action) {
 	case NPFCTL_START:
 		boolval = true;
@@ -219,10 +222,6 @@
 		break;
 	case NPFCTL_RELOAD:
 		npfctl_init_data();
-#ifdef DEBUG
-		npfctl_parsecfg("npf.conf");
-		return npfctl_ioctl_send(0);
-#endif
 		npfctl_parsecfg(argc < 3 ? NPF_CONF_PATH : argv[2]);
 		ret = npfctl_ioctl_send(fd);
 		break;
@@ -230,6 +229,10 @@
 		/* Pass empty configuration to flush. */
 		npfctl_init_data();
 		ret = npfctl_ioctl_send(fd);
+		if (ret) {
+			break;
+		}
+		ret = npfctl_ioctl_flushse(fd);
 		break;
 	case NPFCTL_TABLE:
 		if (argc < 5) {
@@ -237,12 +240,15 @@
 		}
 		tbl.nct_tid = atoi(argv[2]);
 		if (strcmp(argv[3], "add") == 0) {
+			/* Add table entry. */
 			tbl.nct_action = NPF_IOCTL_TBLENT_ADD;
 			arg = argv[4];
 		} else if (strcmp(argv[3], "rem") == 0) {
+			/* Remove entry. */
 			tbl.nct_action = NPF_IOCTL_TBLENT_REM;
 			arg = argv[4];
 		} else {
+			/* Default: lookup. */
 			tbl.nct_action = 0;
 			arg = argv[3];
 		}
@@ -279,6 +285,14 @@
 	}
 	cmd = argv[1];
 
+#ifdef _NPF_TESTING
+	/* Special testing case. */
+	npfctl_init_data();
+	npfctl_parsecfg("npf.conf");
+	npfctl_ioctl_send(0);
+	return 0;
+#endif
+
 	/* Find and call the subroutine */
 	for (n = 0; operations[n].cmd != NULL; n++) {
 		if (strcmp(cmd, operations[n].cmd) != 0)
--- a/usr.sbin/npf/npfctl/npfctl.h	Tue Jan 18 20:32:53 2011 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.h	Tue Jan 18 20:33:45 2011 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npfctl.h,v 1.4 2010/12/18 01:07:26 rmind Exp $	*/
+/*	$NetBSD: npfctl.h,v 1.5 2011/01/18 20:33:45 rmind Exp $	*/
 
 /*-
- * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2011 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -75,27 +75,27 @@
 int		npfctl_ioctl_send(int);
 int		npfctl_ioctl_recvse(int);
 int		npfctl_ioctl_sendse(int);
+int		npfctl_ioctl_flushse(int);
 
+struct ifaddrs *npfctl_getif(char *, unsigned int *, bool);
 bool		npfctl_parse_v4mask(char *, in_addr_t *, in_addr_t *);
 
-prop_dictionary_t npfctl_mk_rule(bool);
-void		npfctl_add_rule(prop_dictionary_t, prop_dictionary_t);
-void		npfctl_rule_setattr(prop_dictionary_t, int, char *,
-		    char *, bool, int, int, bool);
+prop_dictionary_t npfctl_mk_rule(bool, prop_dictionary_t);
+void		npfctl_rule_setattr(prop_dictionary_t, int, u_int);
 void		npfctl_rule_protodata(prop_dictionary_t, char *, char *,
 		    int, int, var_t *, var_t *, var_t *, var_t *);
 void		npfctl_rule_icmpdata(prop_dictionary_t, var_t *, var_t *);
 
 prop_dictionary_t npfctl_lookup_table(char *);
-prop_dictionary_t npfctl_mk_table(void);
-void		npfctl_table_setup(prop_dictionary_t, char *, char *);
-void		npfctl_construct_table(prop_dictionary_t, char *);
-void		npfctl_add_table(prop_dictionary_t);
+prop_dictionary_t npfctl_construct_table(int, int);
+void		npfctl_fill_table(prop_dictionary_t, char *);
+
+prop_dictionary_t npfctl_mk_rproc(void);
+bool		npfctl_find_rproc(prop_dictionary_t, char *);
 
 prop_dictionary_t npfctl_mk_nat(void);
-void		npfctl_add_nat(prop_dictionary_t);
 void		npfctl_nat_setup(prop_dictionary_t, int, int,
-		    char *, char *, char *);
+		    u_int, char *, char *);
 
 size_t		npfctl_calc_ncsize(int []);
 size_t		npfctl_failure_offset(int []);