NPF checkpoint: trunk
author rmind <rmind@NetBSD.org>
Thu, 16 Sep 2010 04:53:27 +0000
branch trunk
changeset 193622 eb4b31cbe55c
parent 193621 7e9b20cc166b
child 193623 4f1f8953c2e0
NPF checkpoint: - Add support for bi-directional NAT and redirection / port forwarding. - Finish filtering on ICMP type/code and add filtering on TCP flags. - Add support for TCP reset (RST) or ICMP destination unreachable on block. - Fix a bunch of bugs; misc cleanup.
share/man/man9/npf_ncode.9
sys/modules/npf/Makefile
sys/net/npf/files.npf
sys/net/npf/npf.h
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_ctl.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_instr.c
sys/net/npf/npf_mbuf.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_ncode.h
sys/net/npf/npf_processor.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_sendpkt.c
sys/net/npf/npf_session.c
usr.sbin/npf/npfctl/npf.conf.5
usr.sbin/npf/npfctl/npf_data.c
usr.sbin/npf/npfctl/npf_ncgen.c
usr.sbin/npf/npfctl/npf_parser.c
usr.sbin/npf/npfctl/npfctl.h
--- a/share/man/man9/npf_ncode.9	Thu Sep 16 02:38:50 2010 +0000
+++ b/share/man/man9/npf_ncode.9	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-.\"	$NetBSD: npf_ncode.9,v 1.3 2010/08/24 23:55:05 rmind Exp $
+.\"	$NetBSD: npf_ncode.9,v 1.4 2010/09/16 04:53:27 rmind Exp $
 .\"
 .\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd August 22, 2010
+.Dd September 16, 2010
 .Dt NPF_NCODE 9
 .Os
 .Sh NAME
@@ -220,12 +220,16 @@
 Value of the first argument indicates whether source (if 0x1) or
 destination (if 0x0) address should be matched.
 .\" -
-.It Sy 0x92 NPF_OPCODE_ICMP4 <type> <code>
-Match ICMP type and code of the packet, unless a value of ~0 (all bits set)
-is passed, which indicates that comparison should not be performed.
+.It Sy 0x92 NPF_OPCODE_ICMP4 <type/code>
+Match that packet is ICMP and compare type and code values, if required.
+The 32nd and 31st (highest) bits indicate whether the type and code values,
+respectively, should be compared.
+If comparison is required, the type and code values are represented by
+lower 16 bits.
+The higher 8 bits represent type, and the lower 8 bits code number.
 .\" -
 .It Sy 0xa0 NPF_OPCODE_TCP_PORT	<s/d>, <port range>
-Match the source or destination port with a specified port range.
+Match the TCP source or destination port with a specified port range.
 The higher 16 bits of the second argument represent the "from" and
 the lower 16 bits represent the "to" values of the range.
 The 32-bit port range value is in host byte order, however the actual
@@ -234,13 +238,14 @@
 destination (if 0x0) port should be matched.
 .\" -
 .It Sy 0xa1 NPF_OPCODE_UDP_PORT <s/d>, <port range>
-Match the source or destination port with a specified port range.
-The higher 16 bits of the second argument represent the "from" and
-the lower 16 bits represent the "to" values of range.
-The 32-bit port range value is in host byte order, however the actual
-"from" and "to" values should be in network byte order.
-The value of the first argument indicates whether source (if 0x1) or
-destination (if 0x0) port should be matched.
+Equivalent of
+.Dv NPF_OPCODE_TCP_PORT ,
+but for UDP protocol.
+.\" -
+.It Sy 0xa2 NPF_OPCODE_TCP_FLAGS <fl/mask>
+Match the TCP flags with the specified flags and mask,
+represented by the lower 16 bits.
+The higher 8 bits represent flags and the lower 8 bits mask to apply.
 .El
 .\" -----
 .Sh CODE REFERENCES
--- a/sys/modules/npf/Makefile	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/modules/npf/Makefile	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.1 2010/08/22 18:56:22 rmind Exp $
+# $NetBSD: Makefile,v 1.2 2010/09/16 04:53:27 rmind Exp $
 
 .include "../Makefile.inc"
 
@@ -8,6 +8,6 @@
 
 SRCS=		npf.c npf_ctl.c npf_handler.c npf_instr.c npf_mbuf.c
 SRCS+=		npf_processor.c npf_ruleset.c npf_tableset.c npf_inet.c
-SRCS+=		npf_session.c npf_nat.c npf_alg.c
+SRCS+=		npf_session.c npf_nat.c npf_sendpkt.c npf_alg.c
 
 .include <bsd.kmodule.mk>
--- a/sys/net/npf/files.npf	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/files.npf	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.1 2010/08/22 18:56:22 rmind Exp $
+# $NetBSD: files.npf,v 1.2 2010/09/16 04:53:27 rmind Exp $
 #
 # Public Domain.
 #
@@ -9,6 +9,7 @@
 
 defpseudo	npf:	ifnet
 
+# Core
 file	net/npf/npf.c				npf
 file	net/npf/npf_ctl.c			npf
 file	net/npf/npf_handler.c			npf
@@ -21,3 +22,7 @@
 file	net/npf/npf_session.c			npf
 file	net/npf/npf_nat.c			npf
 file	net/npf/npf_alg.c			npf
+file	net/npf/npf_sendpkt.c			npf
+
+# ALGs
+file	net/npf/npf_alg_icmp.c			npf
--- a/sys/net/npf/npf.h	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf.h	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -81,7 +81,6 @@
 typedef struct {
 	uint32_t		npc_info;
 	int			npc_dir;
-	uint8_t			npc_elen;
 	/* NPC_IP46 */
 	uint8_t			npc_proto;
 	uint16_t		npc_hlen;
@@ -127,7 +126,7 @@
 		    void (*)(const npf_cache_t *, void *), void *);
 void		npf_hook_unregister(npf_rule_t *, npf_hook_t *);
 
-#endif
+#endif	/* _KERNEL */
 
 /* Rule attributes. */
 #define	NPF_RULE_PASS			0x0001
@@ -136,11 +135,20 @@
 #define	NPF_RULE_LOG			0x0008
 #define	NPF_RULE_DEFAULT		0x0010
 #define	NPF_RULE_KEEPSTATE		0x0020
+#define	NPF_RULE_RETRST			0x0040
+#define	NPF_RULE_RETICMP		0x0080
 
 #define	NPF_RULE_IN			0x1000
 #define	NPF_RULE_OUT			0x2000
 #define	NPF_RULE_DIMASK			0x3000
 
+/* Address translation types and flags. */
+#define	NPF_NATIN			1
+#define	NPF_NATOUT			2
+
+#define	NPF_NAT_PORTS			0x01
+#define	NPF_NAT_PORTMAP			0x02
+
 /* Table types. */
 #define	NPF_TABLE_HASH			1
 #define	NPF_TABLE_RBTREE		2
@@ -176,4 +184,4 @@
 #define	IOC_NPF_RELOAD		_IOW('N', 102, struct plistref)
 #define	IOC_NPF_TABLE		_IOW('N', 103, struct npf_ioctl_table)
 
-#endif
+#endif	/* _NPF_H_ */
--- a/sys/net/npf/npf_alg_icmp.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -228,11 +228,6 @@
 
 	/* Advance to ICMP header. */
 	n_ptr = nbuf_dataptr(nbuf);
-#ifdef _NPF_TESTING
-	if (npc->npc_elen && /* XXX */
-	    (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL)
-		return false;
-#endif
 	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen)) == NULL) {
 		return false;
 	}
@@ -317,7 +312,7 @@
 	in_addr_t addr;
 	in_port_t port;
 
-	npf_nat_getlocal(nt, &addr, &port);
+	npf_nat_getorig(nt, &addr, &port);
 
 	if (!npf_rwrip(&enpc, nbuf, n_ptr, PFIL_OUT, addr)) {
 		return false;
--- a/sys/net/npf/npf_ctl.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_ctl.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ctl.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -41,7 +41,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -328,7 +328,9 @@
 		prop_object_t obj;
 		npf_natpolicy_t *np;
 		npf_rule_t *rl;
-		in_addr_t gip;
+		in_addr_t taddr;
+		in_port_t tport;
+		int type, flags;
 
 		/* NAT policy - dictionary. */
 		if (prop_object_type(natdict) != PROP_TYPE_DICTIONARY) {
@@ -336,9 +338,21 @@
 			break;
 		}
 
-		/* Gateway IP. */
-		obj = prop_dictionary_get(natdict, "gateway_ip");
-		gip = (in_addr_t)prop_number_integer_value(obj);
+		/* Translation type. */
+		obj = prop_dictionary_get(natdict, "type");
+		type = prop_number_integer_value(obj);
+
+		/* Translation flags. */
+		obj = prop_dictionary_get(natdict, "flags");
+		flags = prop_number_integer_value(obj);
+
+		/* Translation IP. */
+		obj = prop_dictionary_get(natdict, "translation_ip");
+		taddr = (in_addr_t)prop_number_integer_value(obj);
+
+		/* Translation port (for redirect case). */
+		obj = prop_dictionary_get(natdict, "translation_port");
+		tport = (in_addr_t)prop_number_integer_value(obj);
 
 		/*
 		 * NAT policies are standard rules, plus additional
@@ -349,7 +363,7 @@
 			break;
 
 		/* Allocate a new NAT policy and assign to the rule. */
-		np = npf_nat_newpolicy(gip);
+		np = npf_nat_newpolicy(type, flags, taddr, tport);
 		if (np == NULL) {
 			error = ENOMEM;
 			break;
@@ -402,7 +416,7 @@
 
 	/* NAT policies. */
 	nset = npf_ruleset_create();
-	natlist = prop_dictionary_get(dict, "nat");
+	natlist = prop_dictionary_get(dict, "translation");
 	error = npf_mk_natlist(nset, natlist);
 	if (error)
 		goto fail;
@@ -455,7 +469,7 @@
 }
 
 /*
- * npf_table_ctl: add, remove or query entries in the specified table.
+ * npfctl_table: add, remove or query entries in the specified table.
  *
  * For maximum performance, interface is avoiding proplib(3)'s overhead.
  */
--- a/sys/net/npf/npf_handler.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_handler.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_handler.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_handler.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -56,6 +56,8 @@
 static struct pfil_head *	npf_ph_if = NULL;
 static struct pfil_head *	npf_ph_inet = NULL;
 
+static bool			default_pass = true;
+
 int	npf_packet_handler(void *, struct mbuf **, struct ifnet *, int);
 
 /*
@@ -69,59 +71,67 @@
 }
 
 /*
- * npf_packet_handler: main packet handling routine.
+ * npf_packet_handler: main packet handling routine for layer 3.
  *
  * Note: packet flow and inspection logic is in strict order.
  */
 int
 npf_packet_handler(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
 {
-	const int layer = (const int)(long)arg;
 	nbuf_t *nbuf = *mp;
 	npf_cache_t npc;
 	npf_session_t *se;
 	npf_rule_t *rl;
-	int error;
+	bool keepstate;
+	int retfl, error;
 
 	/*
 	 * Initialise packet information cache.
 	 * Note: it is enough to clear the info bits.
 	 */
 	npc.npc_info = 0;
+	error = 0;
+	retfl = 0;
 
 	/* Inspect the list of sessions. */
-	se = npf_session_inspect(&npc, nbuf, ifp, di, layer);
+	se = npf_session_inspect(&npc, nbuf, ifp, di);
+
+	/* If "passing" session found - skip the ruleset inspection. */
+	if (se && npf_session_pass(se)) {
+		goto pass;
+	}
 
-	/* Inbound NAT. */
-	if ((di & PFIL_IN) && (error = npf_natin(&npc, se, nbuf, layer)) != 0) {
+	/* Inspect the ruleset using this packet. */
+	rl = npf_ruleset_inspect(&npc, nbuf, ifp, di, NPF_LAYER_3);
+	if (rl == NULL) {
+		if (default_pass) {
+			goto pass;
+		}
+		error = ENETUNREACH;
 		goto out;
 	}
 
-	/* If session found - we pass this packet. */
-	if (se && npf_session_pass(se)) {
-		error = 0;
-	} else {
-		/* Inspect ruleset using this packet. */
-		rl = npf_ruleset_inspect(&npc, nbuf, ifp, di, layer);
-		if (rl != NULL) {
-			bool keepstate;
-			/* Apply the rule. */
-			error = npf_rule_apply(&npc, rl, &keepstate);
-			if (error) {
-				goto out;
-			}
-			/* Establish a session, if required. */
-			if (keepstate) {
-				se = npf_session_establish(&npc, NULL, di);
-			}
-		}
-		/* No rules or "default" rule - pass. */
+	/* Apply the rule. */
+	error = npf_rule_apply(&npc, rl, &keepstate, &retfl);
+	if (error) {
+		goto out;
 	}
 
-	/* Outbound NAT. */
-	if (di & PFIL_OUT) {
-		error = npf_natout(&npc, se, nbuf, ifp, layer);
+	/* Establish a "pass" session, if required. */
+	if (keepstate && !se) {
+		se = npf_session_establish(&npc, NULL, di);
+		if (se == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		npf_session_setpass(se);
 	}
+pass:
+	KASSERT(error == 0);
+	/*
+	 * Perform NAT.
+	 */
+	error = npf_do_nat(&npc, se, nbuf, ifp, di);
 out:
 	/* Release reference on session. */
 	if (se != NULL) {
@@ -130,9 +140,16 @@
 
 	/*
 	 * If error is set - drop the packet.
-	 * Normally, ENETUNREACH is used to "block".
+	 * Normally, ENETUNREACH is used for "block".
 	 */
 	if (error) {
+		/*
+		 * Depending on flags and protocol, return TCP reset (RST)
+		 * or ICMP destination unreachable
+		 */
+		if (retfl) {
+			npf_return_block(&npc, nbuf, retfl);
+		}
 		m_freem(*mp);
 		*mp = NULL;
 	}
@@ -171,7 +188,7 @@
 	KASSERT(error == 0);
 
 	/* Packet IN/OUT handler on all interfaces and IP layer. */
-	error = pfil_add_hook(npf_packet_handler, (void *)NPF_LAYER_3,
+	error = pfil_add_hook(npf_packet_handler, NULL,
 	    PFIL_WAITOK | PFIL_ALL, npf_ph_inet);
 	KASSERT(error == 0);
 
@@ -193,7 +210,7 @@
 	KERNEL_LOCK(1, NULL);
 
 	if (npf_ph_if) {
-		(void)pfil_remove_hook(npf_packet_handler, (void *)NPF_LAYER_3,
+		(void)pfil_remove_hook(npf_packet_handler, NULL,
 		    PFIL_ALL, npf_ph_inet);
 		(void)pfil_remove_hook(npf_ifhook, NULL,
 		    PFIL_IFADDR | PFIL_IFNET, npf_ph_if);
--- a/sys/net/npf/npf_impl.h	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_impl.h	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -98,8 +98,9 @@
 bool		npf_ip4_proto(npf_cache_t *, nbuf_t *, void *);
 bool		npf_fetch_ip4addrs(npf_cache_t *, nbuf_t *, void *);
 bool		npf_fetch_ports(npf_cache_t *, nbuf_t *, void *, const int);
+bool		npf_fetch_tcpfl(npf_cache_t *, nbuf_t *, void *);
 bool		npf_fetch_icmp(npf_cache_t *, nbuf_t *, void *);
-bool		npf_cache_all_ip4(npf_cache_t *, nbuf_t *, const int);
+bool		npf_cache_all(npf_cache_t *, nbuf_t *);
 
 bool		npf_rwrport(npf_cache_t *, nbuf_t *, void *, const int,
 		    in_port_t, in_addr_t);
@@ -108,6 +109,8 @@
 uint16_t	npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
 uint16_t	npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
 
+void		npf_return_block(npf_cache_t *, nbuf_t *, const int);
+
 /* Complex instructions. */
 int		npf_match_ether(nbuf_t *, int, int, uint16_t, uint32_t *);
 int		npf_match_ip4table(npf_cache_t *, nbuf_t *, void *,
@@ -118,8 +121,8 @@
 		    const int, const uint32_t);
 int		npf_match_udp_ports(npf_cache_t *, nbuf_t *, void *,
 		    const int, const uint32_t);
-int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *,
-		    const int, const int);
+int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, const uint32_t);
+int		npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, const uint32_t);
 
 /* Tableset interface. */
 int		npf_tableset_sysinit(void);
@@ -157,7 +160,7 @@
 		    struct ifnet *, const int, const int);
 npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
 		    struct ifnet *, const int, const int);
-int		npf_rule_apply(const npf_cache_t *, npf_rule_t *, bool *);
+int		npf_rule_apply(const npf_cache_t *, npf_rule_t *, bool *, int *);
 npf_ruleset_t *	npf_rule_subset(npf_rule_t *);
 
 npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
@@ -169,31 +172,27 @@
 int		npf_session_tracking(bool);
 
 npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *,
-		    struct ifnet *, const int, const int);
+		    struct ifnet *, const int);
 npf_session_t *	npf_session_establish(const npf_cache_t *,
 		    npf_nat_t *, const int);
 void		npf_session_release(npf_session_t *);
 bool		npf_session_pass(const npf_session_t *);
-
-npf_nat_t *	npf_session_retnat(const npf_session_t *);
-
+void		npf_session_setpass(npf_session_t *);
 void		npf_session_link(npf_session_t *, npf_session_t *);
-npf_nat_t *	npf_session_retlinknat(const npf_session_t *);
+npf_nat_t *	npf_session_retnat(npf_session_t *, const int, bool *);
 
 /* NAT. */
 void		npf_nat_sysinit(void);
 void		npf_nat_sysfini(void);
-npf_natpolicy_t *npf_nat_newpolicy(in_addr_t);
+npf_natpolicy_t *npf_nat_newpolicy(int, int, in_addr_t, in_port_t);
 void		npf_nat_freepolicy(npf_natpolicy_t *);
 void		npf_nat_flush(void);
 void		npf_nat_reload(npf_ruleset_t *);
 
-int		npf_natout(npf_cache_t *, npf_session_t *, nbuf_t *,
+int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *,
 		    struct ifnet *, const int);
-int		npf_natin(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
-
 void		npf_nat_expire(npf_nat_t *);
-void		npf_nat_getlocal(npf_nat_t *, in_addr_t *, in_port_t *);
+void		npf_nat_getorig(npf_nat_t *, in_addr_t *, in_port_t *);
 void		npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
 
 /* ALG interface. */
--- a/sys/net/npf/npf_inet.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_inet.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_inet.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -242,7 +242,10 @@
 	return true;
 }
 
-static inline bool
+/*
+ * npf_fetch_tcpfl: fetch TCP flags and store into the cache.
+ */
+bool
 npf_fetch_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
 {
 	u_int offby;
@@ -257,24 +260,13 @@
 }
 
 /*
- * npf_cache_all_ip4: general routine to cache all relevant IPv4 and
+ * npf_cache_all: general routine to cache all relevant IPv4 and
  * TCP, UDP or ICMP data.
  */
 bool
-npf_cache_all_ip4(npf_cache_t *npc, nbuf_t *nbuf, const int layer)
+npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
 {
 	void *n_ptr = nbuf_dataptr(nbuf);
-	u_int offby;
-
-	if (layer == NPF_LAYER_2) {
-		/* Ethernet: match if ETHERTYPE_IP and if so - advance. */
-		if (npf_match_ether(nbuf, 1, 0, ETHERTYPE_IP, &offby))
-			return false;
-		if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
-			return false;
-		/* Cache Ethernet header length. XXX */
-		npc->npc_elen = offby;
-	}
 
 	/* IPv4: get protocol, source and destination addresses. */
 	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
--- a/sys/net/npf/npf_instr.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_instr.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_instr.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_instr.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -181,8 +181,7 @@
  * npf_match_icmp4: match ICMPv4 packet.
  */
 int
-npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
-    const int type, const int code)
+npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t tc)
 {
 
 	if (!npf_iscached(npc, NPC_ICMP)) {
@@ -200,10 +199,35 @@
 		}
 		KASSERT(npf_iscached(npc, NPC_ICMP));
 	}
-	/* Match, if required. */
-	if (type != ~0 && type != npc->npc_icmp_type)
-		return -1;
-	if (code != ~0 && code != npc->npc_icmp_code)
-		return -1;
+	/* Match code/type, if required. */
+	if ((1 << 31) & tc) {
+		const uint8_t type = (tc >> 8) & 0xff;
+		if (type != npc->npc_icmp_type) {
+			return -1;
+		}
+	}
+	if ((1 << 30) & tc) {
+		const uint8_t code = tc & 0xff;
+		if (code != npc->npc_icmp_code) {
+			return -1;
+		}
+	}
 	return 0;
 }
+
+/*
+ * npf_match_tcpfl: match TCP flags.
+ */
+int
+npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t fl)
+{
+	const uint8_t tcpfl = (fl >> 8) & 0xff, mask = fl & 0xff;
+
+	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
+		return -1;
+	}
+	if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
+		return -1;
+	}
+	return ((npc->npc_tcp_flags & mask) == tcpfl) ? 0 : -1;
+}
--- a/sys/net/npf/npf_mbuf.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_mbuf.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_mbuf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_mbuf.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 #endif
 
 #include <sys/param.h>
@@ -132,10 +132,11 @@
 		l = m->m_len - off;
 		KASSERT(l < len);
 		len -= l;
-		if (wr) {
+		if (wr == NBUF_DATA_WRITE) {
 			while (l--)
 				*d++ = *b++;
 		} else {
+			KASSERT(wr == NBUF_DATA_READ);
 			while (l--)
 				*b++ = *d++;
 		}
@@ -158,10 +159,11 @@
 	KASSERT(len <= m->m_len);
 
 	/* Non-overlapping case: fetch the actual data. */
-	if (wr) {
+	if (wr == NBUF_DATA_WRITE) {
 		while (len--)
 			*d++ = *b++;
 	} else {
+		KASSERT(wr == NBUF_DATA_READ);
 		while (len--)
 			*b++ = *d++;
 	}
--- a/sys/net/npf/npf_nat.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_nat.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -39,10 +39,26 @@
  *	NAT module has a separate ruleset, where rules contain associated
  *	NAT policy, thus flexible filter criteria can be used.
  *
+ * Translation types
+ *
+ *	There are two types of translation: outbound (NPF_NATOUT) and
+ *	inbound (NPF_NATIN).  It should not be confused with connection
+ *	direction.
+ *
+ *	Outbound NAT rewrites:
+ *	- Source on "forwards" stream.
+ *	- Destination on "backwards" stream.
+ *	Inbound NAT rewrites:
+ *	- Destination on "forwards" stream.
+ *	- Source on "backwards" stream.
+ *
+ *	It should be noted that bi-directional NAT is a combined outbound
+ *	and inbound translation, therefore constructed as two policies.
+ *
  * NAT policies and port maps
  *
- *	NAT policy is applied when a packet matches the rule.  Apart from
- *	filter criteria, NAT policy has a translation (gateway) IP address
+ *	NAT (translation) policy is applied when a packet matches the rule.
+ *	Apart from filter criteria, NAT policy has a translation IP address
  *	and associated port map.  Port map is a bitmap used to reserve and
  *	use unique TCP/UDP ports for translation.  Port maps are unique to
  *	the IP addresses, therefore multiple NAT policies with the same IP
@@ -51,7 +67,7 @@
  * NAT sessions and translation entries
  *
  *	NAT module relies on session management module.  Each "NAT" session
- *	has an associated translation entry (npf_nat_t).  It contains local
+ *	has an associated translation entry (npf_nat_t).  It contains saved
  *	i.e. original IP address with port and translation port, allocated
  *	from the port map.  Each NAT translation entry is associated with
  *	the policy, which contains translation IP address.  Allocated port
@@ -61,7 +77,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -94,26 +110,29 @@
 /* NAT policy structure. */
 struct npf_natpolicy {
 	LIST_ENTRY(npf_natpolicy)	n_entry;
-	in_addr_t			n_gw_ip;
+	int				n_type;
+	int				n_flags;
+	in_addr_t			n_taddr;
+	in_port_t			n_tport;
 	npf_portmap_t *			n_portmap;
 };
 
 /* NAT translation entry for a session. */ 
 struct npf_nat {
 	npf_natpolicy_t *		nt_natpolicy;
-	/* Local address and port (for backwards translation). */
-	in_addr_t			nt_laddr;
-	in_port_t			nt_lport;
-	/* Translation port (for forwards). */
+	/* Original address and port (for backwards translation). */
+	in_addr_t			nt_oaddr;
+	in_port_t			nt_oport;
+	/* Translation port (for redirects). */
 	in_port_t			nt_tport;
 	/* ALG (if any) associated with this NAT entry. */
 	npf_alg_t *			nt_alg;
 	uintptr_t			nt_alg_arg;
 };
 
-static npf_ruleset_t *			nat_ruleset;
-static LIST_HEAD(, npf_natpolicy)	nat_policy_list;
-static pool_cache_t			nat_cache;
+static npf_ruleset_t *			nat_ruleset	__read_mostly;
+static LIST_HEAD(, npf_natpolicy)	nat_policy_list	__read_mostly;
+static pool_cache_t			nat_cache	__read_mostly;
 
 /*
  * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
@@ -141,13 +160,13 @@
 }
 
 /*
- * npf_nat_newpolicy: allocate a new NAT policy.
+ * npf_nat_newpolicy: create a new NAT policy.
  *
  * => Shares portmap if policy is on existing translation address.
  * => XXX: serialise at upper layer.
  */
 npf_natpolicy_t *
-npf_nat_newpolicy(in_addr_t gip)
+npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
 {
 	npf_natpolicy_t *np, *it;
 	npf_portmap_t *pm;
@@ -156,12 +175,20 @@
 	if (np == NULL) {
 		return NULL;
 	}
-	np->n_gw_ip = gip;
+	KASSERT(type == NPF_NATIN || type == NPF_NATOUT);
+	np->n_type = type;
+	np->n_flags = flags;
+	np->n_taddr = taddr;
+	np->n_tport = tport;
+
+	pm = NULL;
+	if ((flags & NPF_NAT_PORTMAP) == 0) {
+		goto nopm;
+	}
 
 	/* Search for a NAT policy using the same translation address. */
-	pm = NULL;
 	LIST_FOREACH(it, &nat_policy_list, n_entry) {
-		if (it->n_gw_ip != np->n_gw_ip)
+		if (it->n_taddr != np->n_taddr)
 			continue;
 		pm = it->n_portmap;
 		break;
@@ -180,6 +207,7 @@
 		/* Share the port map. */
 		pm->p_refcnt++;
 	}
+nopm:
 	np->n_portmap = pm;
 	/*
 	 * Note: old policies with new might co-exist in the list,
@@ -200,7 +228,8 @@
 	npf_portmap_t *pm = np->n_portmap;
 
 	LIST_REMOVE(np, n_entry);
-	if (--pm->p_refcnt == 0) {
+	if (pm && --pm->p_refcnt == 0) {
+		KASSERT((np->n_flags & NPF_NAT_PORTMAP) != 0);
 		kmem_free(pm, sizeof(npf_portmap_t) +
 		    (PORTMAP_SIZE * sizeof(uint32_t)));
 	}
@@ -245,7 +274,7 @@
 				/* No space. */
 				return 0;
 			}
-			/* This bitmap is sfilled, next. */
+			/* This bitmap is filled, next. */
 			idx = (idx ? idx : PORTMAP_SIZE) - 1;
 			continue;
 		}
@@ -282,115 +311,219 @@
 }
 
 /*
- * npf_natout:
- *	- Inspect packet for a NAT policy, unless session with NAT
- *	  association already exists.
- *	- Perform "forwards" translation: rewrite source address, etc.
- *	- Establish sessions or if already exists, associate NAT policy.
+ * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
+ */
+static npf_natpolicy_t *
+npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, struct ifnet *ifp, const int di)
+{
+	npf_rule_t *rl;
+
+	rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, di, NPF_LAYER_3);
+
+	return rl ? npf_rule_getnat(rl) : NULL;
+}
+
+/*
+ * npf_nat_create: create a new NAT translation entry.
+ */
+static npf_nat_t *
+npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
+{
+	const int proto = npc->npc_proto;
+	npf_nat_t *nt;
+
+	/* New NAT association. */
+	nt = pool_cache_get(nat_cache, PR_NOWAIT);
+	if (nt == NULL){
+		return NULL;
+	}
+	nt->nt_natpolicy = np;
+	nt->nt_alg = NULL;
+
+	/* Save the original address which may be rewritten. */
+	if (np->n_type == NPF_NATOUT) {
+		/* Source (local) for Outbound NAT. */
+		nt->nt_oaddr = npc->npc_srcip;
+	} else {
+		/* Destination (external) for Inbound NAT. */
+		KASSERT(np->n_type == NPF_NATIN);
+		nt->nt_oaddr = npc->npc_dstip;
+	}
+
+	/*
+	 * Port translation, if required, and if it is TCP/UDP.
+	 */
+	if ((np->n_flags & NPF_NAT_PORTS) == 0 ||
+	    (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) {
+		nt->nt_oport = 0;
+		nt->nt_tport = 0;
+		return nt;
+	}
+	/* Save a relevant TCP/UDP port. */
+	KASSERT(npf_iscached(npc, NPC_PORTS));
+	if (np->n_type == NPF_NATOUT) {
+		nt->nt_oport = npc->npc_sport;
+	} else {
+		nt->nt_oport = npc->npc_dport;
+	}
+	/* Get a new port for translation. */
+	if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
+		nt->nt_tport = npf_nat_getport(np);
+	} else {
+		nt->nt_tport = np->n_tport;
+	}
+	return nt;
+}
+
+/*
+ * npf_nat_translate: perform address and/or port translation.
+ */
+static int
+npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
+    const bool forw, const int di)
+{
+	const npf_natpolicy_t *np = nt->nt_natpolicy;
+	void *n_ptr = nbuf_dataptr(nbuf);
+	in_addr_t addr;
+	in_port_t port;
+
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+
+	if (forw) {
+		/* "Forwards" stream: use translation address/port. */
+		KASSERT(
+		    (np->n_type == NPF_NATIN && di == PFIL_IN) ^
+		    (np->n_type == NPF_NATOUT && di == PFIL_OUT)
+		);
+		addr = np->n_taddr;
+		port = nt->nt_tport;
+	} else {
+		/* "Backwards" stream: use original address/port. */
+		KASSERT(
+		    (np->n_type == NPF_NATIN && di == PFIL_OUT) ^
+		    (np->n_type == NPF_NATOUT && di == PFIL_IN)
+		);
+		addr = nt->nt_oaddr;
+		port = nt->nt_oport;
+	}
+
+	/* Execute ALG hooks first. */
+	npf_alg_exec(npc, nbuf, nt, di);
+
+	/*
+	 * Address translation: rewrite source/destination address, depending
+	 * on direction (PFIL_OUT - for source, PFIL_IN - for destination).
+	 * Note: cache will be used in npf_rwrport(), update only in the end.
+	 */
+	if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) {
+		return EINVAL;
+	}
+	if ((np->n_flags & NPF_NAT_PORTS) == 0) {
+		/* Cache new address. */
+		if (di == PFIL_OUT) {
+			npc->npc_srcip = addr;
+		} else {
+			npc->npc_dstip = addr;
+		}
+		return 0;
+	}
+	switch (npc->npc_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		KASSERT(npf_iscached(npc, NPC_PORTS));
+		/* Rewrite source/destination port. */
+		if (!npf_rwrport(npc, nbuf, n_ptr, di, port, addr)) {
+			return EINVAL;
+		}
+		break;
+	case IPPROTO_ICMP:
+		/* None. */
+		break;
+	default:
+		return ENOTSUP;
+	}
+	/* Cache new address and port. */
+	if (di == PFIL_OUT) {
+		npc->npc_srcip = addr;
+		npc->npc_sport = port;
+	} else {
+		npc->npc_dstip = addr;
+		npc->npc_dport = port;
+	}
+	return 0;
+}
+
+/*
+ * npf_do_nat:
+ *	- Inspect packet for a NAT policy, unless a session with a NAT
+ *	  association already exists.  In such case, determine whether it
+ *	  is a "forwards" or "backwards" stream.
+ *	- Perform translation: rewrite source address if "forwards" stream
+ *	  and destination address if "backwards".
+ *	- Establish sessions or, if already exists, associate a NAT policy.
  */
 int
-npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
-    struct ifnet *ifp, const int layer)
+npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
+    struct ifnet *ifp, const int di)
 {
-	const int proto = npc->npc_proto;
-	void *n_ptr = nbuf_dataptr(nbuf);
-	npf_session_t *nse = NULL; /* XXXgcc */
+	npf_session_t *nse = NULL;
 	npf_natpolicy_t *np;
 	npf_nat_t *nt;
-	npf_rule_t *rl;
-	in_addr_t gwip;
-	in_port_t tport;
 	int error;
-	bool new;
+	bool forw, new;
 
 	/* All relevant IPv4 data should be already cached. */
 	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
 		return 0;
 	}
 
-	/* Detect if there is a linked session pointing to the NAT entry. */
-	nt = se ? npf_session_retlinknat(se) : NULL;
-	if (nt) {
+	/*
+	 * Return the NAT entry associated with the session, if any.
+	 * Assumptions:
+	 * - If associated via linked session, then "forwards" stream.
+	 * - If associated directly, then "backwards" stream.
+	 */
+	if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
 		np = nt->nt_natpolicy;
 		new = false;
-		goto skip;
+		goto translate;
 	}
 
-	/* Inspect packet against NAT ruleset, return a policy. */
-	rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer);
-	np = rl ? npf_rule_getnat(rl) : NULL;
+	/* Inspect the packet for a NAT policy, if there is no session. */
+	np = npf_nat_inspect(npc, nbuf, ifp, di);
 	if (np == NULL) {
 		/* If packet does not match - done. */
 		return 0;
 	}
+	forw = true;
 
-	/* New NAT association. */
-	nt = pool_cache_get(nat_cache, PR_NOWAIT);
-	if (nt == NULL){
+	/* Create a new NAT translation entry. */
+	nt = npf_nat_create(npc, np);
+	if (nt == NULL) {
 		return ENOMEM;
 	}
-	nt->nt_natpolicy = np;
-	nt->nt_alg = NULL;
 	new = true;
 
-	/* Save local (source) address. */
-	nt->nt_laddr = npc->npc_srcip;
-
-	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
-		/* Also, save local TCP/UDP port. */
-		KASSERT(npf_iscached(npc, NPC_PORTS));
-		nt->nt_lport = npc->npc_sport;
-		/* Get a new port for translation. */
-		nt->nt_tport = npf_nat_getport(np);
-	} else {
-		nt->nt_lport = 0;
-		nt->nt_tport = 0;
-	}
-
-	/* Match any ALGs. */
-	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
-
-	/* If there is no local session, establish one before translation. */
+	/*
+	 * If there is no local session (no "keep state" rule - unusual, but
+	 * possible configuration), establish one before translation.  Note
+	 * that it is not a "pass" session, therefore passing of "backwards"
+	 * stream depends on other, stateless filtering rules.
+	 */
 	if (se == NULL) {
-		nse = npf_session_establish(npc, NULL, PFIL_OUT);
+		nse = npf_session_establish(npc, NULL, di);
 		if (nse == NULL) {
 			error = ENOMEM;
 			goto out;
 		}
 		se = nse;
-	} else {
-		nse = NULL;
 	}
-skip:
-	if (layer == NPF_LAYER_2 && /* XXX */
-	    (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL)
-		return EINVAL;
-
-	/* Execute ALG hooks first. */
-	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
-
-	gwip = np->n_gw_ip;
-	tport = nt->nt_tport;
-
-	/*
-	 * Perform translation: rewrite source address et al.
-	 * Note: cache may be used in npf_rwrport(), update only in the end.
-	 */
-	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) {
-		error = EINVAL;
+translate:
+	/* Perform the translation. */
+	error = npf_nat_translate(npc, nbuf, nt, forw, di);
+	if (error) {
 		goto out;
 	}
-	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
-		KASSERT(tport != 0);
-		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) {
-			error = EINVAL;
-			goto out;
-		}
-	}
-	/* Success: cache new address and port (if any). */
-	npc->npc_srcip = gwip;
-	npc->npc_sport = tport;
-	error = 0;
 
 	if (__predict_false(new)) {
 		npf_session_t *natse;
@@ -400,7 +533,7 @@
 		 *
 		 * Note: packet now has a translated address in the cache.
 		 */
-		natse = npf_session_establish(npc, nt, PFIL_OUT);
+		natse = npf_session_establish(npc, nt, di);
 		if (natse == NULL) {
 			error = ENOMEM;
 			goto out;
@@ -413,13 +546,12 @@
 out:
 		if (error) {
 			if (nse != NULL) {
-				/* XXX: expire local session if new? */
+				/* XXX: Expire it?? */
 			}
 			/* Will free the structure and return the port. */
 			npf_nat_expire(nt);
 		}
 		if (nse != NULL) {
-			/* Drop the reference local session was new. */
 			npf_session_release(nse);
 		}
 	}
@@ -427,72 +559,14 @@
 }
 
 /*
- * npf_natin:
- *	- Inspect packet for a session with associated NAT policy.
- *	- Perform "backwards" translation: rewrite destination address, etc.
- */
-int
-npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer)
-{
-	npf_nat_t *nt = se ? npf_session_retnat(se) : NULL;
-
-	if (nt == NULL) {
-		/* No association - no translation. */
-		return 0;
-	}
-	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
-
-	void *n_ptr = nbuf_dataptr(nbuf);
-	in_addr_t laddr = nt->nt_laddr;
-	in_port_t lport = nt->nt_lport;
-
-	if (layer == NPF_LAYER_2) {
-		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
-		if (n_ptr == NULL) {
-			return EINVAL;
-		}
-	}
-
-	/* Execute ALG hooks first. */
-	npf_alg_exec(npc, nbuf, nt, PFIL_IN);
-
-	/*
-	 * Address translation: rewrite destination address.
-	 * Note: cache will be used in npf_rwrport(), update only in the end.
-	 */
-	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) {
-		return EINVAL;
-	}
-	switch (npc->npc_proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-		KASSERT(npf_iscached(npc, NPC_PORTS));
-		/* Rewrite destination port. */
-		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) {
-			return EINVAL;
-		}
-		break;
-	case IPPROTO_ICMP:
-		/* None. */
-		break;
-	default:
-		return ENOTSUP;
-	}
-	/* Cache new address and port. */
-	npc->npc_dstip = laddr;
-	npc->npc_dport = lport;
-	return 0;
-}
-
-/*
- * npf_nat_getlocal: return local IP address and port from translation entry.
+ * npf_nat_getorig: return original IP address and port from translation entry.
  */
 void
-npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
+npf_nat_getorig(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
 {
 
-	*addr = nt->nt_laddr;
-	*port = nt->nt_lport;
+	*addr = nt->nt_oaddr;
+	*port = nt->nt_oport;
 }
 
 void
@@ -509,9 +583,10 @@
 void
 npf_nat_expire(npf_nat_t *nt)
 {
+	npf_natpolicy_t *np = nt->nt_natpolicy;
 
-	if (nt->nt_tport) {
-		npf_natpolicy_t *np = nt->nt_natpolicy;
+	if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
+		KASSERT(nt->nt_tport != 0);
 		npf_nat_putport(np, nt->nt_tport);
 	}
 	pool_cache_put(nat_cache, nt);
@@ -531,14 +606,15 @@
 	}
 	LIST_FOREACH(np, &nat_policy_list, n_entry) {
 skip:
-		ip.s_addr = np->n_gw_ip;
-		printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip));
+		ip.s_addr = np->n_taddr;
+		printf("\tNAT policy: type = %d, flags = %d, taddr = %s\n",
+		    np->n_type, np->n_flags, inet_ntoa(ip));
 		if (nt == NULL) {
 			continue;
 		}
-		ip.s_addr = nt->nt_laddr;
-		printf("\tNAT: original address %s, lport %d, tport = %d\n",
-		    inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport));
+		ip.s_addr = nt->nt_oaddr;
+		printf("\tNAT: original address %s, oport %d, tport = %d\n",
+		    inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport));
 		if (nt->nt_alg) {
 			printf("\tNAT ALG = %p, ARG = %p\n",
 			    nt->nt_alg, (void *)nt->nt_alg_arg);
--- a/sys/net/npf/npf_ncode.h	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_ncode.h	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ncode.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_ncode.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -103,5 +103,6 @@
 
 #define	NPF_OPCODE_TCP_PORTS		0xa0
 #define	NPF_OPCODE_UDP_PORTS		0xa1
+#define	NPF_OPCODE_TCP_FLAGS		0xa2
 
 #endif
--- a/sys/net/npf/npf_processor.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_processor.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_processor.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -55,7 +55,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 #endif
 
 #include <sys/param.h>
@@ -154,7 +154,7 @@
 	i_ptr = ncode;
 	regs[0] = layer;
 
-	lcount = NPF_LOOP_LIMIT;	/* XXX */
+	lcount = NPF_LOOP_LIMIT;
 	cmpval = 0;
 
 	/* Note: offset = n_ptr - nbuf_dataptr(nbuf); */
@@ -304,10 +304,15 @@
 		i_ptr = nc_fetch_double(i_ptr, &n, &i);
 		cmpval = npf_match_udp_ports(npc, nbuf, n_ptr, n, i);
 		break;
+	case NPF_OPCODE_TCP_FLAGS:
+		/* TCP flags/mask. */
+		i_ptr = nc_fetch_word(i_ptr, &n);
+		cmpval = npf_match_tcpfl(npc, nbuf, n_ptr, n);
+		break;
 	case NPF_OPCODE_ICMP4:
-		/* ICMP type, code. */
-		i_ptr = nc_fetch_double(i_ptr, &n, &i);
-		cmpval = npf_match_icmp4(npc, nbuf, n_ptr, n, i);
+		/* ICMP type/code. */
+		i_ptr = nc_fetch_word(i_ptr, &n);
+		cmpval = npf_match_icmp4(npc, nbuf, n_ptr, n);
 		break;
 	default:
 		/* Invalid instruction. */
@@ -447,8 +452,11 @@
 	case NPF_OPCODE_UDP_PORTS:
 		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
 		break;
+	case NPF_OPCODE_TCP_FLAGS:
+		error = nc_ptr_check(&iptr, nc, sz, 1, NULL, 0);
+		break;
 	case NPF_OPCODE_ICMP4:
-		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		error = nc_ptr_check(&iptr, nc, sz, 1, NULL, 0);
 		break;
 	default:
 		/* Invalid instruction. */
--- a/sys/net/npf/npf_ruleset.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_ruleset.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ruleset.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -230,6 +230,7 @@
 	rl->r_nat = NULL;
 	return rl;
 }
+
 #if 0
 /*
  * npf_activate_rule: activate rule by inserting it into the global ruleset.
@@ -334,23 +335,19 @@
 }
 
 /*
- * npf_ruleset_match: inspect the packet against the ruleset.
+ * npf_ruleset_match: inspect the packet against the given ruleset.
  *
- * Loop for each rule in the set and perform run n-code processor of each
- * rule against the packet (nbuf chain).  If sub-ruleset found, inspect it.
- *
- * => If found, ruleset is kept read-locked.
- * => Caller should protect the nbuf chain.
+ * Loop for each rule in the set and run n-code processor of each rule
+ * against the packet (nbuf chain).
  */
 npf_rule_t *
-npf_ruleset_match(npf_ruleset_t *rlset0, npf_cache_t *npc, nbuf_t *nbuf,
+npf_ruleset_match(npf_ruleset_t *rlset, npf_cache_t *npc, nbuf_t *nbuf,
     struct ifnet *ifp, const int di, const int layer)
 {
 	npf_rule_t *final_rl = NULL, *rl;
-	npf_ruleset_t *rlset = rlset0;
 
 	KASSERT(((di & PFIL_IN) != 0) ^ ((di & PFIL_OUT) != 0));
-reinspect:
+
 	TAILQ_FOREACH(rl, &rlset->rs_queue, r_entry) {
 		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
 
@@ -374,38 +371,42 @@
 		/* Set the matching rule and check for "final". */
 		final_rl = rl;
 		if (rl->r_attr & NPF_RULE_FINAL) {
-			goto final;
+			break;
 		}
 	}
-	/* Default, if no final rule. */
-	if (final_rl == NULL) {
-		rlset = rlset0;
-		final_rl = rlset->rs_default;
-	}
-	/* Inspect the sub-ruleset, if any. */
-	if (final_rl) {
-final:
-		if (TAILQ_EMPTY(&final_rl->r_subset.rs_queue)) {
-			return final_rl;
-		}
-		rlset = &final_rl->r_subset;
-		final_rl = NULL;
-		goto reinspect;
-	}
 	return final_rl;
 }
 
 /*
  * npf_ruleset_inspect: inspection of the main ruleset for filtering.
+ * If sub-ruleset is found, inspect it.
+ *
+ * => If found, ruleset is kept read-locked.
+ * => Caller should protect the nbuf chain.
  */
 npf_rule_t *
 npf_ruleset_inspect(npf_cache_t *npc, nbuf_t *nbuf,
     struct ifnet *ifp, const int di, const int layer)
 {
+	npf_ruleset_t *rlset = ruleset;
 	npf_rule_t *rl;
+	bool defed;
+
+	defed = false;
+	rw_enter(&ruleset_lock, RW_READER);
+reinspect:
+	rl = npf_ruleset_match(rlset, npc, nbuf, ifp, di, layer);
 
-	rw_enter(&ruleset_lock, RW_READER);
-	rl = npf_ruleset_match(ruleset, npc, nbuf, ifp, di, layer);
+	/* If no final rule, then - default. */
+	if (rl == NULL && !defed) {
+		rl = ruleset->rs_default;
+		defed = true;
+	}
+	/* Inspect the sub-ruleset, if any. */
+	if (rl && !TAILQ_EMPTY(&rl->r_subset.rs_queue)) {
+		rlset = &rl->r_subset;
+		goto reinspect;
+	}
 	if (rl == NULL) {
 		rw_exit(&ruleset_lock);
 	}
@@ -419,7 +420,8 @@
  * => Releases the ruleset lock.
  */
 int
-npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl, bool *keepstate)
+npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl,
+    bool *keepstate, int *retfl)
 {
 	npf_hook_t *hk;
 
@@ -432,6 +434,8 @@
 
 	/* If not passing - drop the packet. */
 	if ((rl->r_attr & NPF_RULE_PASS) == 0) {
+		/* Determine whether any return message is needed. */
+		*retfl = rl->r_attr & (NPF_RULE_RETRST | NPF_RULE_RETICMP);
 		rw_exit(&ruleset_lock);
 		return ENETUNREACH;
 	}
@@ -455,12 +459,11 @@
 	uint32_t *op = rl->r_ncode;
 	size_t n = rl->r_nc_size;
 
-	do {
+	while (n) {
 		printf("\t> |0x%02x|\n", (uint32_t)*op);
 		op++;
 		n -= sizeof(*op);
-	} while (n);
-
+	}
 	printf("-> %s\n", (rl->r_attr & NPF_RULE_PASS) ? "pass" : "block");
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_sendpkt.c	Thu Sep 16 04:53:27 2010 +0000
@@ -0,0 +1,202 @@
+/*	$NetBSD: npf_sendpkt.c,v 1.1 2010/09/16 04:53:27 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF module for packet construction routines.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.1 2010/09/16 04:53:27 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#endif
+#include <sys/mbuf.h>
+
+#include "npf_impl.h"
+
+#define	DEFAULT_IP_TTL		(ip_defttl)
+
+/*
+ * npf_fetch_seqack: fetch SEQ and ACK numbers and compute TCP data length.
+ *
+ * => Values are stored in host byte-order; false if advance or fetch fails.
+ */
+static inline bool
+npf_fetch_seqack(nbuf_t *nbuf, npf_cache_t *npc,
+    tcp_seq *seq, tcp_seq *ack, size_t *tcpdlen)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+	u_int offby;
+	tcp_seq seqack[2];
+	uint16_t iplen;
+	uint8_t toff;
+
+	/* Fetch the total length field of the IP header (still raw here). */
+	offby = offsetof(struct ip, ip_len);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &iplen))
+		return false;
+
+	/* Skip the rest of the IP header; fetch SEQ and ACK in one read. */
+	offby = (npc->npc_hlen - offby) + offsetof(struct tcphdr, th_seq);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(seqack), seqack))
+		return false;
+
+	/* Fetch the byte after SEQ/ACK: TCP data offset (header length). */
+	offby = sizeof(seqack);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &toff))
+		return false;
+	toff >>= 4;	/* Upper four bits: header length in 32-bit words. */
+
+	*seq = ntohl(seqack[0]);
+	*ack = ntohl(seqack[1]);
+	*tcpdlen = ntohs(iplen) - npc->npc_hlen - (toff << 2);
+	return true;
+}
+
+/*
+ * npf_return_tcp: build a TCP reset (RST) reply and pass it to ip_output().
+ */
+static int
+npf_return_tcp(npf_cache_t *npc, nbuf_t *nbuf)
+{
+	struct mbuf *m;
+	struct ip *ip;
+	struct tcphdr *th;
+	tcp_seq seq, ack;
+	size_t tcpdlen, len;
+
+	/* Need cached addresses and ports, plus SEQ/ACK of the packet. */
+	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS | NPC_PORTS) ||
+	    !npf_fetch_seqack(nbuf, npc, &seq, &ack, &tcpdlen)) {
+		return EBADMSG;
+	}
+	if (npc->npc_tcp_flags & TH_RST) {	/* No reply to a RST. */
+		return 0;
+	}
+
+	/* Create and setup a network buffer: IPv4 + TCP headers only. */
+	len = sizeof(struct ip) + sizeof(struct tcphdr);
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+	if (m == NULL) {
+		return ENOMEM;
+	}
+	m->m_data += max_linkhdr;
+	m->m_len = len;
+	m->m_pkthdr.len = len;
+
+	ip = mtod(m, struct ip *);
+	memset(ip, 0, len);
+
+	/*
+	 * First fill of IPv4 header, for TCP checksum.
+	 * Note: IP length contains TCP header length.
+	 */
+	ip->ip_p = IPPROTO_TCP;
+	ip->ip_src.s_addr = npc->npc_dstip;	/* Addresses are swapped. */
+	ip->ip_dst.s_addr = npc->npc_srcip;
+	ip->ip_len = htons(sizeof(struct tcphdr));
+
+	/* Construct TCP header (ports swapped) and compute the checksum. */
+	th = (struct tcphdr *)(ip + 1);
+	th->th_sport = npc->npc_dport;
+	th->th_dport = npc->npc_sport;
+	th->th_seq = htonl(ack);
+	if (npc->npc_tcp_flags & TH_SYN) {
+		tcpdlen++;	/* SYN takes one sequence number. */
+	}
+	th->th_ack = htonl(seq + tcpdlen);
+	th->th_off = sizeof(struct tcphdr) >> 2;
+	th->th_flags = TH_ACK | TH_RST;
+	th->th_sum = in_cksum(m, len);
+
+	/* Second fill of IPv4 header, fill correct IP length. */
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(struct ip) >> 2;
+	ip->ip_tos = IPTOS_LOWDELAY;
+	ip->ip_len = htons(len);
+	ip->ip_off = htons(IP_DF);
+	ip->ip_ttl = DEFAULT_IP_TTL;
+
+	/* Pass to IP layer; its return value is our return value. */
+	return ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+}
+
+/*
+ * npf_return_icmp: return ICMP destination unreachable (admin prohibited).
+ */
+static int
+npf_return_icmp(nbuf_t *nbuf)
+{
+	struct mbuf *m = nbuf;
+	/* NOTE(review): icmp_error() presumably consumes 'm' - confirm callers. */
+	icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_ADMIN_PROHIBIT, 0, 0);
+	return 0;
+}
+
+/*
+ * npf_return_block: return TCP reset or ICMP destination unreachable.
+ */
+void
+npf_return_block(npf_cache_t *npc, nbuf_t *nbuf, const int retfl)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+
+	/* Read npc_proto only after the IPv4 data has been cached. */
+	if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr))
+		return;
+	switch (npc->npc_proto) {
+	case IPPROTO_TCP:
+		if (retfl & NPF_RULE_RETRST)
+			(void)npf_return_tcp(npc, nbuf);
+		break;
+	case IPPROTO_UDP:
+		if (retfl & NPF_RULE_RETICMP)
+			(void)npf_return_icmp(nbuf);
+		break;
+	default:
+		break;
+	}
+}
--- a/sys/net/npf/npf_session.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/sys/net/npf/npf_session.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_session.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,6 +34,11 @@
  *
  * Overview
  *
+ *	Session direction is identified by the direction of its first packet.
+ *	Packets can be incoming or outgoing with respect to an interface.
+ *	To describe the packet in the context of session direction, we will
+ *	use the terms "forwards stream" and "backwards stream".
+ *
  *	There are two types of sessions: "pass" and "NAT".  The former are
  *	sessions created according to the rules with "keep state" attribute
  *	and are used for stateful filtering.  Such sessions indicate that
@@ -80,7 +85,7 @@
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -103,10 +108,6 @@
 
 #include "npf_impl.h"
 
-#define	NPF_SESSION_TCP		1
-#define	NPF_SESSION_UDP		2
-#define	NPF_SESSION_ICMP	3
-
 struct npf_session {
 	/* Session node / list entry and reference count. */
 	union {
@@ -117,7 +118,8 @@
 	/* Session type.  Supported: TCP, UDP, ICMP. */
 	int				s_type;
 	int				s_direction;
-	int				s_state;
+	uint16_t			s_state;
+	uint16_t			s_flags;
 	/* NAT data associated with this session (if any). */
 	npf_nat_t *			s_nat;
 	npf_session_t *			s_nat_se;
@@ -152,12 +154,11 @@
 	u_int				sh_count;
 } npf_sess_hash_t;
 
-/* XXX: give a separate cache-line to these. */
-static int				sess_tracking;
+static int				sess_tracking	__cacheline_aligned;
 
 /* Session hash table, lock and session cache. */
-static npf_sess_hash_t *		sess_hashtbl;
-static pool_cache_t			sess_cache;
+static npf_sess_hash_t *		sess_hashtbl	__read_mostly;
+static pool_cache_t			sess_cache	__read_mostly;
 
 static kmutex_t				sess_lock;
 static kcondvar_t			sess_cv;
@@ -167,20 +168,23 @@
 
 /* Session expiration table.  XXX: TCP close: 2 * tcp_msl (e.g. 120)?  Maybe. */
 static const u_int sess_expire_table[ ] = {
-	[NPF_SESSION_TCP]		= 600,		/* 10 min */
-	[NPF_SESSION_UDP]		= 300,		/*  5 min */
-	[NPF_SESSION_ICMP]		= 30		/*  1 min */
+	[IPPROTO_TCP]		= 600,		/* 10 min */
+	[IPPROTO_UDP]		= 300,		/*  5 min */
+	[IPPROTO_ICMP]		= 30		/* 30 sec */
 };
 
+/* Session states and flags. */
 #define	SE_OPENING		1
-#define	SE_OPENING2		2
+#define	SE_ACKNOWLEDGE		2
 #define	SE_ESTABLISHED		3
 #define	SE_CLOSING		4
 
+#define	SE_PASSSING		0x01
+
 static void	sess_tracking_stop(void);
 static void	npf_session_worker(void *);
 
-#ifdef DEBUG
+#ifdef SE_DEBUG
 #define	DPRINTF(x)	printf x
 #else
 #define	DPRINTF(x)
@@ -221,35 +225,54 @@
 
 /*
  * Session hash table and RB-tree helper routines.
- * Order: (node1, node2) where (node1 < node2).
+ * Order: (src.id, dst.id, src.addr, dst.addr), where (node1 < node2).
  */
 
 static signed int
 sess_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
 {
-	const npf_session_t *se1 = NPF_RBN2SESENT(n1);
-	const npf_session_t *se2 = NPF_RBN2SESENT(n2);
+	const npf_session_t * const se1 = NPF_RBN2SESENT(n1);
+	const npf_session_t * const se2 = NPF_RBN2SESENT(n2);
 
-	if (se1->s_src.id < se2->s_src.id || se1->s_dst.id < se2->s_dst.id)
-		return 1;
-	if (se1->s_src.id > se2->s_src.id || se1->s_dst.id > se2->s_dst.id)
-		return -1;
+	/*
+	 * Note: must compare equivalent streams.
+	 * See sess_rbtree_cmp_key() below.
+	 */
+	if (se1->s_direction == se2->s_direction) {
+		/*
+		 * Direction "forwards".
+		 */
+		if (se1->s_src.id != se2->s_src.id)
+			return (se1->s_src.id < se2->s_src.id) ? -1 : 1;
+		if (se1->s_dst.id != se2->s_dst.id)
+			return (se1->s_dst.id < se2->s_dst.id) ? -1 : 1;
 
-	if (se1->s_src_addr < se2->s_src_addr ||
-	    se1->s_dst_addr < se2->s_dst_addr)
-		return -1;
-	if (se1->s_src_addr > se2->s_src_addr ||
-	    se1->s_dst_addr > se2->s_dst_addr)
-		return 1;
+		if (__predict_false(se1->s_src_addr != se2->s_src_addr))
+			return (se1->s_src_addr < se2->s_src_addr) ? -1 : 1;
+		if (__predict_false(se1->s_dst_addr != se2->s_dst_addr))
+			return (se1->s_dst_addr < se2->s_dst_addr) ? -1 : 1;
+	} else {
+		/*
+		 * Direction "backwards".
+		 */
+		if (se1->s_src.id != se2->s_dst.id)
+			return (se1->s_src.id < se2->s_dst.id) ? -1 : 1;
+		if (se1->s_dst.id != se2->s_src.id)
+			return (se1->s_dst.id < se2->s_src.id) ? -1 : 1;
 
+		if (__predict_false(se1->s_src_addr != se2->s_dst_addr))
+			return (se1->s_src_addr < se2->s_dst_addr) ? -1 : 1;
+		if (__predict_false(se1->s_dst_addr != se2->s_src_addr))
+			return (se1->s_dst_addr < se2->s_src_addr) ? -1 : 1;
+	}
 	return 0;
 }
 
 static signed int
 sess_rbtree_cmp_key(const struct rb_node *n1, const void *key)
 {
-	const npf_session_t *se = NPF_RBN2SESENT(n1);
-	const npf_cache_t *npc = key;
+	const npf_session_t * const se = NPF_RBN2SESENT(n1);
+	const npf_cache_t * const npc = key;
 	in_port_t sport, dport;
 	in_addr_t src, dst;
 
@@ -264,16 +287,17 @@
 	}
 
 	/* Ports are the main criteria and are first. */
-	if (se->s_src.id < sport || se->s_dst.id < dport)
-		return 1;
-	if (se->s_src.id > sport || se->s_dst.id > dport)
-		return -1;
+	if (se->s_src.id != sport)
+		return (se->s_src.id < sport) ? -1 : 1;
+
+	if (se->s_dst.id != dport)
+		return (se->s_dst.id < dport) ? -1 : 1;
 
 	/* Note that hash should minimise differentiation on these. */
-	if (__predict_false(se->s_src_addr < src || se->s_dst_addr < dst))
-		return 1;
-	if (__predict_false(se->s_src_addr > src || se->s_dst_addr > dst))
-		return -1;
+	if (__predict_false(se->s_src_addr != src))
+		return (se->s_src_addr < src) ? -1 : 1;
+	if (__predict_false(se->s_dst_addr != dst))
+		return (se->s_dst_addr < dst) ? -1 : 1;
 
 	return 0;
 }
@@ -296,6 +320,25 @@
 	return &sess_hashtbl[hash & SESS_HASH_MASK];
 }
 
+static npf_sess_hash_t *
+sess_hash_construct(void)
+{
+	npf_sess_hash_t *ht, *sh;
+	u_int i;
+	/* Allocate the table; set up tree, lock and counter of each bucket. */
+	ht = kmem_alloc(SESS_HASH_BUCKETS * sizeof(*sh), KM_SLEEP);
+	if (ht == NULL) {	/* NOTE(review): likely unreachable with KM_SLEEP. */
+		return NULL;
+	}
+	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
+		sh = &ht[i];
+		rb_tree_init(&sh->sh_tree, &sess_rbtree_ops);
+		rw_init(&sh->sh_lock);
+		sh->sh_count = 0;
+	}
+	return ht;
+}
+
 /*
  * Session tracking routines.  Note: manages tracking structures.
  */
@@ -303,27 +346,18 @@
 static int
 sess_tracking_start(void)
 {
-	npf_sess_hash_t *sh;
-	u_int i;
 
 	sess_cache = pool_cache_init(sizeof(npf_session_t), coherency_unit,
 	    0, 0, "npfsespl", NULL, IPL_NET, NULL, NULL, NULL);
 	if (sess_cache == NULL)
 		return ENOMEM;
 
-	sess_hashtbl = kmem_alloc(SESS_HASH_BUCKETS * sizeof(*sh), KM_SLEEP);
+	sess_hashtbl = sess_hash_construct();
 	if (sess_hashtbl == NULL) {
 		pool_cache_destroy(sess_cache);
 		return ENOMEM;
 	}
 
-	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
-		sh = &sess_hashtbl[i];
-		rb_tree_init(&sh->sh_tree, &sess_rbtree_ops);
-		rw_init(&sh->sh_lock);
-		sh->sh_count = 0;
-	}
-
 	/* Make it visible before thread start. */
 	sess_tracking = 1;
 
@@ -421,14 +455,14 @@
 			return true;
 		}
 		/* ACK seen after SYN-ACK: session fully established. */
-		if (se->s_state == SE_OPENING2 && !backwards) {
+		if (se->s_state == SE_ACKNOWLEDGE && !backwards) {
 			se->s_state = SE_ESTABLISHED;
 		}
 		break;
 	case TH_SYN | TH_ACK:
 		/* SYN-ACK seen, wait for ACK. */
 		if (se->s_state == SE_OPENING && backwards) {
-			se->s_state = SE_OPENING2;
+			se->s_state = SE_ACKNOWLEDGE;
 		}
 		break;
 	case TH_RST:
@@ -447,14 +481,14 @@
  */
 npf_session_t *
 npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
-    struct ifnet *ifp, const int di, const int layer)
+    struct ifnet *ifp, const int di)
 {
 	npf_sess_hash_t *sh;
 	struct rb_node *nd;
 	npf_session_t *se;
 
 	/* Attempt to fetch and cache all relevant IPv4 data. */
-	if (!sess_tracking || !npf_cache_all_ip4(npc, nbuf, layer)) {
+	if (!sess_tracking || !npf_cache_all(npc, nbuf)) {
 		return NULL;
 	}
 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
@@ -529,6 +563,7 @@
 	/* Reference count and direction. */
 	se->s_refcnt = 1;
 	se->s_direction = di;
+	se->s_flags = 0;
 
 	/* NAT and backwards session. */
 	se->s_nat = nt;
@@ -539,12 +574,13 @@
 	se->s_src_addr = npc->npc_srcip;
 	se->s_dst_addr = npc->npc_dstip;
 
+	/* Protocol. */
+	se->s_type = npc->npc_proto;
+
 	switch (npc->npc_proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
 		KASSERT(npf_iscached(npc, NPC_PORTS));
-		se->s_type = (npc->npc_proto == IPPROTO_TCP) ?
-		    NPF_SESSION_TCP : NPF_SESSION_UDP;
 		/* Additional IDs: ports. */
 		se->s_src.id = npc->npc_sport;
 		se->s_dst.id = npc->npc_dport;
@@ -552,7 +588,6 @@
 	case IPPROTO_ICMP:
 		if (npf_iscached(npc, NPC_ICMP_ID)) {
 			/* ICMP query ID. (XXX) */
-			se->s_type = NPF_SESSION_ICMP;
 			se->s_src.id = npc->npc_icmp_id;
 			se->s_dst.id = npc->npc_icmp_id;
 			break;
@@ -595,7 +630,23 @@
 {
 
 	KASSERT(se->s_refcnt > 0);
-	return true;	/* FIXME */
+	return (se->s_flags & SE_PASSSING) != 0;
+}
+
+/*
+ * npf_session_setpass: set the "pass" flag on the session and, if a
+ * session is linked through s_nat_se, on that linked session as well.
+ */
+void
+npf_session_setpass(npf_session_t *se)
+{
+
+	KASSERT(se->s_refcnt > 0);
+	se->s_flags |= SE_PASSSING;		/* XXXSMP: non-atomic update */
+	if (se->s_nat_se) {
+		se = se->s_nat_se;
+		se->s_flags |= SE_PASSSING;	/* XXXSMP: non-atomic update */
+	}
+}
 
 /*
@@ -611,36 +662,37 @@
 }
 
 /*
- * npf_session_retnat: return associated NAT data, if any.
+ * npf_session_link: create a link between regular and NAT sessions.
+ * Note: NAT session inherits the flags, including "pass" bit.
  */
-npf_nat_t *
-npf_session_retnat(const npf_session_t *se)
-{
-
-	KASSERT(se->s_refcnt > 0);
-	return se->s_nat;
-}
-
 void
 npf_session_link(npf_session_t *se, npf_session_t *natse)
 {
 
-	/* Hold a reference on a session we link. */
+	/* Hold a reference on the session we link.  Inherit the flags. */
 	KASSERT(se->s_refcnt > 0 && natse->s_refcnt > 0);
 	atomic_inc_uint(&natse->s_refcnt);
+	natse->s_flags = se->s_flags;
+
+	KASSERT(se->s_nat_se == NULL);
 	se->s_nat_se = natse;
 }
 
+/*
+ * npf_session_retnat: return associated NAT data entry and indicate
+ * whether it is a "forwards" or "backwards" stream.
+ */
 npf_nat_t *
-npf_session_retlinknat(const npf_session_t *se)
+npf_session_retnat(npf_session_t *se, const int di, bool *forw)
 {
-	npf_session_t *natse = se->s_nat_se;
 
 	KASSERT(se->s_refcnt > 0);
-	KASSERT(natse == NULL || natse->s_refcnt > 0);
-
-	/* If there is a link, we hold a reference on it. */
-	return natse ? natse->s_nat : NULL;
+	*forw = (se->s_direction == di);
+	if (se->s_nat_se) {
+		se = se->s_nat_se;
+		KASSERT(se->s_refcnt > 0);
+	}
+	return se->s_nat;
 }
 
 /*
@@ -657,7 +709,7 @@
 		etime = sess_expire_table[se->s_type];
 		break;
 	case SE_OPENING:
-	case SE_OPENING2:
+	case SE_ACKNOWLEDGE:
 	case SE_CLOSING:
 		etime = 10;	/* XXX: figure out reasonable time */
 		break;
@@ -821,9 +873,9 @@
 			etime = (se->s_state == SE_ESTABLISHED) ?
 			    sess_expire_table[se->s_type] : 10;
 
-			printf("\t%p: type(%d) di = %d, tsdiff = %d, "
-			    "etime = %d\n", se, se->s_type, se->s_direction,
-			    (int)tsdiff.tv_sec, etime);
+			printf("\t%p: type(%d) di %d, pass %d, tsdiff %d, "
+			    "etime %d\n", se, se->s_type, se->s_direction,
+			    se->s_flags, (int)tsdiff.tv_sec, etime);
 			ip.s_addr = se->s_src_addr;
 			printf("\tsrc (%s, %d) ",
 			    inet_ntoa(ip), ntohs(se->s_src.port));
--- a/usr.sbin/npf/npfctl/npf.conf.5	Thu Sep 16 02:38:50 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf.conf.5	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-.\"	$NetBSD: npf.conf.5,v 1.1 2010/08/24 23:55:05 rmind Exp $
+.\"	$NetBSD: npf.conf.5,v 1.2 2010/09/16 04:53:27 rmind Exp $
 .\"
 .\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd August 24, 2010
+.Dd September 16, 2010
 .Dt NPF.CONF 5
 .Os
 .Sh NAME
@@ -95,27 +95,31 @@
 .Bd -literal
 line		= ( def | table | nat | group )
 
-def		= ( "{ a, b, ... }" | "text" | "$\*[Lt]interface\*[Gt]" )
+def		= ( \*[Lt]name\*[Gt] "=" "{ a, b, ... }" | "text" | "$\*[Lt]interface\*[Gt]" )
 iface		= ( \*[Lt]interface\*[Gt] | def )
 
 table		= "table" \*[Lt]tid\*[Gt] "type" ( "hash" | "tree" )
 		  ( "dynamic" | "file" \*[Lt]path\*[Gt] )
 
-nat		= "nat" iface "from" \*[Lt]addr/mask\*[Gt] "to" \*[Lt]addr/mask\*[Gt] "->" \*[Lt]addr\*[Gt]
+nat		= "nat" iface filt-opts "->" \*[Lt]addr\*[Gt]
+binat		= "binat" iface filt-opts "->" \*[Lt]addr\*[Gt]
+rdr		= "rdr" iface filt-opts "->" \*[Lt]addr\*[Gt] port-opts
 
 group		= "group" "(" ( "default" | group-opts ) "") ruleset
 group-opts	= "interface" iface "," [ "in" | "out" ]
 
 ruleset		= "{" rule1 \*[Lt]newline\*[Gt], rule2 \*[Lt]newline\*[Gt], ... "}"
 
-rule		= ( "block" | "pass" ) [ "in" | out" ] rule-opts
+rule		= ( "block" block-opts | "pass" ) [ "in" | "out" ] rule-opts
 		  [ "on" iface ] [ "inet" | "inet6" ] [ "proto" \*[Lt]protocol\*[Gt] ]
-		  ( "all" | filt-opts )
+		  ( "all" | filt-opts [ "flags" \*[Lt]tcp_flags\*[Gt] ] )
 
+block-opts	= [ "return-rst" | "return-icmp" | "return" ]
 rule-opts	= [ "log" ] [ "count" ] [ "quick" ]
 filt-opts	= [ "from" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
 		  [ "to" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
 port-opts	= [ "port" ( \*[Lt]port-num\*[Gt] | \*[Lt]port-from\*[Gt] ":" \*[Lt]port-to\*[Gt] | def ) ]
+proto-opts	= [ "flags" \*[Lt]tcp_flags\*[Gt] | "icmp-type" \*[Lt]type\*[Gt] "code" \*[Lt]code\*[Gt] ]
 .Ed
 .\" -----
 .Sh FILES
--- a/usr.sbin/npf/npfctl/npf_data.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_data.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_data.c,v 1.2 2010/08/23 06:01:04 jnemeth Exp $	*/
+/*	$NetBSD: npf_data.c,v 1.3 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -36,6 +36,7 @@
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <net/if.h>
+#include <netinet/tcp.h>
 
 #include <arpa/inet.h>
 #include <prop/proplib.h>
@@ -74,7 +75,7 @@
 	prop_dictionary_set(npf_dict, "version", ver);
 
 	nat_arr = prop_array_create();
-	prop_dictionary_set(npf_dict, "nat", nat_arr);
+	prop_dictionary_set(npf_dict, "translation", nat_arr);
 
 	settings_dict = prop_dictionary_create();
 	prop_dictionary_set(npf_dict, "settings", settings_dict);
@@ -92,7 +93,7 @@
 	int ret = 0, errval;
 
 #ifdef DEBUG
-	prop_dictionary_externalize_to_file(npf_dict, "/tmp/npf.plist");
+	prop_dictionary_externalize_to_file(npf_dict, "./npf.plist");
 #else
 	errval = prop_dictionary_send_ioctl(npf_dict, fd, IOC_NPF_RELOAD);
 	if (errval) {
@@ -108,8 +109,9 @@
  * Helper routines:
  *
  *	npfctl_getif() - get interface addresses and index number from name.
- *	npfctl_servname2port() - get service ports from name.
  *	npfctl_parse_v4mask() - parse address/mask integers from CIDR block.
+ *	npfctl_parse_port() - parse port number (which may be a service name).
+ *	npfctl_parse_tcpfl() - parse TCP flags.
  */
 
 static struct ifaddrs *
@@ -132,20 +134,13 @@
 	return ifent;
 }
 
-static int
-npfctl_servname2port(char *name)
+bool
+npfctl_parse_v4mask(char *ostr, in_addr_t *addr, in_addr_t *mask)
 {
-	struct servent *se;
-
-	se = getservbyname(name, NULL);
-	return se ? se->s_port : -1;
-}
-
-bool
-npfctl_parse_v4mask(char *str, in_addr_t *addr, in_addr_t *mask)
-{
+	char *str = xstrdup(ostr);
 	char *p = strchr(str, '/');
 	u_int bits;
+	bool ret;
 
 	/* In network byte order. */
 	if (p) {
@@ -155,7 +150,37 @@
 	} else {
 		*mask = 0xffffffff;
 	}
-	return inet_aton(str, (struct in_addr *)addr) != 0;
+	ret = inet_aton(str, (struct in_addr *)addr) != 0;
+	free(str);
+	return ret;
+}
+
+static bool
+npfctl_parse_port(char *ostr, bool *range, in_port_t *fport, in_port_t *tport)
+{
+	char *str = xstrdup(ostr), *sep;	/* private copy, ':' is cut */
+
+	*range = false;
+	if ((sep = strchr(str, ':')) != NULL) {
+		/* Port range "from:to" (only numeric): set the first port too. */
+		*range = true;
+		*sep = '\0';
+		*fport = htons(atoi(str));
+	} else if (isalpha((unsigned char)*str)) {
+		struct servent *se;
+
+		se = getservbyname(str, NULL);	/* NULL: any protocol */
+		if (se == NULL) {
+			free(str);
+			return false;
+		}
+		*fport = se->s_port;	/* already in network byte order */
+	} else {
+		*fport = htons(atoi(str));
+	}
+	*tport = sep ? htons(atoi(sep + 1)) : *fport;	/* single port: to == from */
+	free(str);
+	return true;
 }
 
 static void
@@ -180,6 +205,40 @@
 	}
 }
 
+static bool
+npfctl_parse_tcpfl(char *s, uint8_t *tfl, uint8_t *tfl_mask)
+{
+	uint8_t tcpfl = 0;
+	bool mask = false;
+
+	while (*s) {
+		switch (*s) {
+		case 'F': tcpfl |= TH_FIN; break;
+		case 'S': tcpfl |= TH_SYN; break;
+		case 'R': tcpfl |= TH_RST; break;
+		case 'P': tcpfl |= TH_PUSH; break;
+		case 'A': tcpfl |= TH_ACK; break;
+		case 'U': tcpfl |= TH_URG; break;
+		case 'E': tcpfl |= TH_ECE; break;
+		case 'W': tcpfl |= TH_CWR; break;
+		case '/':
+			/* Flags part is complete (input left intact); mask follows. */
+			*tfl = tcpfl;
+			tcpfl = 0;
+			mask = true;
+			break;
+		default:
+			return false;	/* unknown flag letter */
+		}
+		s++;
+	}
+	if (!mask) {
+		*tfl = tcpfl;	/* no "/mask" given: the mask equals the flags */
+	}
+	*tfl_mask = tcpfl;
+	return true;
+}
+
 /*
  * NPF table creation and construction routines.
  */
@@ -390,27 +449,15 @@
 
 	/* Generate TCP/UDP port matching blocks. */
 	for (el = dat->v_elements; el != NULL; el = el->e_next) {
-		int pfrom, pto;
-		char *sep;
+		in_port_t fport, tport;
+		bool range;
 
-		if ((sep = strchr(el->e_data, ':')) != NULL) {
-			/* Port range (only numeric). */
-			*sep = '\0';
+		if (!npfctl_parse_port(el->e_data, &range, &fport, &tport)) {
+			errx(EXIT_FAILURE, "invalid service '%s'", el->e_data);
 		}
-		if (isalpha((unsigned char)*el->e_data)) {
-			pfrom = npfctl_servname2port(el->e_data);
-			if (pfrom == -1) {
-				errx(EXIT_FAILURE, "invalid service '%s'",
-				    el->e_data);
-			}
-		} else {
-			pfrom = htons(atoi(el->e_data));
-		}
-		pto = sep ? htons(atoi(sep + 1)) : pfrom;
-
 		nblocks[0]--;
 		foff = npfctl_failure_offset(nblocks);
-		npfctl_gennc_ports(nc, foff, pfrom, pto, tcpudp, sd);
+		npfctl_gennc_ports(nc, foff, fport, tport, tcpudp, sd);
 	}
 }
 
@@ -431,12 +478,13 @@
 }
 
 void
-npfctl_rule_protodata(prop_dictionary_t rl, char *proto, var_t *from,
-    var_t *fports, var_t *to, var_t *tports)
+npfctl_rule_protodata(prop_dictionary_t rl, char *proto, char *tcp_flags,
+    int icmp_type, int icmp_code,
+    var_t *from, var_t *fports, var_t *to, var_t *tports)
 {
 	prop_data_t ncdata;
 	bool icmp, tcpudp, both;
-	int nblocks[2] = { 0, 0 };
+	int foff, nblocks[3] = { 0, 0, 0 };
 	void *ncptr, *nc;
 	size_t sz;
 
@@ -455,7 +503,6 @@
 		fports = NULL;
 		tports = NULL;
 		icmp = true;
-		nblocks[0] += 1;
 
 	} else if (strcmp(proto, "tcp") == 0) {
 		/* Just TCP. */
@@ -469,6 +516,15 @@
 		/* Default. */
 	}
 skip_proto:
+	if (icmp_type != -1) {
+		assert(tcp_flags == NULL);
+		icmp = true;
+		nblocks[2] += 1;
+	}
+	if (tcpudp && tcp_flags) {
+		assert(icmp_type == -1 && icmp_code == -1);
+		nblocks[2] += 1;
+	}
 
 	/* Calculate how blocks to determince n-code. */
 	if (from && from->v_count) {
@@ -488,6 +544,12 @@
 			nblocks[0] += tports->v_count * (both ? 2 : 1);
 	}
 
+	/* Any n-code to generate? */
+	if ((nblocks[0] + nblocks[1] + nblocks[2]) == 0) {
+		/* Done, if none. */
+		return;
+	}
+
 	/* Allocate memory for the n-code. */
 	sz = npfctl_calc_ncsize(nblocks);
 	ncptr = malloc(sz);
@@ -497,10 +559,9 @@
 	}
 	nc = ncptr;
 
-	/* Ethernet fragment (ETHERTYPE_IP), XXX. */
-	npfctl_gennc_ether(&nc, npfctl_failure_offset(nblocks), htons(0x0800));
-
-	/* Generate v4 CIDR matching blocks and TCP/UDP port matching. */
+	/*
+	 * Generate v4 CIDR matching blocks and TCP/UDP port matching.
+	 */
 	if (from) {
 		npfctl_rulenc_block(&nc, nblocks, from, fports,
 		    both, tcpudp, true);
@@ -509,16 +570,34 @@
 		npfctl_rulenc_block(&nc, nblocks, to, tports,
 		    both, tcpudp, false);
 	}
-	/* ICMP case. */
+
 	if (icmp) {
-		const int foff = npfctl_failure_offset(nblocks);
-		npfctl_gennc_icmp(&nc, foff, -1, -1);
+		/*
+		 * ICMP case.
+		 */
+		nblocks[2]--;
+		foff = npfctl_failure_offset(nblocks);
+		npfctl_gennc_icmp(&nc, foff, icmp_type, icmp_code);
+
+	} else if (tcpudp && tcp_flags) {
+		/*
+		 * TCP case, flags.
+		 */
+		uint8_t tfl = 0, tfl_mask;
+
+		nblocks[2]--;
+		foff = npfctl_failure_offset(nblocks);
+		if (!npfctl_parse_tcpfl(tcp_flags, &tfl, &tfl_mask)) {
+			errx(EXIT_FAILURE, "invalid TCP flags '%s'", tcp_flags);
+		}
+		npfctl_gennc_tcpfl(&nc, foff, tfl, tfl_mask);
 	}
 	npfctl_gennc_complete(&nc);
 
-	if ((uintptr_t)nc - (uintptr_t)ncptr != sz)
+	if ((uintptr_t)nc - (uintptr_t)ncptr != sz) {
 		errx(EXIT_FAILURE, "n-code size got wrong (%tu != %zu)",
 		    (uintptr_t)nc - (uintptr_t)ncptr, sz);
+	}
 
 #ifdef DEBUG
 	uint32_t *op = ncptr;
@@ -565,15 +644,39 @@
 }
 
 void
-npfctl_nat_setup(prop_dictionary_t rl, char *iface, char *gwip)
+npfctl_nat_setup(prop_dictionary_t rl, int type, int flags,
+    char *iface, char *taddr, char *rport)
 {
-	const int attr = NPF_RULE_PASS | NPF_RULE_OUT | NPF_RULE_FINAL;
+	int attr = NPF_RULE_PASS | NPF_RULE_FINAL;
 	in_addr_t addr, mask;
 
+	/* Translation type and flags, stored under "type" and "flags". */
+	prop_dictionary_set(rl, "type",
+	    prop_number_create_integer(type));
+	prop_dictionary_set(rl, "flags",
+	    prop_number_create_integer(flags));
+
 	/* Interface and attributes. */
+	attr |= (type == NPF_NATOUT) ? NPF_RULE_OUT : NPF_RULE_IN;
 	npfctl_rule_setattr(rl, attr, iface);
 
-	/* Gateway IP, XXX should be no mask. */
-	npfctl_parse_cidr(gwip, &addr, &mask);
-	prop_dictionary_set(rl, "gateway_ip", prop_number_create_integer(addr));
+	/* Translation IP; parsed mask is unused here, XXX should be no mask. */
+	npfctl_parse_cidr(taddr, &addr, &mask);
+	prop_dictionary_set(rl, "translation_ip",
+	    prop_number_create_integer(addr));
+
+	/* Translation port (redirect case): both parser outputs go to 'port'. */
+	if (rport) {
+		in_port_t port;
+		bool range;
+
+		if (!npfctl_parse_port(rport, &range, &port, &port)) {
+			errx(EXIT_FAILURE, "invalid service '%s'", rport);
+		}
+		if (range) {
+			errx(EXIT_FAILURE, "range is not supported for 'rdr'");
+		}
+		prop_dictionary_set(rl, "translation_port",
+		    prop_number_create_integer(port));
+	}
 }
--- a/usr.sbin/npf/npfctl/npf_ncgen.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_ncgen.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ncgen.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+/*	$NetBSD: npf_ncgen.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -48,14 +48,14 @@
 {
 	/*
 	 * Blocks:
-	 * - 13 words by npfctl_gennc_ether(), single initial block.
 	 * - 5 words each by npfctl_gennc_ports/tbl(), stored in nblocks[0].
 	 * - 6 words each by npfctl_gennc_v4cidr(), stored in nblocks[1].
+	 * - 4 words by npfctl_gennc_{icmp,tcpfl}(), stored in nblocks[2].
 	 * - 4 words by npfctl_gennc_complete(), single last fragment.
 	 */
 	return nblocks[0] * 5 * sizeof(uint32_t) +
 	    nblocks[1] * 6 * sizeof(uint32_t) +
-	    13 * sizeof(uint32_t) +
+	    nblocks[2] * 4 * sizeof(uint32_t) +
 	    4 * sizeof(uint32_t);
 }
 
@@ -65,16 +65,19 @@
 size_t
 npfctl_failure_offset(int nblocks[])
 {
-	size_t tblport_blocks, v4cidr_blocks;
+	size_t tblport_blocks, v4cidr_blocks, icmp_tcpfl;
 	/*
 	 * Take into account all blocks (plus 2 words for comparison each),
 	 * and additional 4 words to skip the last comparison and success path.
 	 */
 	tblport_blocks = (3 + 2) * nblocks[0];
 	v4cidr_blocks = (4 + 2) * nblocks[1];
-	return tblport_blocks + v4cidr_blocks + 4;
+	icmp_tcpfl = (2 + 2) * nblocks[2];
+	return tblport_blocks + v4cidr_blocks + icmp_tcpfl + 4;
 }
 
+#if 0
+
 /*
  * npfctl_gennc_ether: initial n-code fragment to check Ethernet frame.
  */
@@ -109,6 +112,8 @@
 	*ncptr = (void *)nc;
 }
 
+#endif
+
 /*
  * npfctl_gennc_v4cidr: fragment to match IPv4 CIDR.
  */
@@ -155,23 +160,23 @@
 }
 
 /*
- * npfctl_gennc_icmp: fragment to match ICMP code and type.
+ * npfctl_gennc_icmp: fragment to match ICMP type and code.
  */
 void
-npfctl_gennc_icmp(void **ncptr, int foff, int code, int type)
+npfctl_gennc_icmp(void **ncptr, int foff, int type, int code)
 {
 	uint32_t *nc = *ncptr;
 
-	/* OP, code, type (3 words) */
+	/* OP, type/code word (2 words); bit 31: match type, bit 30: match code */
 	*nc++ = NPF_OPCODE_ICMP4;
-	*nc++ = code;
-	*nc++ = type;
+	*nc++ = (type == -1 ? 0 : (1U << 31) | ((uint32_t)(type & 0xff) << 8)) |
+		(code == -1 ? 0 : (1U << 30) | (uint32_t)(code & 0xff));
 
 	/* If not equal, jump to failure block, continue otherwise (2 words). */
 	*nc++ = NPF_OPCODE_BNE;
 	*nc++ = foff;
 
-	/* + 5 words. */
+	/* + 4 words. */
 	*ncptr = (void *)nc;
 }
 
@@ -198,6 +203,26 @@
 }
 
 /*
+ * npfctl_gennc_tcpfl: fragment to match TCP flags/mask.
+ */
+void
+npfctl_gennc_tcpfl(void **ncptr, int foff, uint8_t tf, uint8_t tf_mask)
+{
+	uint32_t *nc = *ncptr;
+
+	/* OP, flags/mask word: flags in bits 8-15, mask in bits 0-7 (2 words) */
+	*nc++ = NPF_OPCODE_TCP_FLAGS;
+	*nc++ = (tf << 8) | tf_mask;
+
+	/* If not equal, jump to failure block, continue otherwise (2 words). */
+	*nc++ = NPF_OPCODE_BNE;
+	*nc++ = foff;
+
+	/* + 4 words. */
+	*ncptr = (void *)nc;
+}
+
+/*
  * npfctl_gennc_complete: append success and failure fragments.
  */
 void
--- a/usr.sbin/npf/npfctl/npf_parser.c	Thu Sep 16 02:38:50 2010 +0000
+++ b/usr.sbin/npf/npfctl/npf_parser.c	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_parser.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+/*	$NetBSD: npf_parser.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -154,8 +154,10 @@
 {
 	var_t *from_cidr = NULL, *fports = NULL;
 	var_t *to_cidr = NULL, *tports = NULL;
-	char *proto = NULL;
+	char *proto = NULL, *tcp_flags = NULL;
 	char *p, *sptr, *iface;
+	bool icmp = false, tcp = false;
+	int icmp_type = -1, icmp_code = -1;
 	int ret, attr = 0;
 
 	DPRINTF(("rule\t|%s|\n", buf));
@@ -166,12 +168,24 @@
 	/* pass or block (mandatory) */
 	if (strcmp(p, "block") == 0) {
 		attr = 0;
+		PARSE_NEXT_TOKEN();
+		/* return-rst or return-icmp */
+		if (strcmp(p, "return-rst") == 0) {
+			attr |= NPF_RULE_RETRST;
+			PARSE_NEXT_TOKEN();
+		} else if (strcmp(p, "return-icmp") == 0) {
+			attr |= NPF_RULE_RETICMP;
+			PARSE_NEXT_TOKEN();
+		} else if (strcmp(p, "return") == 0) {
+			attr |= NPF_RULE_RETRST | NPF_RULE_RETICMP;
+			PARSE_NEXT_TOKEN();
+		}
 	} else if (strcmp(p, "pass") == 0) {
 		attr = NPF_RULE_PASS;
+		PARSE_NEXT_TOKEN();
 	} else {
 		return PARSE_ERR();
 	}
-	PARSE_NEXT_TOKEN();
 
 	/* in or out */
 	if (strcmp(p, "in") == 0) {
@@ -233,8 +247,14 @@
 		PARSE_NEXT_TOKEN();
 		var_t *pvar = npfctl_parsevalue(p);
 		PARSE_NEXT_TOKEN();
+		if (pvar->v_type != VAR_SINGLE) {
+			errx(EXIT_FAILURE, "only one protocol can be specified");
+		}
 		element_t *el = pvar->v_elements;
 		proto = el->e_data;
+		/* Determine TCP, ICMP. */
+		tcp = (strcmp(proto, "tcp") == 0);
+		icmp = (strcmp(proto, "icmp") == 0);
 	}
 
 	/*
@@ -280,6 +300,38 @@
 	if (ret) {
 		return ret;
 	}
+
+	/* flags <fl/mask> */
+	if (p && strcmp(p, "flags") == 0) {
+		if (icmp) {
+			errx(EXIT_FAILURE,
+			    "TCP flags used with ICMP protocol");
+		}
+		PARSE_NEXT_TOKEN();
+		var_t *tfvar = npfctl_parsevalue(p);
+		PARSE_NEXT_TOKEN_NOCHECK();
+		if (tfvar->v_type != VAR_SINGLE) {
+			errx(EXIT_FAILURE, "invalid TCP flags");
+		}
+		element_t *el = tfvar->v_elements;
+		tcp_flags = el->e_data;
+	}
+
+	/* icmp-type <t> code <c> */
+	if (p && strcmp(p, "icmp-type") == 0) {
+		if (tcp) {
+			errx(EXIT_FAILURE,
+			    "ICMP options used with TCP protocol");
+		}
+		PARSE_NEXT_TOKEN();
+		icmp_type = atoi(p);
+		PARSE_NEXT_TOKEN_NOCHECK();
+		if (p && strcmp(p, "code") == 0) {
+			PARSE_NEXT_TOKEN();
+			icmp_code = atoi(p);
+			PARSE_NEXT_TOKEN_NOCHECK();
+		}
+	}
 last:
 	/* keep state */
 	if (p && strcmp(p, "keep") == 0) {
@@ -293,7 +345,8 @@
 	/*
 	 * Generate all protocol data.
 	 */
-	npfctl_rule_protodata(rl, proto, from_cidr, fports, to_cidr, tports);
+	npfctl_rule_protodata(rl, proto, tcp_flags, icmp_type, icmp_code,
+	    from_cidr, fports, to_cidr, tports);
 	return 0;
 }
 
@@ -450,22 +503,28 @@
 /*
  * npfctl_parse_nat: parse NAT policy definition.
  *
- *	nat on <if> from <localnet> to <filter> -> <ip>
+ *	[bi]nat <if> from <net> to <net/addr> -> <ip>
+ *	rdr <if> from <net> to <addr> -> <ip>
  */
 static inline int
 npfctl_parse_nat(char *buf, prop_dictionary_t nat)
 {
 	var_t *ifvar, *from_cidr, *to_cidr, *ip;
+	var_t *tports = NULL, *rports = NULL;
 	element_t *iface, *cidr;
 	char *p, *sptr;
+	bool binat, rdr;
 
-	DPRINTF(("nat\t|%s|\n", buf));
+	DPRINTF(("[bi]nat/rdr\t|%s|\n", buf));
+	binat = (strncmp(buf, "binat", 5) == 0);
+	rdr = (strncmp(buf, "rdr", 3) == 0);
+
 	if ((p = strchr(buf, ' ')) == NULL) {
 		return PARSE_ERR();
 	}
 	PARSE_FIRST_TOKEN();
 
-	/* on <interface> */
+	/* <interface> */
 	if ((ifvar = npfctl_parsevalue(p)) == NULL) {
 		return PARSE_ERR();
 	}
@@ -492,6 +551,12 @@
 	to_cidr = npfctl_parsevalue(p);
 	PARSE_NEXT_TOKEN();
 
+	if (rdr && strcmp(p, "port") == 0) {
+		PARSE_NEXT_TOKEN();
+		tports = npfctl_parsevalue(p);
+		PARSE_NEXT_TOKEN();
+	}
+
 	/* -> <ip> */
 	if (strcmp(p, "->") != 0) {
 		return PARSE_ERR();
@@ -500,9 +565,54 @@
 	ip = npfctl_parsevalue(p);
 	cidr = ip->v_elements;
 
-	/* Setup NAT policy (rule as filter and extra info). */
-	npfctl_rule_protodata(nat, NULL, from_cidr, NULL, to_cidr, NULL);
-	npfctl_nat_setup(nat, iface->e_data, cidr->e_data);
+	if (rdr) {
+		PARSE_NEXT_TOKEN();
+		if (strcmp(p, "port") != 0) {
+			return PARSE_ERR();
+		}
+		PARSE_NEXT_TOKEN();
+		rports = npfctl_parsevalue(p);
+	}
+
+	/*
+	 * Setup NAT policy (rule as filter and extra info), which is
+	 * Outbound NAT (NPF_NATOUT).  Unless it is a redirect rule,
+	 * in which case it is Inbound NAT with specified port.
+	 *
+	 * XXX mess
+	 */
+	if (!rdr) {
+		npfctl_rule_protodata(nat, NULL, NULL, -1, -1, from_cidr,
+		    NULL, to_cidr, NULL);
+		npfctl_nat_setup(nat, NPF_NATOUT,
+		    binat ? 0 : (NPF_NAT_PORTS | NPF_NAT_PORTMAP),
+		    iface->e_data, cidr->e_data, NULL);
+	} else {
+		element_t *rp = rports->v_elements;
+
+		npfctl_rule_protodata(nat, NULL, NULL, -1, -1, from_cidr,
+		    NULL, to_cidr, tports);
+		npfctl_nat_setup(nat, NPF_NATIN, NPF_NAT_PORTS,
+		    iface->e_data, cidr->e_data, rp->e_data);
+	}
+
+	/*
+	 * For bi-directional NAT case, create and setup additional
+	 * Inbound NAT (NPF_NATIN) policy.  Note that translation address
+	 * is local IP, and filter criteria is inverted accordingly.
+	 *
+	 * XXX mess
+	 */
+	if (binat) {
+		prop_dictionary_t bn = npfctl_mk_nat();
+		element_t *taddr = from_cidr->v_elements;
+
+		npfctl_rule_protodata(bn, NULL, NULL, -1, -1,
+		    to_cidr, NULL, ip, NULL);
+		npfctl_nat_setup(bn, NPF_NATIN, 0, iface->e_data,
+		    taddr->e_data, NULL);
+		npfctl_add_nat(bn);
+	}
 	return 0;
 }
 
@@ -603,7 +713,8 @@
 			return ret;
 		npfctl_add_table(tl);
 
-	} else if (strncmp(p, "nat", 3) == 0) {
+	} else if (strncmp(p, "nat", 3) == 0 || strncmp(p, "rdr", 3) == 0 ||
+	    strncmp(p, "binat", 5) == 0) {
 		prop_dictionary_t nat;
 
 		/* NAT policy. */
--- a/usr.sbin/npf/npfctl/npfctl.h	Thu Sep 16 02:38:50 2010 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.h	Thu Sep 16 04:53:27 2010 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npfctl.h,v 1.1 2010/08/22 18:56:24 rmind Exp $	*/
+/*	$NetBSD: npfctl.h,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@@ -78,8 +78,8 @@
 prop_dictionary_t npfctl_mk_rule(bool);
 void		npfctl_add_rule(prop_dictionary_t, prop_dictionary_t);
 void		npfctl_rule_setattr(prop_dictionary_t, int, char *);
-void		npfctl_rule_protodata(prop_dictionary_t, char *, var_t *,
-		    var_t *, var_t *, var_t *);
+void		npfctl_rule_protodata(prop_dictionary_t, char *, char *,
+		    int, int, var_t *, var_t *, var_t *, var_t *);
 void		npfctl_rule_icmpdata(prop_dictionary_t, var_t *, var_t *);
 
 prop_dictionary_t npfctl_lookup_table(char *);
@@ -90,7 +90,8 @@
 
 prop_dictionary_t npfctl_mk_nat(void);
 void		npfctl_add_nat(prop_dictionary_t);
-void		npfctl_nat_setup(prop_dictionary_t, char *, char *);
+void		npfctl_nat_setup(prop_dictionary_t, int, int,
+		    char *, char *, char *);
 
 size_t		npfctl_calc_ncsize(int []);
 size_t		npfctl_failure_offset(int []);
@@ -99,6 +100,7 @@
 void		npfctl_gennc_v4cidr(void **, int,
 		    in_addr_t, in_addr_t, bool);
 void		npfctl_gennc_icmp(void **, int, int, int);
+void		npfctl_gennc_tcpfl(void **, int , uint8_t, uint8_t);
 void		npfctl_gennc_ports(void **, int,
 		    in_port_t, in_port_t, bool, bool);
 void		npfctl_gennc_tbl(void **, int, u_int , bool);