Import NPF - a packet filter. Some features: trunk
authorrmind <rmind@NetBSD.org>
Sun, 22 Aug 2010 18:56:18 +0000
branchtrunk
changeset 193188 11cb5688d762
parent 193187 2a56d80a194d
child 193189 d9b653f8959a
Import NPF - a packet filter. Some features: - Designed to be fully MP-safe and highly efficient. - Tables/IP sets (hash or red-black tree) for high performance lookups. - Stateful filtering and Network Address Port Translation (NAPT). Framework for application level gateways (ALGs). - Packet inspection engine called n-code processor - inspired by BPF - supporting generic RISC-like and specific CISC-like instructions for common patterns (e.g. IPv4 address matching). See npf_ncode(9) manual. - Convenient userland utility npfctl(8) with npf.conf(8). NOTE: This is not yet a fully capable alternative to PF or IPFilter. Further work (support for binat/rdr, return-rst/return-icmp, common ALGs, state saving/restoring, logging, etc) is in progress. Thanks a lot to Matt Thomas for various useful comments and code review. Aye by: board@
distrib/sets/lists/base/mi
distrib/sets/lists/comp/mi
distrib/sets/lists/man/mi
etc/MAKEDEV.tmpl
etc/Makefile
share/man/man9/Makefile
share/man/man9/npf_ncode.9
share/mk/bsd.README
share/mk/bsd.own.mk
sys/arch/amd64/conf/GENERIC
sys/arch/i386/conf/ALL
sys/arch/i386/conf/GENERIC
sys/arch/i386/conf/MONOLITHIC
sys/conf/files
sys/conf/majors
sys/modules/npf/Makefile
sys/net/Makefile
sys/net/npf/Makefile
sys/net/npf/files.npf
sys/net/npf/npf.c
sys/net/npf/npf.h
sys/net/npf/npf_alg.c
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_ctl.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_instr.c
sys/net/npf/npf_mbuf.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_ncode.h
sys/net/npf/npf_processor.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_session.c
sys/net/npf/npf_tableset.c
usr.sbin/Makefile
usr.sbin/npf/Makefile
usr.sbin/npf/Makefile.inc
usr.sbin/npf/npfctl/Makefile
usr.sbin/npf/npfctl/npf.conf.8
usr.sbin/npf/npfctl/npf_data.c
usr.sbin/npf/npfctl/npf_ncgen.c
usr.sbin/npf/npfctl/npf_parser.c
usr.sbin/npf/npfctl/npfctl.8
usr.sbin/npf/npfctl/npfctl.c
usr.sbin/npf/npfctl/npfctl.h
--- a/distrib/sets/lists/base/mi	Sun Aug 22 18:01:01 2010 +0000
+++ b/distrib/sets/lists/base/mi	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.879 2010/08/21 06:38:59 christos Exp $
+# $NetBSD: mi,v 1.880 2010/08/22 18:56:18 rmind Exp $
 #
 # Note:	Don't delete entries from here - mark them as "obsolete" instead,
 #	unless otherwise stated below.
@@ -1239,6 +1239,7 @@
 ./usr/sbin/netgroup_mkdb			base-nis-bin
 ./usr/sbin/nfsd					base-nfsserver-bin
 ./usr/sbin/nfsiod				base-obsolete		obsolete
+./usr/sbin/npfctl				base-npf-bin		npf
 ./usr/sbin/nslookup				base-obsolete		obsolete
 ./usr/sbin/nsquery				base-obsolete		obsolete
 ./usr/sbin/nstest				base-obsolete		obsolete
--- a/distrib/sets/lists/comp/mi	Sun Aug 22 18:01:01 2010 +0000
+++ b/distrib/sets/lists/comp/mi	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.1498 2010/08/21 10:32:35 jruoho Exp $
+#	$NetBSD: mi,v 1.1499 2010/08/22 18:56:19 rmind Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -1474,6 +1474,8 @@
 ./usr/include/net/if_vlanvar.h			comp-c-include
 ./usr/include/net/net_stats.h			comp-c-include
 ./usr/include/net/netisr.h			comp-c-include
+./usr/include/net/npf.h				comp-npf-include	npf
+./usr/include/net/npf_ncode.h			comp-npf-include	npf
 ./usr/include/net/pfil.h			comp-c-include
 ./usr/include/net/pfkeyv2.h			comp-c-include
 ./usr/include/net/pfvar.h			comp-c-include
@@ -3632,6 +3634,7 @@
 ./usr/libdata/debug/usr/sbin/ndp.debug		comp-netutil-debug	inet6,debug
 ./usr/libdata/debug/usr/sbin/netgroup_mkdb.debug	comp-nis-debug		debug
 ./usr/libdata/debug/usr/sbin/nfsd.debug		comp-nfsserver-debug	debug
+./usr/libdata/debug/usr/sbin/npfctl.debug	comp-npf-debug		npf,debug
 ./usr/libdata/debug/usr/sbin/ntp-keygen.debug	comp-ntp-debug		crypto,debug
 ./usr/libdata/debug/usr/sbin/ntpd.debug		comp-ntp-debug		debug
 ./usr/libdata/debug/usr/sbin/ntpdate.debug	comp-ntp-debug		debug
@@ -9406,6 +9409,7 @@
 ./usr/share/man/cat9/nanouptime.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/need_resched.0		comp-obsolete		obsolete
 ./usr/share/man/cat9/nextrunqueue.0		comp-obsolete		obsolete
+./usr/share/man/cat9/npf_ncode.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/nullop.0			comp-sys-catman		.cat
 ./usr/share/man/cat9/old_sysctl.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/opencrypto.0		comp-sys-catman		.cat
@@ -15209,6 +15213,7 @@
 ./usr/share/man/html9/namei.html		comp-sys-htmlman	html
 ./usr/share/man/html9/nanotime.html		comp-sys-htmlman	html
 ./usr/share/man/html9/nanouptime.html		comp-sys-htmlman	html
+./usr/share/man/html9/npf_ncode.html		comp-sys-htmlman	html
 ./usr/share/man/html9/nullop.html		comp-sys-htmlman	html
 ./usr/share/man/html9/old_sysctl.html		comp-sys-htmlman	html
 ./usr/share/man/html9/opencrypto.html		comp-sys-htmlman	html
@@ -21179,6 +21184,7 @@
 ./usr/share/man/man9/nanouptime.9		comp-sys-man		.man
 ./usr/share/man/man9/need_resched.9		comp-obsolete		obsolete
 ./usr/share/man/man9/nextrunqueue.9		comp-obsolete		obsolete
+./usr/share/man/man9/npf_ncode.9		comp-sys-man		.man
 ./usr/share/man/man9/nullop.9			comp-sys-man		.man
 ./usr/share/man/man9/old_sysctl.9		comp-sys-man		.man
 ./usr/share/man/man9/opencrypto.9		comp-sys-man		.man
--- a/distrib/sets/lists/man/mi	Sun Aug 22 18:01:01 2010 +0000
+++ b/distrib/sets/lists/man/mi	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1233 2010/08/06 17:00:12 jruoho Exp $
+# $NetBSD: mi,v 1.1234 2010/08/22 18:56:20 rmind Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -2419,6 +2419,8 @@
 ./usr/share/man/cat8/nfsiod.0			man-obsolete		obsolete
 ./usr/share/man/cat8/nis.0			man-nis-catman		.cat
 ./usr/share/man/cat8/nologin.0			man-sysutil-catman	.cat
+./usr/share/man/cat8/npf.conf.0			man-npf-catman		.cat
+./usr/share/man/cat8/npfctl.0			man-npf-catman		.cat
 ./usr/share/man/cat8/nqmgr.0			man-obsolete		obsolete
 ./usr/share/man/cat8/nslookup.0			man-netutil-catman	.cat
 ./usr/share/man/cat8/nsupdate.0			man-obsolete		obsolete
@@ -4876,6 +4878,8 @@
 ./usr/share/man/html8/nfsd.html			man-nfsserver-htmlman	html
 ./usr/share/man/html8/nis.html			man-nis-htmlman		html
 ./usr/share/man/html8/nologin.html		man-sysutil-htmlman	html
+./usr/share/man/html8/npf.conf.html		man-npf-htmlman		html
+./usr/share/man/html8/npfctl.html		man-npf-htmlman		html
 ./usr/share/man/html8/nslookup.html		man-netutil-htmlman	html
 ./usr/share/man/html8/nsupdate.html		man-obsolete		obsolete
 ./usr/share/man/html8/ntalkd.html		man-netutil-htmlman	html
@@ -7555,6 +7559,8 @@
 ./usr/share/man/man8/nfsiod.8			man-obsolete		obsolete
 ./usr/share/man/man8/nis.8			man-nis-man		.man
 ./usr/share/man/man8/nologin.8			man-sysutil-man		.man
+./usr/share/man/man8/npf.conf.8			man-npf-man		.man
+./usr/share/man/man8/npfctl.8			man-npf-man		.man
 ./usr/share/man/man8/nqmgr.8			man-obsolete		obsolete
 ./usr/share/man/man8/nslookup.8			man-netutil-man		.man
 ./usr/share/man/man8/nsupdate.8			man-obsolete		obsolete
--- a/etc/MAKEDEV.tmpl	Sun Aug 22 18:01:01 2010 +0000
+++ b/etc/MAKEDEV.tmpl	Sun Aug 22 18:56:18 2010 +0000
@@ -1,5 +1,5 @@
 #!/bin/sh -
-#	$NetBSD: MAKEDEV.tmpl,v 1.134 2010/04/19 04:34:42 pooka Exp $
+#	$NetBSD: MAKEDEV.tmpl,v 1.135 2010/08/22 18:56:20 rmind Exp $
 #
 # Copyright (c) 2003,2007,2008 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -255,6 +255,7 @@
 #	mlx*	Mylex DAC960 control interface
 #	mly*	Mylex AcceleRAID/eXtremeRAID control interface
 #	np*	UNIBUS Ethernet co-processor interface, for downloading.
+#	npf	NPF packet filter
 #	nsmb*	SMB requester
 #	openfirm OpenFirmware accessor
 #	pad*	Pseudo-audio device driver
@@ -732,7 +733,7 @@
 	makedev md0 md1
 	makedev raid0 raid1 raid2 raid3 raid4 raid5 raid6 raid7
 	makedev vnd0 vnd1 vnd2 vnd3
-	makedev bpf
+	makedev bpf npf
 	makedev tun0 tun1 tun2 tun3
 	makedev ipl pf crypto random
 	makedev lockstat clockctl cpuctl
@@ -1261,6 +1262,10 @@
 	lndev bpf bpf0
 	;;
 
+npf)
+	mkdev npf	c %npf_chr% 0
+	;;
+
 bthub)
 	mkdev bthub c %bthub_chr% 0
 	;;
--- a/etc/Makefile	Sun Aug 22 18:01:01 2010 +0000
+++ b/etc/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.380 2010/02/05 09:44:23 roy Exp $
+#	$NetBSD: Makefile,v 1.381 2010/08/22 18:56:20 rmind Exp $
 #	from: @(#)Makefile	8.7 (Berkeley) 5/25/95
 
 # Environment variables without default values:
@@ -192,7 +192,7 @@
 		MKGCC MKGCCCMDS MKGDB \
 		MKHESIOD MKHOSTOBJ MKHTML MKIEEEFP MKINET6 MKINFO MKIPFILTER \
 		MKKERBEROS MKLDAP MKLINKLIB MKLINT \
-		MKMAN MKMANZ MKMDNS MKNLS MKNVI MKOBJ MKOBJDIRS \
+		MKMAN MKMANZ MKMDNS MKNLS MKNPF MKNVI MKOBJ MKOBJDIRS \
 		MKPAM MKPF MKPIC MKPICINSTALL MKPICLIB MKPOSTFIX MKPROFILE \
 		MKSHARE MKSKEY MKSOFTFLOAT MKSTATICLIB \
 		MKUNPRIVED MKUPDATE MKX11 MKYP \
--- a/share/man/man9/Makefile	Sun Aug 22 18:01:01 2010 +0000
+++ b/share/man/man9/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#       $NetBSD: Makefile,v 1.341 2010/08/21 10:11:35 jruoho Exp $
+#       $NetBSD: Makefile,v 1.342 2010/08/22 18:56:20 rmind Exp $
 
 #	Makefile for section 9 (kernel function and variable) manual pages.
 
@@ -36,7 +36,7 @@
 	memmove.9 memset.9 \
 	microtime.9 microuptime.9 mi_switch.9 module.9 \
 	mstohz.9 mutex.9 m_tag.9 namecache.9 \
-	namei.9 nullop.9 opencrypto.9 optstr.9 \
+	namei.9 npf_ncode.9 nullop.9 opencrypto.9 optstr.9 \
 	panic.9 pci.9 pci_configure_bus.9 pci_intr.9 pckbport.9 \
 	pcmcia.9 pcq.9 percpu.9 pfil.9 physio.9 pmap.9 pmatch.9 \
 	pmc.9 pmf.9 pool.9 pool_cache.9 powerhook_establish.9 ppsratecheck.9 \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/share/man/man9/npf_ncode.9	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,259 @@
+.\"	$NetBSD: npf_ncode.9,v 1.1 2010/08/22 18:56:20 rmind Exp $
+.\"
+.\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This material is based upon work partially supported by The
+.\" NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 22, 2010
+.Dt NPF_NCODE 9
+.Os
+.Sh NAME
+.Nm npf_ncode
+.Nd NPF n-code processor
+.Sh SYNOPSIS
+.In net/npf_ncode.h
+.Ft int
+.Fn npf_ncode_process \
+"npf_cache_t *npc" "const void *ncode" "nbuf_t *nbuf" "int layer"
+.Ft int
+.Fn npf_ncode_validate "const void *ncode" "size_t sz" "int *errat"
+.\" -----
+.Sh DESCRIPTION
+The NPF n-code processor is a general purpose engine to inspect network
+packets, which are abstracted as chained buffers.
+.Pp
+.Sh FUNCTIONS
+.Fn npf_ncode_process
+performs n-code processing using data of the specified packet.
+.Fa ncode
+is the address of a validated n-code memory block.
+N-code memory address should be 32 bit word aligned.
+.Fa nbuf
+is an opaque network buffer on which n-code processor will operate.
+.Fa layer
+specifies at which network layer buffer is passed, it can be either
+NPF_LAYER_L2 or NPF_LAYER_L3.
+This value is initially set in the R0 register and can be checked by the
+n-code.
+Contents of other registers are unspecified.
+.Pp
+Function returns a value from the n-code.
+.Pp
+.Fn npf_ncode_validate
+performs n-code validation.
+.Fa ncode
+is the address of an n-code memory block.
+.Fa sz
+is the size of memory block.
+.Fa errat
+is the word number in the n-code where error is detected.
+If no error, this value is undefined.
+.Pp
+On successful validation, function returns 0.
+Otherwise, it may return one of the following error codes:
+.Bl -tag -width [NPF_ERR_OPCODE]
+.It Bq Er NPF_ERR_OPCODE
+Invalid instruction (unknown opcode).
+.It Bq Er NPF_ERR_JUMP
+Invalid jump, e.g. not to the instruction or out of range.
+.It Bq Er NPF_ERR_REG
+Invalid register, i.e. incorrect index number.
+.It Bq Er NPF_ERR_INVAL
+Invalid argument value.
+.It Bq Er NPF_ERR_RANGE
+Processing out of range, e.g. missing return path.
+.El
+.Pp
+Any untrusted n-code, for example generated by userspace, should be
+validated (once) before allowing to process it.
+.Pp
+.\" -----
+.Sh PROCESSING
+There are two instruction sets: RISC-like and CISC-like.
+Processing is done in words, therefore both instructions (their codes) and
+arguments are always 32 bit long words.
+.Pp
+There are four general purpose registers: R0, R1, R2, R3.
+Each can store 32 bit long words.
+Registers are mainly to store values for operations using RISC-like
+instructions.
+CISC-like instructions, however, use them to store return values.
+.Pp
+Processing begins from the first word until it reaches
+NPF_OPCODE_RET
+instruction with a return value.
+Instruction pointer can be changed using jump operations, which always
+take relative address, in words.
+Result of last comparison is tracked internally and jump operations should
+be performed immediately after comparison or certain CISC-like instructions.
+.Pp
+CISC-like instructions and
+NPF_OPCODE_LOAD
+can be used to load data from network buffer.
+They operate at current network buffer offset, which is initially at
+the beginning of network buffer.
+NPF_OPCODE_ADVR
+instruction can be used to advance the current network buffer offset.
+.Pp
+.\" -----
+.Sh CACHING
+Various packet data is cached during execution of CISC-like instructions
+and further instruction calls may retrieve information from the cache.
+If n-code changes the packet data, information in the cache might no
+longer reflect the changes.
+In such case, it is n-code's responsibility to invalidate the cache
+(if necessary) by executing
+NPF_OPCODE_INVL
+instruction.
+.\" -----
+.Sh INSTRUCTIONS
+Return, advance, jump and tag operations.
+.Bl -tag -width indent
+.It Sy 0x00 NPF_OPCODE_RET <return value>
+Finish processing and return passed value.
+.It Sy 0x01 NPF_OPCODE_ADVR <register>
+Advance current network buffer offset by a value,
+passed in the specified register.
+Value represents bytes and cannot be negative or zero.
+.It Sy 0x02 NPF_OPCODE_J <relative address>
+Jump processor to a relative address (from this instruction).
+Address value is the amount of words forwards or backwards.
+It can point only to a valid instruction, at valid boundary.
+.It Sy 0x03 NPF_OPCODE_INVL
+Invalidate all data in the packet cache.
+.It Sy 0x04 NPF_OPCODE_TAG <key>, <value>
+Add a tag with specified key and value to the primary network buffer (nbuf).
+.El
+.Pp
+.\" ---
+Set and load operations.
+.Bl -tag -width indent
+.It Sy 0x10 NPF_OPCODE_MOV <value>, <register>
+Set the specified value to a register.
+.It Sy 0x11 NPF_OPCODE_LOAD <length>, <register>
+Load the specified length of packet data into the register.
+Data is read starting from the current network buffer offset.
+Operation does not advance the offset after read, however.
+Value of
+.Fa length
+represents bytes and must be in the range from 1 to 4.
+Returned data is in network byte order.
+.El
+.Pp
+.\" ---
+Compare and jump operations.
+.Bl -tag -width indent
+.It Sy 0x21 NPF_OPCODE_CMP <value>, <register>
+Compare the specified value and value in a register.
+Result is stored internally and can be tested by jump instructions.
+.It Sy 0x22 NPF_OPCODE_CMPR <register>, <register>
+Compare values of two registers.
+Result is stored internally and can be tested by jump instructions.
+.It Sy 0x23 NPF_OPCODE_BEQ <relative address>
+Jump if result of last comparison was "equal".
+Otherwise, continue processing of next instruction.
+.It Sy 0x24 NPF_OPCODE_BNE <relative address>
+Jump if result of last comparison was "not equal".
+Otherwise, continue processing of next instruction.
+.It Sy 0x25 NPF_OPCODE_BGT <relative address>
+Jump if result of last comparison was "greater than".
+Otherwise, continue processing of next instruction.
+.It Sy 0x26 NPF_OPCODE_BLT <relative address>
+Jump if result of last comparison was "less than".
+Otherwise, continue processing of next instruction.
+.El
+.Pp
+.\" ---
+Bitwise operations.
+.Bl -tag -width indent
+.It Sy 0x30 NPF_OPCODE_AND <value>, <register>
+Perform bitwise AND with a specified value and value in the register.
+Result is stored in the register.
+.El
+.Pp
+.\" -----
+CISC-like n-code instructions.
+.Bl -tag -width indent
+.It Sy 0x80 NPF_OPCODE_ETHER <s/d>, <_reserved>, <ether type>
+Read Ethernet type in the frame, handle possible VLAN and match with
+the value passed in the argument.
+Return value to advance to layer 3 header in R3.
+.\" -
+.It Sy 0x90 NPF_OPCODE_IP4MASK <s/d>, <network address>, <subnet mask>
+Match passed network address with subnet mask against source or destination
+address in the IPv4 header.
+Address and mask should be in network byte order.
+Value of first argument indicates whether source (if 0x1) or destination
+(if 0x0) address should be matched.
+.It Sy 0x91 NPF_OPCODE_IP4TABLE <s/d>, <table id>
+Match the source or destination address with NPF table contents
+specified by table ID.
+Value of the first argument indicates whether source (if 0x1) or
+destination (if 0x0) address should be matched.
+.\" -
+.It Sy 0x92 NPF_OPCODE_ICMP4 <type>, <code>
+Match ICMP type and code of the packet, unless a value of ~0 (all bits set)
+is passed, which indicates that comparison should not be performed.
+.\" -
+.It Sy 0xa0 NPF_OPCODE_TCP_PORT <s/d>, <port range>
+Match the source or destination port with a specified port range.
+Higher 16 bits of second argument represent "from" and lower 16 bits
+represent "to" values of range.
+The 32 bit port range value is host byte order, however the actual
+"from" and "to" values should be in the network byte order.
+Value of the first argument indicates whether source (if 0x1) or
+destination (if 0x0) port should be matched.
+.\" -
+.It Sy 0xa1 NPF_OPCODE_UDP_PORT <s/d>, <port range>
+Match the source or destination port with a specified port range.
+Higher 16 bits of second argument represent "from" and lower 16 bits
+represent "to" values of range.
+The 32 bit port range value is host byte order, however the actual
+"from" and "to" values should be in the network byte order.
+Value of the first argument indicates whether source (if 0x1) or
+destination (if 0x0) port should be matched.
+.El
+.\" -----
+.Sh CODE REFERENCES
+This section describes places within the
+.Nx
+source tree where actual code implementing the
+.Nm
+subsystem
+can be found.
+All pathnames are relative to
+.Pa /usr/src .
+.Pp
+The
+.Nm
+is implemented within the file
+.Pa sys/net/npf/npf_processor.c .
+.Sh SEE ALSO
+.Xr npf.conf 8 ,
+.Xr npfctl 8
+.Sh HISTORY
+The NPF n-code processor first appeared in
+.Nx 6.0 .
--- a/share/mk/bsd.README	Sun Aug 22 18:01:01 2010 +0000
+++ b/share/mk/bsd.README	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: bsd.README,v 1.274 2010/08/15 07:27:33 mrg Exp $
+#	$NetBSD: bsd.README,v 1.275 2010/08/22 18:56:20 rmind Exp $
 #	@(#)bsd.README	8.2 (Berkeley) 4/2/94
 
 This is the README file for the make "include" files for the NetBSD
@@ -266,6 +266,9 @@
 		definition files.
 		Default: yes
 
+MKNPF		If "no", don't build or install the NPF and its modules.
+		Default: yes
+
 MKOBJ		If "no", don't enable the rule which creates objdirs,
 		and also acts as "MKOBJDIRS=no".
 		Default: yes
--- a/share/mk/bsd.own.mk	Sun Aug 22 18:01:01 2010 +0000
+++ b/share/mk/bsd.own.mk	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: bsd.own.mk,v 1.639 2010/08/15 07:27:34 mrg Exp $
+#	$NetBSD: bsd.own.mk,v 1.640 2010/08/22 18:56:21 rmind Exp $
 
 # This needs to be before bsd.init.mk
 .if defined(BSD_MK_COMPAT_FILE)
@@ -701,6 +701,7 @@
 	MKMAN \
 	MKMDNS \
 	MKNLS \
+	MKNPF \
 	MKOBJ \
 	MKPAM \
 	MKPF MKPIC MKPICINSTALL MKPICLIB MKPOSTFIX MKPROFILE \
--- a/sys/arch/amd64/conf/GENERIC	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/arch/amd64/conf/GENERIC	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.284 2010/08/08 18:28:00 chs Exp $
+# $NetBSD: GENERIC,v 1.285 2010/08/22 18:56:21 rmind Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident 		"GENERIC-$Revision: 1.284 $"
+#ident 		"GENERIC-$Revision: 1.285 $"
 
 maxusers	64		# estimated number of users
 
@@ -1108,6 +1108,9 @@
 pseudo-device	vnd			# disk-like interface to files
 #options 	VND_COMPRESSION		# compressed vnd(4)
 
+# NPF
+#pseudo-device	npf
+
 # network pseudo-devices
 pseudo-device	bpfilter		# Berkeley packet filter
 #pseudo-device	carp			# Common Address Redundancy Protocol
--- a/sys/arch/i386/conf/ALL	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/arch/i386/conf/ALL	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: ALL,v 1.266 2010/08/21 03:06:37 tsutsui Exp $
+# $NetBSD: ALL,v 1.267 2010/08/22 18:56:21 rmind Exp $
 # From NetBSD: GENERIC,v 1.787 2006/10/01 18:37:54 bouyer Exp
 #
 # ALL machine description file
@@ -17,7 +17,7 @@
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident 		"ALL-$Revision: 1.266 $"
+#ident 		"ALL-$Revision: 1.267 $"
 
 maxusers	64		# estimated number of users
 
@@ -1598,6 +1598,9 @@
 
 pseudo-device 	dm			# device-mapper disk driver
 
+# NPF
+pseudo-device	npf
+
 # network pseudo-devices
 pseudo-device	bpfilter		# Berkeley packet filter
 pseudo-device	carp			# Common Address Redundancy Protocol
--- a/sys/arch/i386/conf/GENERIC	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/arch/i386/conf/GENERIC	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.992 2010/08/21 11:55:21 jmcneill Exp $
+# $NetBSD: GENERIC,v 1.993 2010/08/22 18:56:21 rmind Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident 		"GENERIC-$Revision: 1.992 $"
+#ident 		"GENERIC-$Revision: 1.993 $"
 
 maxusers	64		# estimated number of users
 
@@ -1541,6 +1541,9 @@
 pseudo-device	vnd			# disk-like interface to files
 options 	VND_COMPRESSION		# compressed vnd(4)
 
+# NPF
+#pseudo-device	npf
+
 # network pseudo-devices
 pseudo-device	bpfilter		# Berkeley packet filter
 #pseudo-device	carp			# Common Address Redundancy Protocol
--- a/sys/arch/i386/conf/MONOLITHIC	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/arch/i386/conf/MONOLITHIC	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: MONOLITHIC,v 1.11 2010/04/09 14:02:05 ahoka Exp $
+# $NetBSD: MONOLITHIC,v 1.12 2010/08/22 18:56:21 rmind Exp $
 #
 # Non MODULAR, used mostly as a reference as to what we modularized.
 #
@@ -50,6 +50,9 @@
 options 	PPP_BSDCOMP	# BSD-Compress compression support for PPP
 options 	PPP_DEFLATE	# Deflate compression support for PPP
 
+# NPF
+#pseudo-device	npf
+
 pseudo-device	accf_data	# "dataready" accept filter
 pseudo-device	accf_http	# "httpready" accept filter
 
--- a/sys/conf/files	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/conf/files	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: files,v 1.993 2010/08/21 13:17:32 pgoyette Exp $
+#	$NetBSD: files,v 1.994 2010/08/22 18:56:21 rmind Exp $
 #	@(#)files.newconf	7.5 (Berkeley) 5/10/93
 
 version 	20100430
@@ -188,7 +188,6 @@
 include "netatalk/files.netatalk"
 include "netbt/files.netbt"
 include "netinet/files.netinet"
-include "netinet/files.ipfilter"
 include "netinet6/files.netinet6"
 include "netinet6/files.ipsec"
 include "netipsec/files.netipsec"
@@ -196,6 +195,9 @@
 include "netmpls/files.netmpls"
 include "netnatm/files.netnatm"
 include "netsmb/files.netsmb"
+
+include "net/npf/files.npf"
+include "netinet/files.ipfilter"
 include "net/files.pf"
 
 obsolete defflag		CCITT		# obsolete
--- a/sys/conf/majors	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/conf/majors	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: majors,v 1.51 2010/04/30 20:47:17 pooka Exp $
+# $NetBSD: majors,v 1.52 2010/08/22 18:56:21 rmind Exp $
 #
 # Device majors for Machine-Independent drivers.
 #
@@ -43,3 +43,4 @@
 device-major hdaudio   char 195		   hdaudio	vector=4
 device-major uhso      char 196		   uhso
 device-major rumpblk   char 197 block 197  rumpblk
+device-major npf       char 198		   npf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/modules/npf/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,13 @@
+# $NetBSD: Makefile,v 1.1 2010/08/22 18:56:22 rmind Exp $
+
+.include "../Makefile.inc"
+
+.PATH:		${S}/net/npf
+
+KMOD=		npf
+
+SRCS=		npf.c npf_ctl.c npf_handler.c npf_instr.c npf_mbuf.c
+SRCS+=		npf_processor.c npf_ruleset.c npf_tableset.c npf_inet.c
+SRCS+=		npf_session.c npf_nat.c npf_alg.c
+
+.include <bsd.kmodule.mk>
--- a/sys/net/Makefile	Sun Aug 22 18:01:01 2010 +0000
+++ b/sys/net/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.28 2010/06/26 14:24:28 kefren Exp $
+#	$NetBSD: Makefile,v 1.29 2010/08/22 18:56:22 rmind Exp $
 
 INCSDIR= /usr/include/net
 
@@ -10,7 +10,7 @@
 	netisr.h pfil.h pfkeyv2.h pfvar.h ppp-comp.h ppp_defs.h radix.h \
 	raw_cb.h route.h slcompress.h slip.h zlib.h
 
-SUBDIR=	agr
+SUBDIR=	agr npf
 
 .include <bsd.kinc.mk>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,9 @@
+# $NetBSD: Makefile,v 1.1 2010/08/22 18:56:22 rmind Exp $
+#
+# Public Domain.
+#
+
+INCSDIR=	/usr/include/net
+INCS=		npf.h npf_ncode.h
+
+.include <bsd.kinc.mk>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/files.npf	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,23 @@
+# $NetBSD: files.npf,v 1.1 2010/08/22 18:56:22 rmind Exp $
+#
+# Public Domain.
+#
+
+#
+# NPF pseudo device and modules.
+#
+
+defpseudo	npf:	ifnet
+
+file	net/npf/npf.c				npf
+file	net/npf/npf_ctl.c			npf
+file	net/npf/npf_handler.c			npf
+file	net/npf/npf_instr.c			npf
+file	net/npf/npf_mbuf.c			npf
+file	net/npf/npf_processor.c			npf
+file	net/npf/npf_ruleset.c			npf
+file	net/npf/npf_tableset.c			npf
+file	net/npf/npf_inet.c			npf
+file	net/npf/npf_session.c			npf
+file	net/npf/npf_nat.c			npf
+file	net/npf/npf_alg.c			npf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,216 @@
+/*	$NetBSD: npf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF main: dynamic load/initialisation and unload routines.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <sys/conf.h>
+#include <sys/kauth.h>
+#include <sys/lwp.h>
+#include <sys/module.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+
+#include "npf_impl.h"
+
+/*
+ * Module declaration, device entry points and the /dev/npf device switch.
+ * npf_cdevsw is attached by npf_init(), and only when built as a module.
+ */
+MODULE(MODULE_CLASS_MISC, npf, NULL);
+
+void		npfattach(int);
+static int	npf_dev_open(dev_t, int, int, lwp_t *);
+static int	npf_dev_close(dev_t, int, int, lwp_t *);
+static int	npf_dev_ioctl(dev_t, u_long, void *, int, lwp_t *);
+static int	npf_dev_poll(dev_t, int, lwp_t *);
+static int	npf_dev_read(dev_t, struct uio *, int);
+
+const struct cdevsw npf_cdevsw = {
+	npf_dev_open, npf_dev_close, npf_dev_read, nowrite, npf_dev_ioctl,
+	nostop, notty, npf_dev_poll, nommap, nokqfilter, D_OTHER | D_MPSAFE
+};
+
+static int
+npf_init(void)
+{
+#ifdef _MODULE
+	devmajor_t bmajor = NODEVMAJOR, cmajor = NODEVMAJOR;
+#endif
+	int error;
+
+	/*
+	 * Initialise ruleset, tables and session structures.
+	 */
+
+	error = npf_ruleset_sysinit();
+	if (error)
+		return error;
+
+	error = npf_tableset_sysinit();
+	if (error) {
+		npf_ruleset_sysfini();
+		return error;
+	}
+
+	error = npf_session_sysinit();
+	if (error) {
+		npf_tableset_sysfini();
+		npf_ruleset_sysfini();
+		return error;
+	}
+	npf_nat_sysinit();
+	npf_alg_sysinit();
+
+#ifdef _MODULE
+	/* Attach /dev/npf device. */
+	error = devsw_attach("npf", NULL, &bmajor, &npf_cdevsw, &cmajor);
+	if (error) {
+		npf_nat_sysfini();
+		npf_session_sysfini();
+		npf_tableset_sysfini();
+		npf_ruleset_sysfini();
+	}
+#endif
+	return error;
+}
+
+/* Tear down the device and every subsystem; always returns 0. */
+static int
+npf_fini(void)
+{
+#ifdef _MODULE
+	/* Detach the device first.  NOTE(review): pfil unhook not seen here. */
+	devsw_detach(NULL, &npf_cdevsw);
+#endif
+	npf_nat_sysfini();
+	npf_alg_sysfini();
+	npf_session_sysfini();
+	npf_tableset_sysfini();
+	npf_ruleset_sysfini();
+
+	return 0;
+}
+
+/*
+ * Module interface: run init/fini on request, ENOTTY for anything else.
+ */
+static int
+npf_modcmd(modcmd_t cmd, void *arg)
+{
+	int error;
+
+	if (cmd == MODULE_CMD_INIT) {
+		error = npf_init();
+	} else if (cmd == MODULE_CMD_FINI) {
+		error = npf_fini();
+	} else {
+		error = ENOTTY;
+	}
+	return error;
+}
+
+/* Attach routine declared above; deliberately does nothing with nunits. */
+void
+npfattach(int nunits)
+{
+	/* Void. */
+}
+
+/* Open handler: succeeds (0) for the super-user, EPERM for anyone else. */
+static int
+npf_dev_open(dev_t dev, int flag, int mode, lwp_t *l)
+{
+	const int denied =
+	    kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL);
+
+	/* Any non-zero kauth verdict is reported as EPERM. */
+	return denied ? EPERM : 0;
+}
+
+/* Close handler: performs no work and always returns 0. */
+static int
+npf_dev_close(dev_t dev, int flag, int mode, lwp_t *l)
+{
+	return 0;
+}
+
+/*
+ * ioctl handler for the NPF device.  Super-user only (EPERM otherwise);
+ * commands not listed below fail with ENOTTY.
+ */
+static int
+npf_dev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
+{
+	int *versionp;
+
+	/* Available only for super-user. */
+	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
+		return EPERM;
+
+	switch (cmd) {
+	case IOC_NPF_VERSION:
+		/* Report the interface version through the ioctl argument. */
+		versionp = data;
+		*versionp = NPF_VERSION;
+		return 0;
+	case IOC_NPF_SWITCH:
+		return npfctl_switch(data);
+	case IOC_NPF_RELOAD:
+		return npfctl_reload(cmd, data);
+	case IOC_NPF_TABLE:
+		return npfctl_table(data);
+	default:
+		break;
+	}
+	return ENOTTY;
+}
+
+/* Polling unsupported.  NOTE(review): confirm an errno is a valid result. */
+static int
+npf_dev_poll(dev_t dev, int events, lwp_t *l)
+{
+	return ENOTSUP;
+}
+
+/* Reading from the device is not supported: always fails with ENOTSUP. */
+static int
+npf_dev_read(dev_t dev, struct uio *uio, int flag)
+{
+	return ENOTSUP;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf.h	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,179 @@
+/*	$NetBSD: npf.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Public NPF interfaces.
+ */
+
+#ifndef _NPF_H_
+#define _NPF_H_
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <sys/ioctl.h>
+#include <prop/proplib.h>
+
+#ifdef _NPF_TESTING
+#include "testing.h"
+#endif
+
+#define	NPF_VERSION		1
+
+/*
+ * Public declarations.
+ */
+
+struct npf_ruleset;
+struct npf_rule;
+struct npf_hook;
+
+typedef struct npf_ruleset	npf_ruleset_t;
+typedef struct npf_rule		npf_rule_t;
+typedef struct npf_hook		npf_hook_t;
+
+/*
+ * Public definitions.
+ */
+
+typedef void			nbuf_t;
+
+/*
+ * Packet information cache.
+ */
+
+#define	NPC_IP46	0x01	/* IPv4,6 packet with known protocol. */
+#define	NPC_IP6VER	0x02	/* If NPC_IP46, then: 0 - IPv4, 1 - IPv6. */
+#define	NPC_ADDRS	0x04	/* Known source and destination addresses. */
+#define	NPC_PORTS	0x08	/* Known ports (for TCP/UDP cases). */
+#define	NPC_ICMP	0x10	/* ICMP with known type and code. */
+#define	NPC_ICMP_ID	0x20	/* ICMP with query ID. */
+
+/* XXX: Optimise later, pack in unions, perhaps bitfields, etc. */
+typedef struct {
+	/* Bitmask of the NPC_* flags; see npf_iscached(). */
+	uint32_t		npc_info;
+	/* Packet direction. NOTE(review): value encoding not shown here. */
+	int			npc_dir;
+	/*
+	 * Extra length to skip before the IP header.
+	 * NOTE(review): exact meaning is not shown here - confirm.
+	 */
+	uint8_t			npc_elen;
+	/* NPC_IP46 */
+	uint8_t			npc_proto;	/* IPPROTO_* value */
+	uint16_t		npc_hlen;	/* offset of the L4 header */
+	uint16_t		npc_ipsum;	/* IP header checksum */
+	/* NPC_ADDRS */
+	in_addr_t		npc_srcip;
+	in_addr_t		npc_dstip;
+	/* NPC_PORTS */
+	in_port_t		npc_sport;	/* network byte order */
+	in_port_t		npc_dport;	/* network byte order */
+	uint8_t			npc_tcp_flags;
+	/* NPC_ICMP */
+	uint8_t			npc_icmp_type;
+	uint8_t			npc_icmp_code;
+	/* NPC_ICMP_ID */
+	uint16_t		npc_icmp_id;
+} npf_cache_t;
+
+/*
+ * npf_iscached: check whether the cache carries the given information.
+ *
+ * Note: returns true if ANY of the NPC_* bits passed in "inf" is set,
+ * not only when all of them are.
+ */
+static inline bool
+npf_iscached(const npf_cache_t *npc, const int inf)
+{
+	const bool hit = (npc->npc_info & inf) != 0;
+
+	/* The cached case is expected to be the common one. */
+	return __predict_true(hit);
+}
+
+#if defined(_KERNEL) || defined(_NPF_TESTING)
+
+/* Network buffer interface. */
+void *		nbuf_dataptr(void *);
+void *		nbuf_advance(nbuf_t **, void *, u_int);
+int		nbuf_fetch_datum(nbuf_t *, void *, size_t, void *);
+int		nbuf_store_datum(nbuf_t *, void *, size_t, void *);
+
+int		nbuf_add_tag(nbuf_t *, uint32_t, uint32_t);
+int		nbuf_find_tag(nbuf_t *, uint32_t, void **);
+
+/* Ruleset interface. */
+npf_rule_t *	npf_rule_alloc(int, pri_t, int, void *, size_t);
+void		npf_rule_free(npf_rule_t *);
+void		npf_activate_rule(npf_rule_t *);
+void		npf_deactivate_rule(npf_rule_t *);
+
+npf_hook_t *	npf_hook_register(npf_rule_t *,
+		    void (*)(const npf_cache_t *, void *), void *);
+void		npf_hook_unregister(npf_rule_t *, npf_hook_t *);
+
+#endif
+
+/* Rule attributes. */
+#define	NPF_RULE_PASS			0x0001
+#define	NPF_RULE_COUNT			0x0002
+#define	NPF_RULE_FINAL			0x0004
+#define	NPF_RULE_LOG			0x0008
+#define	NPF_RULE_DEFAULT		0x0010
+#define	NPF_RULE_KEEPSTATE		0x0020
+
+#define	NPF_RULE_IN			0x1000
+#define	NPF_RULE_OUT			0x2000
+#define	NPF_RULE_DIMASK			0x3000
+
+/* Table types. */
+#define	NPF_TABLE_HASH			1
+#define	NPF_TABLE_RBTREE		2
+
+/* Layers. */
+#define	NPF_LAYER_2			2
+#define	NPF_LAYER_3			3
+
+/* XXX mbuf.h: just for now. */
+#define	PACKET_TAG_NPF			10
+
+/*
+ * IOCTL structures.
+ */
+
+#define	NPF_IOCTL_TBLENT_ADD		1
+#define	NPF_IOCTL_TBLENT_REM		2
+
+/* Argument of IOC_NPF_TABLE: a single operation on one table entry. */
+typedef struct npf_ioctl_table {
+	/* NPF_IOCTL_TBLENT_ADD or NPF_IOCTL_TBLENT_REM. */
+	int			nct_action;
+	/* ID of the table to operate on. */
+	u_int			nct_tid;
+	/* IPv4 address and mask of the entry. */
+	in_addr_t		nct_addr;
+	in_addr_t		nct_mask;
+	int			_reserved;
+} npf_ioctl_table_t;
+
+/*
+ * IOCTL operations.
+ */
+
+#define	IOC_NPF_VERSION		_IOR('N', 100, int)
+#define	IOC_NPF_SWITCH		_IOW('N', 101, int)
+#define	IOC_NPF_RELOAD		_IOW('N', 102, struct plistref)
+#define	IOC_NPF_TABLE		_IOW('N', 103, struct npf_ioctl_table)
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_alg.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,168 @@
+/*	$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF interface for application level gateways (ALGs).
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#endif
+
+#include <sys/kmem.h>
+#include <sys/pool.h>
+#include <net/pfil.h>
+
+#include "npf_impl.h"
+
+/* NAT ALG structure for registration. */
+struct npf_alg {
+	/* Linkage on the global nat_alg_list. */
+	LIST_ENTRY(npf_alg)		na_entry;
+	/* Set to point at the structure itself on registration. */
+	void *				na_ptr;
+	/* Hooks supplied on registration; any of them may be NULL. */
+	npf_algfunc_t			na_match_func;	/* matching inspector */
+	npf_algfunc_t			na_out_func;	/* outbound (PFIL_OUT) */
+	npf_algfunc_t			na_in_func;	/* inbound (PFIL_IN) */
+	npf_algfunc_t			na_seid_func;	/* session ID inspector */
+};
+
+static LIST_HEAD(, npf_alg)		nat_alg_list;
+
+/*
+ * npf_alg_sysinit: initialise the ALG subsystem, i.e. the (empty) list
+ * of registered ALGs.
+ */
+void
+npf_alg_sysinit(void)
+{
+
+	LIST_INIT(&nat_alg_list);
+}
+
+/*
+ * npf_alg_sysfini: destroy the ALG subsystem.  Nothing is freed here;
+ * it is only asserted that every ALG has been unregistered by now.
+ */
+void
+npf_alg_sysfini(void)
+{
+
+	KASSERT(LIST_EMPTY(&nat_alg_list));
+}
+
+/*
+ * npf_alg_register: create an ALG entry with the given hooks and put it
+ * at the head of the list of registered ALGs.
+ *
+ * => Any of the hooks may be NULL.
+ * => Returns the entry, to be passed to npf_alg_unregister() later.
+ *
+ * XXX: Protected by module lock, but unify serialisation later.
+ */
+npf_alg_t *
+npf_alg_register(npf_algfunc_t match, npf_algfunc_t out, npf_algfunc_t in,
+    npf_algfunc_t seid)
+{
+	npf_alg_t *const nalg = kmem_alloc(sizeof(*nalg), KM_SLEEP);
+
+	/* Record the hooks. */
+	nalg->na_match_func = match;
+	nalg->na_out_func = out;
+	nalg->na_in_func = in;
+	nalg->na_seid_func = seid;
+	/* Self-reference. */
+	nalg->na_ptr = nalg;
+
+	LIST_INSERT_HEAD(&nat_alg_list, nalg, na_entry);
+	return nalg;
+}
+
+/*
+ * npf_alg_unregister: unregister application-level gateway.
+ *
+ * => Removes the ALG from the list and frees it; returns 0 on success.
+ * => Returns ENOENT, leaving the memory untouched, if the given ALG is
+ *    not on the list (e.g. already unregistered).  Freeing it in such
+ *    case would be a double-free or a free of a foreign pointer.
+ */
+int
+npf_alg_unregister(npf_alg_t *alg)
+{
+	npf_alg_t *it;
+
+	/* Make sure the ALG is really registered. */
+	LIST_FOREACH(it, &nat_alg_list, na_entry) {
+		if (alg == it)
+			break;
+	}
+	if (it == NULL) {
+		/* Iteration finished without finding it. */
+		return ENOENT;
+	}
+	LIST_REMOVE(alg, na_entry);
+	/* TODO: Flush relevant sessions. */
+	kmem_free(alg, sizeof(npf_alg_t));
+	return 0;
+}
+
+/*
+ * npf_alg_match: run the matching inspectors of the registered ALGs on
+ * the packet, until one of them reports a match.
+ *
+ * => ALGs without a matching inspector are skipped.
+ * => Inspection stops at the first ALG which returns true; an ALG which
+ *    does not match must not prevent the following ones from being tried.
+ */
+void
+npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt)
+{
+	npf_alg_t *alg;
+	npf_algfunc_t func;
+
+	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
+		func = alg->na_match_func;
+		if (__predict_true(func != NULL) && func(npc, nbuf, nt)) {
+			/* Matched, we are done. */
+			return;
+		}
+	}
+}
+
+/*
+ * npf_alg_exec: run the translation inspector of every registered ALG
+ * according to the direction of the packet.
+ *
+ * => At most one hook per ALG is called: the outbound one if "di" has
+ *    PFIL_OUT set and the hook exists, otherwise the inbound one if "di"
+ *    has PFIL_IN set and the hook exists.
+ * => Return values of the hooks are ignored.
+ */
+void
+npf_alg_exec(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, const int di)
+{
+	const bool outbound = (di & PFIL_OUT) != 0;
+	const bool inbound = (di & PFIL_IN) != 0;
+	npf_alg_t *alg;
+
+	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
+		if (outbound && alg->na_out_func != NULL) {
+			(alg->na_out_func)(npc, nbuf, nt);
+		} else if (inbound && alg->na_in_func != NULL) {
+			(alg->na_in_func)(npc, nbuf, nt);
+		}
+	}
+}
+
+/*
+ * npf_alg_sessionid: ask the registered ALGs to construct a session key
+ * for the packet.
+ *
+ * => Returns true as soon as the session ID inspector of some ALG
+ *    succeeds; false if none of them did (or none is registered).
+ */
+bool
+npf_alg_sessionid(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *key)
+{
+	npf_alg_t *alg;
+
+	LIST_FOREACH(alg, &nat_alg_list, na_entry) {
+		const npf_algfunc_t seid = alg->na_seid_func;
+
+		/* ALGs without the inspector are skipped. */
+		if (seid != NULL && seid(npc, nbuf, key)) {
+			return true;
+		}
+	}
+	return false;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,341 @@
+/*	$NetBSD: npf_alg_icmp.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF ALG for ICMP and traceroute translations.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#endif
+#include <sys/module.h>
+#include <sys/pool.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include <net/pfil.h>
+
+#include "npf_impl.h"
+
+MODULE(MODULE_CLASS_MISC, npf_alg_icmp, "npf");
+
+/*
+ * Traceroute criteria.
+ *
+ * IANA assigned base port: 33434.  However, common practice is to increase
+ * the port, thus monitor [33434-33484] range.  Additional filter is TTL < 50.
+ */
+
+#define	TR_BASE_PORT	33434
+#define	TR_PORT_RANGE	33484
+#define	TR_MAX_TTL	50
+
+static npf_alg_t *	alg_icmp;
+
+static bool		npfa_icmp_match(npf_cache_t *, nbuf_t *, void *);
+static bool		npfa_icmp_natin(npf_cache_t *, nbuf_t *, void *);
+static bool		npfa_icmp_session(npf_cache_t *, nbuf_t *, void *);
+
+/*
+ * npf_alg_icmp_{init,fini,modcmd}: ICMP ALG initialization, destruction
+ * and module interface.
+ */
+
+static int
+npf_alg_icmp_init(void)
+{
+
+	/*
+	 * Register the ALG: matching inspector, no outbound hook, inbound
+	 * translation hook and session ID inspector.  The handle is kept
+	 * in alg_icmp for npfa_icmp_match() and for unregistering.
+	 */
+	alg_icmp = npf_alg_register(npfa_icmp_match, NULL,
+	    npfa_icmp_natin, npfa_icmp_session);
+	KASSERT(alg_icmp != NULL);
+	return 0;
+}
+
+static int
+npf_alg_icmp_fini(void)
+{
+
+	/* Unregister the ALG; the result is passed back to the caller. */
+	KASSERT(alg_icmp != NULL);
+	return npf_alg_unregister(alg_icmp);
+}
+
+static int
+npf_alg_icmp_modcmd(modcmd_t cmd, void *arg)
+{
+
+	/* Module load: register the ALG. */
+	if (cmd == MODULE_CMD_INIT) {
+		return npf_alg_icmp_init();
+	}
+	/* Module unload: unregister the ALG. */
+	if (cmd == MODULE_CMD_FINI) {
+		return npf_alg_icmp_fini();
+	}
+	/* Any other module command is not supported. */
+	return ENOTTY;
+}
+
+/*
+ * npfa_icmp_match: ALG matching inspector.  Detects the traceroute case,
+ * i.e. a TCP or UDP packet with the destination port in the range of
+ * [TR_BASE_PORT, TR_PORT_RANGE] and TTL not greater than TR_MAX_TTL,
+ * and associates this ALG with the given translation entry.
+ *
+ * => Returns true if the ALG was associated, false otherwise.
+ */
+static bool
+npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
+{
+	void *ttl_ptr = nbuf_dataptr(nbuf);
+	npf_nat_t *nt = ntptr;
+	in_port_t dport;
+	uint8_t ttl;
+
+	/* Only TCP and UDP traceroute are handled. */
+	switch (npc->npc_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		break;
+	default:
+		return false;
+	}
+	KASSERT(npf_iscached(npc, NPC_PORTS));
+
+	/* Destination port has to be within the traceroute range. */
+	dport = ntohs(npc->npc_dport);
+	if (!(dport >= TR_BASE_PORT && dport <= TR_PORT_RANGE)) {
+		return false;
+	}
+
+	/* Fetch TTL from the IP header; it has to be low. */
+	ttl_ptr = nbuf_advance(&nbuf, ttl_ptr, offsetof(struct ip, ip_ttl));
+	if (ttl_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, ttl_ptr, sizeof(uint8_t), &ttl) != 0) {
+		return false;
+	}
+	if (ttl > TR_MAX_TTL) {
+		return false;
+	}
+
+	/* All criteria met: bind the ALG to the translation entry. */
+	npf_nat_setalg(nt, alg_icmp, 0);
+	return true;
+}
+
+/*
+ * npf_icmp_uniqid: retrieve unique identifiers - either ICMP query ID
+ * or TCP/UDP ports of the original packet, which is embedded.
+ *
+ * => "type" is the ICMP type; "n_ptr" points to the ICMP header in nbuf.
+ * => Results are stored in "npc": for error messages the protocol,
+ *    addresses and ports of the embedded packet; for query messages
+ *    npc_icmp_id, with NPC_ICMP_ID flag set.
+ * => Returns false if the type carries no identifiers, the embedded
+ *    packet is not TCP/UDP or the data could not be fetched.
+ */
+static inline bool
+npf_icmp_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	u_int offby;
+
+	/* Per RFC 792. */
+	switch (type) {
+	case ICMP_UNREACH:
+	case ICMP_SOURCEQUENCH:
+	case ICMP_REDIRECT:
+	case ICMP_TIMXCEED:
+	case ICMP_PARAMPROB:
+		/* Should contain original IP header. */
+		offby = offsetof(struct icmp, icmp_ip);
+		if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+			return false;
+		}
+		/* Fetch into the cache. */
+		if (!npf_ip4_proto(npc, nbuf, n_ptr)) {
+			return false;
+		}
+		/* Only embedded TCP and UDP packets are of interest. */
+		const int proto = npc->npc_proto;
+		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
+			return false;
+		}
+		if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+			return false;
+		}
+		if (!npf_fetch_ports(npc, nbuf, n_ptr, proto)) {
+			return false;
+		}
+		return true;
+
+	case ICMP_ECHOREPLY:
+	case ICMP_ECHO:
+	case ICMP_TSTAMP:
+	case ICMP_TSTAMPREPLY:
+	case ICMP_IREQ:
+	case ICMP_IREQREPLY:
+		/* Should contain ICMP query ID. */
+		offby = offsetof(struct icmp, icmp_id);
+		if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+			return false;
+		}
+		/* The ID is stored as fetched, without byte-swapping. */
+		if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t),
+		    &npc->npc_icmp_id)) {
+			return false;
+		}
+		npc->npc_info |= NPC_ICMP_ID;
+		return true;
+	default:
+		break;
+	}
+	/* No unique IDs. */
+	return false;
+}
+
+/*
+ * npfa_icmp_session: ALG session inspector.  Builds a session key in the
+ * cache passed via "keyptr" out of the identifiers of an ICMP packet.
+ *
+ * => Query messages: key has the addresses of the packet itself, with
+ *    the ICMP query ID used in place of both ports.
+ * => Error messages: key has the addresses and ports of the embedded
+ *    packet, with source and destination swapped.
+ * => Returns false if the packet is not ICMP or has no identifiers.
+ */
+static bool
+npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
+{
+	npf_cache_t *key = keyptr;
+	void *n_ptr;
+
+	/* Not ICMP - not our case. */
+	if (npc->npc_proto != IPPROTO_ICMP) {
+		return false;
+	}
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ICMP));
+	key->npc_info = NPC_ICMP;
+
+	/* Skip the IP header to get to the ICMP header. */
+	n_ptr = nbuf_dataptr(nbuf);
+#ifdef _NPF_TESTING
+	if (npc->npc_elen != 0) {	/* XXX */
+		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
+		if (n_ptr == NULL)
+			return false;
+	}
+#endif
+	n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
+	if (n_ptr == NULL) {
+		return false;
+	}
+
+	/* Identifiers go into the separate (key) cache. */
+	if (!npf_icmp_uniqid(npc->npc_icmp_type, key, nbuf, n_ptr)) {
+		return false;
+	}
+
+	if (!npf_iscached(key, NPC_ICMP_ID)) {
+		/*
+		 * Embedded IP packet is the original of "forwards" stream.
+		 * Swap source and destination to imitate the "backwards"
+		 * stream for inspection.
+		 */
+		KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
+		const in_addr_t osrc = key->npc_srcip;
+		const in_port_t osport = key->npc_sport;
+
+		key->npc_srcip = key->npc_dstip;
+		key->npc_sport = key->npc_dport;
+		key->npc_dstip = osrc;
+		key->npc_dport = osport;
+		return true;
+	}
+
+	/* Query ID case: take the addresses of the ICMP packet itself. */
+	key->npc_dir = npc->npc_dir;
+	key->npc_proto = npc->npc_proto;
+	key->npc_srcip = npc->npc_srcip;
+	key->npc_dstip = npc->npc_dstip;
+	/* ICMP query ID stands for both of the ports. */
+	key->npc_sport = key->npc_icmp_id;
+	key->npc_dport = key->npc_icmp_id;
+	key->npc_info |= NPC_IP46 | NPC_ADDRS | NPC_PORTS;
+	return true;
+}
+
+/*
+ * npfa_icmp_natin: ALG inbound translation inspector, rewrite IP address
+ * in the IP header, which is embedded in ICMP packet.
+ *
+ * => Applies only to ICMP messages embedding the original IP packet;
+ *    query messages, which carry an ICMP ID instead, are left untouched.
+ * => Returns true if the embedded packet was rewritten and the ICMP
+ *    checksum updated; false if not applicable or on fetch/store failure.
+ */
+static bool
+npfa_icmp_natin(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+	npf_cache_t enpc;
+	u_int offby;
+	uint16_t cksum;
+
+	/* XXX: Duplicated work. */
+	if (!npfa_icmp_session(npc, nbuf, &enpc)) {
+		return false;
+	}
+	/*
+	 * Query messages have no embedded IP header: there is nothing to
+	 * rewrite and enpc.npc_ipsum was never fetched.  Going on would
+	 * scribble over the ICMP payload using uninitialised data.
+	 */
+	if (npf_iscached(&enpc, NPC_ICMP_ID)) {
+		return false;
+	}
+	KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
+
+	/* Advance to ICMP checksum and fetch it. */
+	offby = npc->npc_hlen + offsetof(struct icmp, icmp_cksum);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum)) {
+		return false;
+	}
+
+	/* Save the data for checksum update later. */
+	void *cnbuf = nbuf, *cnptr = n_ptr;
+	uint16_t ecksum = enpc.npc_ipsum;
+
+	/* Advance to the original IP header, which is embedded after ICMP. */
+	offby = offsetof(struct icmp, icmp_ip) -
+	    offsetof(struct icmp, icmp_cksum);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
+		return false;
+	}
+
+	/*
+	 * Rewrite source IP address and port of the embedded IP header,
+	 * which represents original packet - therefore passing PFIL_OUT.
+	 *
+	 * NOTE(review): npfa_icmp_session() has swapped source and
+	 * destination in enpc; confirm that npf_rwrip(), npf_rwrport() and
+	 * the fixups below are meant to operate on the swapped values.
+	 */
+	npf_nat_t *nt = ntptr;
+	in_addr_t addr;
+	in_port_t port;
+
+	npf_nat_getlocal(nt, &addr, &port);
+
+	if (!npf_rwrip(&enpc, nbuf, n_ptr, PFIL_OUT, addr)) {
+		return false;
+	}
+	if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port, addr)) {
+		return false;
+	}
+
+	/*
+	 * Fixup and update ICMP checksum.
+	 * Note: npf_rwrip() has updated the IP checksum.
+	 */
+	cksum = npf_fixup32_cksum(cksum, enpc.npc_srcip, addr);
+	cksum = npf_fixup16_cksum(cksum, enpc.npc_sport, port);
+	cksum = npf_fixup16_cksum(cksum, ecksum, enpc.npc_ipsum);
+	/* FIXME: Updated UDP/TCP checksum joins-in too., when != 0, sigh. */
+	if (nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum)){
+		return false;
+	}
+	return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_ctl.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,485 @@
+/*	$NetBSD: npf_ctl.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF device control.
+ *
+ * Implementation of (re)loading, construction of tables and rules.
+ * NPF proplib(9) dictionary consumer.
+ *
+ * TODO:
+ * - Consider implementing 'sync' functionality.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#endif
+
+#include <prop/proplib.h>
+
+#include "npf_ncode.h"
+#include "npf_impl.h"
+
+/*
+ * npfctl_switch: turn packet inspection on or off.
+ *
+ * => "data" points to an int: non-zero to enable, zero to disable.
+ * => Enabling may fail with the error of npf_register_pfil(); disabling
+ *    always succeeds.
+ */
+int
+npfctl_switch(void *data)
+{
+	const int enable = *(int *)data;
+
+	if (!enable) {
+		/* Off: take the pfil hooks away. */
+		npf_unregister_pfil();
+		return 0;
+	}
+	/* On: install the pfil hooks. */
+	return npf_register_pfil();
+}
+
+/*
+ * npf_mk_tables: construct the tables, described by the given proplib
+ * array, in the tableset.
+ *
+ * => Each element is a dictionary with "id", "type" and an "entries"
+ *    array of dictionaries having "addr" and "mask".
+ * => Returns 0 on success; EINVAL on malformed input, ENOMEM on failed
+ *    allocation, or the error of table validation / entry insertion.
+ * => On error the tableset is left partially filled.
+ */
+static int
+npf_mk_tables(npf_tableset_t *tblset, prop_array_t tables)
+{
+	prop_object_iterator_t it;
+	prop_dictionary_t tbldict;
+	prop_object_t obj;
+	int error = 0;
+
+	/* Tables - array. */
+	if (prop_object_type(tables) != PROP_TYPE_ARRAY)
+		return EINVAL;
+
+	it = prop_array_iterator(tables);
+	if (it == NULL)
+		return ENOMEM;
+
+	while ((tbldict = prop_object_iterator_next(it)) != NULL) {
+		prop_dictionary_t ent;
+		prop_object_iterator_t eit;
+		prop_array_t entries;
+		npf_table_t *t;
+		u_int tid;
+		int type;
+
+		/* Table - dictionary. */
+		if (prop_object_type(tbldict) != PROP_TYPE_DICTIONARY) {
+			error = EINVAL;
+			break;
+		}
+
+		/*
+		 * Table ID and type.
+		 * NOTE(review): the looked-up objects are not checked before
+		 * conversion; confirm that prop_number_integer_value()
+		 * tolerates missing or non-number objects.
+		 */
+		obj = prop_dictionary_get(tbldict, "id");
+		tid = (u_int)prop_number_integer_value(obj);
+		obj = prop_dictionary_get(tbldict, "type");
+		type = (int)prop_number_integer_value(obj);
+		/* Validate them. */
+		error = npf_table_check(tblset, tid, type);
+		if (error)
+			break;
+
+		/* Create and insert the table. */
+		t = npf_table_create(tid, type, 1024);	/* XXX */
+		if (t == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		/* Insertion failure is only asserted, not handled. */
+		error = npf_tableset_insert(tblset, t);
+		KASSERT(error == 0);
+
+		/* Entries. */
+		entries = prop_dictionary_get(tbldict, "entries");
+		if (prop_object_type(entries) != PROP_TYPE_ARRAY) {
+			error = EINVAL;
+			break;
+		}
+		eit = prop_array_iterator(entries);
+		if (eit == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		while ((ent = prop_object_iterator_next(eit)) != NULL) {
+			in_addr_t addr, mask;	/* XXX: IPv6 */
+
+			/* Address. */
+			obj = prop_dictionary_get(ent, "addr");
+			addr = (in_addr_t)prop_number_integer_value(obj);
+			/* Mask. */
+			obj = prop_dictionary_get(ent, "mask");
+			mask = (in_addr_t)prop_number_integer_value(obj);
+			/* Add a table entry. */
+			error = npf_table_add_v4cidr(tblset, tid, addr, mask);
+			if (error)
+				break;
+		}
+		/* Release the inner iterator before leaving on error. */
+		prop_object_iterator_release(eit);
+		if (error)
+			break;
+	}
+	prop_object_iterator_release(it);
+	/*
+	 * Note: in a case of error, caller will free entire tableset.
+	 */
+	return error;
+}
+
+/*
+ * npf_mk_ncode: make a validated private copy of the n-code.
+ *
+ * => Returns the copy, which is to be released with npf_ncode_free().
+ * => Returns NULL if allocation has failed or n-code is not valid.
+ */
+static void *
+npf_mk_ncode(const void *ncptr, size_t nc_size)
+{
+	void *copy;
+	int errat;
+
+	/*
+	 * XXX: Inefficient; consider extending proplib(9) to provide
+	 * interface for custom allocator and avoid copy.
+	 */
+	copy = npf_ncode_alloc(nc_size);
+	if (copy == NULL) {
+		return NULL;
+	}
+	memcpy(copy, ncptr, nc_size);
+
+	/* Validation is performed on the copy. */
+	if (npf_ncode_validate(copy, nc_size, &errat) == 0) {
+		return copy;
+	}
+	/* TODO: return error details via proplib */
+	npf_ncode_free(copy, nc_size);
+	return NULL;
+}
+
+/*
+ * npf_mk_singlerule: construct a rule out of the proplib dictionary and
+ * insert it into the given ruleset.
+ *
+ * => Dictionary keys: "attributes", "priority", "interface" (integers)
+ *    and optional "ncode" (binary data).
+ * => If "parent" is not NULL, the new rule is stored there.
+ * => Returns EINVAL on malformed dictionary or invalid n-code, ENOMEM
+ *    if the rule could not be allocated.
+ */
+static int
+npf_mk_singlerule(prop_dictionary_t rldict,
+    npf_ruleset_t *rlset, npf_rule_t **parent)
+{
+	void *nc = NULL;
+	size_t nc_size = 0;
+	npf_rule_t *rl;
+	prop_object_t obj;
+	int attr, ifidx;
+	pri_t pri;
+
+	if (prop_object_type(rldict) != PROP_TYPE_DICTIONARY) {
+		return EINVAL;
+	}
+
+	/* Integer properties: attributes, priority and interface ID. */
+	attr = prop_number_integer_value(
+	    prop_dictionary_get(rldict, "attributes"));
+	pri = prop_number_integer_value(
+	    prop_dictionary_get(rldict, "priority"));
+	ifidx = prop_number_integer_value(
+	    prop_dictionary_get(rldict, "interface"));
+
+	/* N-code is optional; if present - copy and validate it. */
+	obj = prop_dictionary_get(rldict, "ncode");
+	if (obj != NULL) {
+		const void *ncptr;
+
+		nc_size = prop_data_size(obj);
+		ncptr = prop_data_data_nocopy(obj);
+		if (ncptr == NULL || nc_size > NPF_NCODE_LIMIT) {
+			return EINVAL;
+		}
+		nc = npf_mk_ncode(ncptr, nc_size);
+		if (nc == NULL) {
+			return EINVAL;
+		}
+	}
+
+	/* Create the rule; on failure the n-code copy is still ours. */
+	rl = npf_rule_alloc(attr, pri, ifidx, nc, nc_size);
+	if (rl == NULL) {
+		if (nc != NULL) {
+			npf_ncode_free(nc, nc_size);	/* XXX */
+		}
+		return ENOMEM;
+	}
+	npf_ruleset_insert(rlset, rl);
+	if (parent != NULL) {
+		*parent = rl;
+	}
+	return 0;
+}
+
+/*
+ * npf_mk_rules: construct the rules, described by the given proplib
+ * array, in the ruleset.
+ *
+ * => Each element is a rule dictionary (see npf_mk_singlerule()), which
+ *    may have a "subrules" array; those are inserted into the subset of
+ *    the rule they belong to.  Only one level of nesting is processed.
+ * => Returns 0 on success, otherwise EINVAL, ENOMEM or the error of
+ *    npf_mk_singlerule().
+ */
+static int
+npf_mk_rules(npf_ruleset_t *rlset, prop_array_t rules)
+{
+	prop_object_iterator_t it;
+	prop_dictionary_t rldict;
+	int error;
+
+	/* Ruleset - array. */
+	if (prop_object_type(rules) != PROP_TYPE_ARRAY)
+		return EINVAL;
+
+	it = prop_array_iterator(rules);
+	if (it == NULL)
+		return ENOMEM;
+
+	error = 0;
+	while ((rldict = prop_object_iterator_next(it)) != NULL) {
+		prop_object_iterator_t sit;
+		prop_array_t subrules;
+		prop_dictionary_t srldict;
+		npf_rule_t *myrl;
+
+		/* Generate a single rule. */
+		error = npf_mk_singlerule(rldict, rlset, &myrl);
+		if (error)
+			break;
+
+		/* Check for subrules. */
+		subrules = prop_dictionary_get(rldict, "subrules");
+		if (subrules == NULL) {
+			/* No subrules, next.. */
+			continue;
+		}
+		/* Generate subrules, if any. */
+		if (prop_object_type(subrules) != PROP_TYPE_ARRAY) {
+			error = EINVAL;
+			break;
+		}
+		sit = prop_array_iterator(subrules);
+		if (sit == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		while ((srldict = prop_object_iterator_next(sit)) != NULL) {
+			/* For subrule, pass ruleset pointer of parent. */
+			error = npf_mk_singlerule(srldict,
+			    npf_rule_subset(myrl), NULL);
+			if (error)
+				break;
+		}
+		/* Release the inner iterator before leaving on error. */
+		prop_object_iterator_release(sit);
+		if (error)
+			break;
+	}
+	prop_object_iterator_release(it);
+	/*
+	 * Note: in a case of error, caller will free entire ruleset.
+	 */
+	return error;
+}
+
+/*
+ * npf_mk_natlist: construct the NAT policies, described by the given
+ * proplib array, in the NAT ruleset.
+ *
+ * => Each element is a rule dictionary with additional "gateway_ip".
+ * => Returns 0 on success, otherwise EINVAL, ENOMEM or the error of
+ *    npf_mk_singlerule().
+ * => On error, caller is responsible for destroying the NAT ruleset
+ *    together with the policies already assigned.
+ */
+static int
+npf_mk_natlist(npf_ruleset_t *nset, prop_array_t natlist)
+{
+	prop_object_iterator_t it;
+	prop_dictionary_t natdict;
+	int error = 0;
+
+	if (prop_object_type(natlist) != PROP_TYPE_ARRAY) {
+		return EINVAL;
+	}
+	it = prop_array_iterator(natlist);
+	if (it == NULL) {
+		return ENOMEM;
+	}
+
+	for (natdict = prop_object_iterator_next(it); natdict != NULL;
+	    natdict = prop_object_iterator_next(it)) {
+		npf_natpolicy_t *np;
+		npf_rule_t *rl;
+		in_addr_t gip;
+
+		/* Every policy has to be a dictionary. */
+		if (prop_object_type(natdict) != PROP_TYPE_DICTIONARY) {
+			error = EINVAL;
+			break;
+		}
+		gip = (in_addr_t)prop_number_integer_value(
+		    prop_dictionary_get(natdict, "gateway_ip"));
+
+		/*
+		 * A NAT policy is a standard rule with the translation
+		 * information attached: make the rule first.
+		 */
+		error = npf_mk_singlerule(natdict, nset, &rl);
+		if (error != 0) {
+			break;
+		}
+
+		/* Then create the policy and hand it over to the rule. */
+		np = npf_nat_newpolicy(gip);
+		if (np == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		npf_rule_setnat(rl, np);
+	}
+	prop_object_iterator_release(it);
+	return error;
+}
+
+/*
+ * npfctl_reload: store passed data i.e. update settings, create passed
+ * tables, rules and atomically activate all them.
+ *
+ * => "data" is the plistref of the configuration dictionary, having
+ *    "version", "nat", "tables" and "rules".
+ * => Returns 0 on success; EINVAL on version mismatch; otherwise the
+ *    error of dictionary retrieval or of NAT/table/rule construction,
+ *    in which case the active configuration is not replaced.
+ */
+int
+npfctl_reload(u_long cmd, void *data)
+{
+	const struct plistref *pref = data;
+	npf_tableset_t *tblset = NULL;
+	npf_ruleset_t *rlset = NULL;
+	npf_ruleset_t *nset = NULL;
+	prop_dictionary_t dict;
+	prop_array_t natlist, tables, rules;
+	prop_object_t ver;
+	int error;
+
+	/* Retrieve the dictionary. */
+#ifdef _KERNEL
+	error = prop_dictionary_copyin_ioctl(pref, cmd, &dict);
+	if (error)
+		return error;
+#else
+	dict = prop_dictionary_internalize_from_file(data);
+	if (dict == NULL)
+		return EINVAL;
+#endif
+	/* Version. */
+	ver = prop_dictionary_get(dict, "version");
+	if (ver == NULL || prop_number_integer_value(ver) != NPF_VERSION) {
+		/*
+		 * Nothing has been constructed yet, therefore bail out
+		 * directly.  Taking the common failure path would trip
+		 * its assertion, which expects at least one set to exist.
+		 */
+		prop_object_release(dict);
+		return EINVAL;
+	}
+
+	/*
+	 * XXX: Hard way for now.
+	 *
+	 * NOTE(review): the failure paths below leave session tracking
+	 * disabled; confirm whether it should be re-enabled on error.
+	 */
+	(void)npf_session_tracking(false);
+
+	/* NAT policies. */
+	nset = npf_ruleset_create();
+	natlist = prop_dictionary_get(dict, "nat");
+	error = npf_mk_natlist(nset, natlist);
+	if (error)
+		goto fail;
+
+	/* Tables. */
+	tblset = npf_tableset_create();
+	tables = prop_dictionary_get(dict, "tables");
+	error = npf_mk_tables(tblset, tables);
+	if (error)
+		goto fail;
+
+	/* Rules. */
+	rlset = npf_ruleset_create();
+	rules = prop_dictionary_get(dict, "rules");
+	error = npf_mk_rules(rlset, rules);
+	if (error)
+		goto fail;
+
+	/* Flush and reload NAT policies. */
+	npf_nat_reload(nset);
+
+	/*
+	 * Finally, reload the ruleset.  It will also reload the tableset.
+	 * Operation will be performed as a single transaction.
+	 */
+	npf_ruleset_reload(rlset, tblset);
+
+	(void)npf_session_tracking(true);
+
+	/* Done.  Since data is consumed now, we shall not destroy it. */
+	tblset = NULL;
+	rlset = NULL;
+	nset = NULL;
+fail:
+	prop_object_release(dict);
+	/*
+	 * Note: destroy rulesets first, to drop references to the tableset.
+	 */
+	KASSERT(error == 0 || (nset || rlset || tblset));
+	if (nset) {
+		npf_ruleset_destroy(nset);
+	}
+	if (rlset) {
+		npf_ruleset_destroy(rlset);
+	}
+	if (tblset) {
+		npf_tableset_destroy(tblset);
+	}
+	return error;
+}
+
+/*
+ * npfctl_table: add, remove or query an entry in the specified table.
+ *
+ * => "data" points to npf_ioctl_table_t describing the operation.
+ * => Any action other than add or remove is treated as a query, which
+ *    returns 0 if the lookup succeeds and EINVAL otherwise.
+ *
+ * For maximum performance, interface is avoiding proplib(3)'s overhead.
+ */
+int
+npfctl_table(void *data)
+{
+	const npf_ioctl_table_t *nct = data;
+	const u_int tid = nct->nct_tid;
+	const in_addr_t addr = nct->nct_addr;
+	const in_addr_t mask = nct->nct_mask;
+
+	if (nct->nct_action == NPF_IOCTL_TBLENT_ADD) {
+		return npf_table_add_v4cidr(NULL, tid, addr, mask);
+	}
+	if (nct->nct_action == NPF_IOCTL_TBLENT_REM) {
+		return npf_table_rem_v4cidr(NULL, tid, addr, mask);
+	}
+	/* XXX: everything else is a lookup. */
+	return npf_table_match_v4addr(tid, addr) ? EINVAL : 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_handler.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,206 @@
+/*	$NetBSD: npf_handler.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF packet handler.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#endif
+
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <net/if.h>
+#include <net/pfil.h>
+#include <sys/socketvar.h>
+
+#include "npf_impl.h"
+
+/*
+ * If npf_ph_if != NULL, pfil hooks are registered.  If NULL, not registered.
+ * Used to check the state.  Locked by: softnet_lock + KERNEL_LOCK (XXX).
+ *
+ * npf_ph_if is the PFIL_TYPE_IFNET head and npf_ph_inet the PFIL_TYPE_AF
+ * (AF_INET) head; both are obtained in npf_register_pfil().
+ */
+static struct pfil_head *	npf_ph_if = NULL;
+static struct pfil_head *	npf_ph_inet = NULL;
+
+int	npf_packet_handler(void *, struct mbuf **, struct ifnet *, int);
+
+/*
+ * npf_ifhook: hook handling interface changes.
+ *
+ * Currently a placeholder: no argument is used and zero is always
+ * returned.  It is registered on the interface pfil head by
+ * npf_register_pfil().
+ */
+static int
+npf_ifhook(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
+{
+
+	return 0;
+}
+
+/*
+ * npf_packet_handler: main packet handling routine.
+ *
+ * => 'arg' carries the layer the hook was registered for, '*mp' is the
+ *    packet and 'di' the pfil(9) direction (PFIL_IN or PFIL_OUT).
+ * => Returns zero if the packet passes.  Otherwise the packet is freed,
+ *    '*mp' is set to NULL and the error is returned.
+ *
+ * Note: packet flow and inspection logic is in strict order.
+ */
+int
+npf_packet_handler(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
+{
+	const int layer = (const int)(long)arg;
+	nbuf_t *nbuf = *mp;
+	npf_cache_t npc;
+	npf_session_t *se;
+	npf_rule_t *rl;
+	int error;
+
+	/*
+	 * Start with "pass".  Otherwise 'error' would be read uninitialised
+	 * at the end, if 'di' had neither PFIL_IN nor PFIL_OUT set and no
+	 * rule matched.
+	 */
+	error = 0;
+
+	/*
+	 * Initialise packet information cache.
+	 * Note: it is enough to clear the info bits.
+	 */
+	npc.npc_info = 0;
+
+	/* Inspect the list of sessions. */
+	se = npf_session_inspect(&npc, nbuf, ifp, di, layer);
+
+	/* Inbound NAT. */
+	if ((di & PFIL_IN) && (error = npf_natin(&npc, se, nbuf, layer)) != 0) {
+		goto out;
+	}
+
+	/* If a passing session was found, the ruleset is not consulted. */
+	if (se == NULL || !npf_session_pass(se)) {
+		/* Inspect ruleset using this packet. */
+		rl = npf_ruleset_inspect(&npc, nbuf, ifp, di, layer);
+		if (rl != NULL) {
+			bool keepstate;
+			/* Apply the rule. */
+			error = npf_rule_apply(&npc, rl, &keepstate);
+			if (error) {
+				goto out;
+			}
+			/*
+			 * Establish a session, if required.
+			 *
+			 * NOTE(review): if 'se' is non-NULL here (a session
+			 * was found, but it is not a passing one), the
+			 * pointer is overwritten and its reference is never
+			 * released - confirm whether that case can occur.
+			 */
+			if (keepstate) {
+				se = npf_session_establish(&npc, NULL, di);
+			}
+		}
+		/* No rules or "default" rule - pass. */
+	}
+
+	/* Outbound NAT. */
+	if (di & PFIL_OUT) {
+		error = npf_natout(&npc, se, nbuf, ifp, layer);
+	}
+out:
+	/* Release reference on session. */
+	if (se != NULL) {
+		npf_session_release(se);
+	}
+
+	/*
+	 * If error is set - drop the packet.
+	 * Normally, ENETUNREACH is used to "block".
+	 */
+	if (error) {
+		m_freem(*mp);
+		*mp = NULL;
+	}
+	return error;
+}
+
+/*
+ * npf_register_pfil: register pfil(9) hooks.
+ *
+ * => Returns zero on success, EEXIST if the hooks are already registered,
+ *    ENOENT if either pfil head cannot be obtained, or the error returned
+ *    by pfil_add_hook().
+ * => On failure (other than EEXIST) no hook is left registered and the
+ *    state is reset to "not registered".
+ */
+int
+npf_register_pfil(void)
+{
+	int error;
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
+	/* Check if pfil hooks are not already registered. */
+	if (npf_ph_if) {
+		error = EEXIST;
+		goto out;
+	}
+
+	/* Capture point of any activity in interfaces and IP layer. */
+	npf_ph_if = pfil_head_get(PFIL_TYPE_IFNET, 0);
+	npf_ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+	if (npf_ph_if == NULL || npf_ph_inet == NULL) {
+		error = ENOENT;
+		goto fail;
+	}
+
+	/* Interface re-config or attach/detach hook. */
+	error = pfil_add_hook(npf_ifhook, NULL,
+	    PFIL_WAITOK | PFIL_IFADDR | PFIL_IFNET, npf_ph_if);
+	if (error) {
+		goto fail;
+	}
+
+	/* Packet IN/OUT handler on all interfaces and IP layer. */
+	error = pfil_add_hook(npf_packet_handler, (void *)NPF_LAYER_3,
+	    PFIL_WAITOK | PFIL_ALL, npf_ph_inet);
+	if (error == 0) {
+		goto out;
+	}
+
+	/* Roll back the interface hook registered above. */
+	(void)pfil_remove_hook(npf_ifhook, NULL,
+	    PFIL_IFADDR | PFIL_IFNET, npf_ph_if);
+fail:
+	/* Do not report "registered" after a failed or partial attempt. */
+	npf_ph_if = NULL;
+	npf_ph_inet = NULL;
+out:
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
+
+	return error;
+}
+
+/*
+ * npf_unregister_pfil: unregister pfil(9) hooks.
+ *
+ * => Does nothing, if the hooks are not registered (npf_ph_if is NULL).
+ * => Both hooks are removed and npf_ph_if is reset to NULL.
+ */
+void
+npf_unregister_pfil(void)
+{
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
+	if (npf_ph_if == NULL) {
+		goto out;
+	}
+
+	/* Packet handler first, then the interface hook. */
+	(void)pfil_remove_hook(npf_packet_handler, (void *)NPF_LAYER_3,
+	    PFIL_ALL, npf_ph_inet);
+	(void)pfil_remove_hook(npf_ifhook, NULL,
+	    PFIL_IFADDR | PFIL_IFNET, npf_ph_if);
+
+	/* Mark as not registered. */
+	npf_ph_if = NULL;
+out:
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_impl.h	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,214 @@
+/*	$NetBSD: npf_impl.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Private NPF structures and interfaces.
+ * For internal use within NPF core only.
+ */
+
+#ifndef _NPF_IMPL_H_
+#define _NPF_IMPL_H_
+
+#include <sys/rb.h>
+#include <sys/hash.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/rwlock.h>
+
+#include "npf.h"
+#include "npf_ncode.h"
+
+#ifdef _NPF_TESTING
+#include "testing.h"
+#endif
+
+/*
+ * STRUCTURE DECLARATIONS.
+ *
+ * Note: ruleset interface declarations are public.
+ *
+ * Only forward declarations and typedefs are given here; the structures
+ * themselves are not defined in this header.
+ */
+
+struct npf_nat;
+struct npf_session;
+
+typedef struct npf_nat		npf_nat_t;
+typedef struct npf_alg		npf_alg_t;
+typedef struct npf_natpolicy	npf_natpolicy_t;
+typedef struct npf_session	npf_session_t;
+
+struct npf_tblent;
+struct npf_table;
+
+typedef struct npf_tblent	npf_tblent_t;
+typedef struct npf_table	npf_table_t;
+
+/*
+ * Note: npf_tableset_t is itself a pointer to a table, so the
+ * 'npf_tableset_t *' used by the tableset interface is a pointer to
+ * table pointer(s).
+ */
+typedef npf_table_t *		npf_tableset_t;
+
+/*
+ * DEFINITIONS.
+ *
+ * npf_algfunc_t is the callback type taken (four times) by
+ * npf_alg_register().
+ *
+ * NOTE(review): NPF_NCODE_LIMIT and NPF_TABLE_SLOTS are presumably the
+ * n-code size limit and the number of table slots in a tableset; their
+ * users are outside of this header - confirm there.
+ */
+
+typedef bool	(*npf_algfunc_t)(npf_cache_t *, void *, void *);
+
+#define	NPF_NCODE_LIMIT		1024
+#define	NPF_TABLE_SLOTS		32
+
+/*
+ * INTERFACES.
+ */
+
+/*
+ * NPF control.
+ *
+ * npfctl_table() takes a pointer to npf_ioctl_table_t.
+ */
+int		npfctl_switch(void *);
+int		npfctl_reload(u_long, void *);
+int		npfctl_table(void *);
+
+/*
+ * Packet filter hooks.
+ *
+ * npf_register_pfil() returns zero or an error number.
+ */
+int		npf_register_pfil(void);
+void		npf_unregister_pfil(void);
+
+/*
+ * Protocol helpers.
+ *
+ * The 'void *' argument is a pointer into the nbuf, at the start of the
+ * IPv4 header.  The npf_fetch_*() and npf_ip4_proto() routines store the
+ * fetched values in the cache and return true on success.  The npf_rwr*()
+ * routines rewrite port/address and adjust the checksum, returning true
+ * on success.  The npf_fixup*_cksum() routines return the updated checksum.
+ */
+bool		npf_ip4_proto(npf_cache_t *, nbuf_t *, void *);
+bool		npf_fetch_ip4addrs(npf_cache_t *, nbuf_t *, void *);
+bool		npf_fetch_ports(npf_cache_t *, nbuf_t *, void *, const int);
+bool		npf_fetch_icmp(npf_cache_t *, nbuf_t *, void *);
+bool		npf_cache_all_ip4(npf_cache_t *, nbuf_t *, const int);
+
+bool		npf_rwrport(npf_cache_t *, nbuf_t *, void *, const int,
+		    in_port_t, in_addr_t);
+bool		npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int, in_addr_t);
+
+uint16_t	npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
+uint16_t	npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
+
+/*
+ * Complex instructions.
+ *
+ * These return zero on match and -1 otherwise, except that
+ * npf_match_ip4table() returns the result of the table lookup once the
+ * addresses are available.
+ */
+int		npf_match_ether(nbuf_t *, int, int, uint16_t, uint32_t *);
+int		npf_match_ip4table(npf_cache_t *, nbuf_t *, void *,
+		    const int, const u_int);
+int		npf_match_ip4mask(npf_cache_t *, nbuf_t *, void *,
+		    const int, in_addr_t, in_addr_t);
+int		npf_match_tcp_ports(npf_cache_t *, nbuf_t *, void *,
+		    const int, const uint32_t);
+int		npf_match_udp_ports(npf_cache_t *, nbuf_t *, void *,
+		    const int, const uint32_t);
+int		npf_match_icmp4(npf_cache_t *, nbuf_t *, void *,
+		    const int, const int);
+
+/*
+ * Tableset interface.
+ *
+ * Note: npfctl_table() calls npf_table_add_v4cidr() and
+ * npf_table_rem_v4cidr() with a NULL tableset, while
+ * npf_table_match_v4addr() takes no tableset argument at all.
+ * NOTE(review): what a NULL tableset selects is up to the implementation,
+ * which is outside of this header - confirm there.
+ */
+int		npf_tableset_sysinit(void);
+void		npf_tableset_sysfini(void);
+
+npf_tableset_t *npf_tableset_create(void);
+void		npf_tableset_destroy(npf_tableset_t *);
+int		npf_tableset_insert(npf_tableset_t *, npf_table_t *);
+npf_tableset_t *npf_tableset_reload(npf_tableset_t *);
+
+npf_table_t *	npf_table_create(u_int, int, size_t);
+void		npf_table_destroy(npf_table_t *);
+void		npf_table_ref(npf_table_t *);
+void		npf_table_unref(npf_table_t *);
+
+npf_table_t *	npf_table_get(npf_tableset_t *, u_int);
+void		npf_table_put(npf_table_t *);
+int		npf_table_check(npf_tableset_t *, u_int, int);
+int		npf_table_add_v4cidr(npf_tableset_t *, u_int,
+		    in_addr_t, in_addr_t);
+int		npf_table_rem_v4cidr(npf_tableset_t *, u_int,
+		    in_addr_t, in_addr_t);
+int		npf_table_match_v4addr(u_int, in_addr_t);
+
+/*
+ * Ruleset interface.
+ *
+ * Note: the reload path in npfctl hands the new ruleset and tableset to
+ * npf_ruleset_reload() and does not destroy them afterwards.
+ * npf_rule_apply() returns an error to drop the packet and reports through
+ * its 'bool *' argument whether a session shall be established (see
+ * npf_packet_handler()).
+ */
+int		npf_ruleset_sysinit(void);
+void		npf_ruleset_sysfini(void);
+
+npf_ruleset_t *	npf_ruleset_create(void);
+void		npf_ruleset_destroy(npf_ruleset_t *);
+void		npf_ruleset_insert(npf_ruleset_t *, npf_rule_t *);
+void		npf_ruleset_reload(npf_ruleset_t *, npf_tableset_t *);
+
+npf_rule_t *	npf_ruleset_match(npf_ruleset_t *, npf_cache_t *, nbuf_t *,
+		    struct ifnet *, const int, const int);
+npf_rule_t *	npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
+		    struct ifnet *, const int, const int);
+int		npf_rule_apply(const npf_cache_t *, npf_rule_t *, bool *);
+npf_ruleset_t *	npf_rule_subset(npf_rule_t *);
+
+npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
+void		npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *);
+
+/*
+ * State handling interface.
+ *
+ * Note: npf_session_inspect() may return NULL.  A non-NULL session held
+ * by npf_packet_handler() is released with npf_session_release().
+ */
+int		npf_session_sysinit(void);
+void		npf_session_sysfini(void);
+int		npf_session_tracking(bool);
+
+npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *,
+		    struct ifnet *, const int, const int);
+npf_session_t *	npf_session_establish(const npf_cache_t *,
+		    npf_nat_t *, const int);
+void		npf_session_release(npf_session_t *);
+bool		npf_session_pass(const npf_session_t *);
+
+npf_nat_t *	npf_session_retnat(const npf_session_t *);
+
+void		npf_session_link(npf_session_t *, npf_session_t *);
+npf_nat_t *	npf_session_retlinknat(const npf_session_t *);
+
+/*
+ * NAT.
+ *
+ * Note: a non-zero return from npf_natin() or npf_natout() makes
+ * npf_packet_handler() drop the packet.
+ */
+void		npf_nat_sysinit(void);
+void		npf_nat_sysfini(void);
+npf_natpolicy_t *npf_nat_newpolicy(in_addr_t);
+void		npf_nat_freepolicy(npf_natpolicy_t *);
+void		npf_nat_flush(void);
+void		npf_nat_reload(npf_ruleset_t *);
+
+int		npf_natout(npf_cache_t *, npf_session_t *, nbuf_t *,
+		    struct ifnet *, const int);
+int		npf_natin(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
+
+void		npf_nat_expire(npf_nat_t *);
+void		npf_nat_getlocal(npf_nat_t *, in_addr_t *, in_port_t *);
+void		npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
+
+/*
+ * ALG (application level gateway) interface.
+ *
+ * npf_alg_register() takes four callbacks of the npf_algfunc_t type.
+ */
+void		npf_alg_sysinit(void);
+void		npf_alg_sysfini(void);
+npf_alg_t *	npf_alg_register(npf_algfunc_t, npf_algfunc_t,
+		    npf_algfunc_t, npf_algfunc_t);
+int		npf_alg_unregister(npf_alg_t *);
+void		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
+void		npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, const int );
+bool		npf_alg_sessionid(npf_cache_t *, nbuf_t *, npf_cache_t *);
+
+/* Debugging routines. */
+void		npf_rulenc_dump(npf_rule_t *);
+void		npf_sessions_dump(void);
+void		npf_nat_dump(npf_nat_t *);
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_inet.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,410 @@
+/*	$NetBSD: npf_inet.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Various protocol related helper routines.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+
+#include <net/if.h>
+#include <net/ethertypes.h>
+#include <net/if_ether.h>
+#endif
+#include <net/pfil.h>
+
+#include "npf_impl.h"
+
+/*
+ * npf_fixup{16,32}_cksum: incrementally update an Internet checksum after
+ * a 16-bit (or 32-bit) datum covered by it has changed from 'odatum' to
+ * 'ndatum'.
+ *
+ * => Checksum and data are taken, and the result is returned, in network
+ *    byte order.
+ */
+
+uint16_t
+npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
+{
+	/* RFC 1624 terms: ~HC, ~m and m' of HC' = ~(~HC + ~m + m'). */
+	const uint32_t hc_inv = ~ntohs(cksum) & 0xffff;
+	const uint32_t old_inv = ~ntohs(odatum) & 0xffff;
+	const uint32_t new_val = ntohs(ndatum);
+	uint32_t acc;
+
+	acc = hc_inv + (old_inv + new_val);
+
+	/* Fold the carries back into the low 16 bits. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc += (acc >> 16);
+
+	return htons(~acc & 0xffff);
+}
+
+/*
+ * npf_fixup32_cksum: 32-bit variant, performed as two 16-bit updates.
+ */
+uint16_t
+npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum)
+{
+	const uint16_t olo = odatum & 0xffff, nlo = ndatum & 0xffff;
+	const uint16_t ohi = odatum >> 16, nhi = ndatum >> 16;
+	uint16_t lo_fixed;
+
+	/* Low halves first, then the high halves. */
+	lo_fixed = npf_fixup16_cksum(cksum, olo, nlo);
+	return npf_fixup16_cksum(lo_fixed, ohi, nhi);
+}
+
+/*
+ * npf_ip4_proto: check IPv4 header and fetch the protocol number.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header in 'nbuf'.
+ * => Returns true on success.  Returns false if the data cannot be
+ *    fetched, the version is not IPv4, the header length is shorter than
+ *    struct ip, or the packet is a fragment (MF flag or fragment offset
+ *    is set).
+ * => On success, stores protocol number, header length and IP checksum
+ *    in the cache and marks NPC_IP46 as cached.
+ */
+bool
+npf_ip4_proto(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	u_int hlen, offby;
+	uint16_t val16;
+	uint8_t val8;
+	int error;
+
+	/* IPv4 header: check IP version and header length. */
+	error = nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &val8);
+	if (error || (val8 >> 4) != IPVERSION)
+		return false;
+	hlen = (val8 & 0xf) << 2;
+	if (hlen < sizeof(struct ip))
+		return false;
+	offby = offsetof(struct ip, ip_off);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+
+	/*
+	 * IPv4 header: check fragment offset.  The whole 16-bit ip_off
+	 * field must be fetched, since the mask is a 16-bit value in
+	 * network byte order: apart from DF and RF, neither MF nor any
+	 * of the offset bits may be set.
+	 */
+	error = nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &val16);
+	if (error || (val16 & ~htons(IP_DF | IP_RF)))
+		return false;
+
+	/* Get and match protocol. */
+	KASSERT(offsetof(struct ip, ip_p) > offby);
+	offby = offsetof(struct ip, ip_p) - offby;
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &val8))
+		return false;
+
+	/* IP checksum. */
+	offby = offsetof(struct ip, ip_sum) - offsetof(struct ip, ip_p);
+	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
+		return false;
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &npc->npc_ipsum))
+		return false;
+
+	/* Cache: IPv4, protocol, header length. */
+	npc->npc_info |= NPC_IP46;
+	npc->npc_proto = val8;
+	npc->npc_hlen = hlen;
+	return true;
+}
+
+/*
+ * npf_fetch_ip4addrs: fetch source and destination address from IPv4 header.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header.
+ * => Stores both addresses into the cache and marks NPC_ADDRS as cached.
+ * => Returns false, if advancing or fetching fails.
+ */
+bool
+npf_fetch_ip4addrs(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	const u_int src_off = offsetof(struct ip, ip_src);
+	const u_int dst_off = offsetof(struct ip, ip_dst);
+
+	/* Step to ip_src and fetch the source address. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, src_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_addr_t), &npc->npc_srcip)) {
+		return false;
+	}
+
+	/* Step over to ip_dst and fetch the destination address. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, dst_off - src_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_addr_t), &npc->npc_dstip)) {
+		return false;
+	}
+
+	/* Both addresses are cached. */
+	npc->npc_info |= NPC_ADDRS;
+	return true;
+}
+
+/*
+ * npf_fetch_ports: fetch ports from either TCP or UDP header.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header; the IPv4 header is
+ *    checked first, unless NPC_IP46 is already cached.
+ * => Fails, if the cached protocol differs from 'proto'.
+ * => Stores both source and destination ports into the cache and marks
+ *    NPC_PORTS as cached.
+ */
+bool
+npf_fetch_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int proto)
+{
+	u_int dport_off;
+
+	/* The IPv4 header has to be checked and cached. */
+	if (!npf_iscached(npc, NPC_IP46)) {
+		if (!npf_ip4_proto(npc, nbuf, n_ptr)) {
+			return false;
+		}
+	}
+
+	/* Skip the IPv4 header; protocol must be the requested one. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (npc->npc_proto != proto) {
+		return false;
+	}
+
+	/*
+	 * The source port is the first field of both TCP and UDP headers.
+	 * Only the offset of the destination port has to be selected.
+	 */
+	CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
+	CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
+	if (proto != IPPROTO_TCP) {
+		KASSERT(proto == IPPROTO_UDP);
+		dport_off = offsetof(struct udphdr, uh_dport);
+	} else {
+		dport_off = offsetof(struct tcphdr, th_dport);
+	}
+
+	/* Source port. */
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_sport)) {
+		return false;
+	}
+
+	/* Destination port. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, dport_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_dport)) {
+		return false;
+	}
+
+	/* Both ports are cached. */
+	npc->npc_info |= NPC_PORTS;
+	return true;
+}
+
+/*
+ * npf_fetch_icmp: fetch ICMP type and code.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header, which must already
+ *    be cached (NPC_IP46); the header is skipped here.
+ * => Stores type and code into the cache and marks NPC_ICMP as cached.
+ */
+bool
+npf_fetch_icmp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	const u_int code_off = offsetof(struct icmp, icmp_code);
+	uint8_t icmp_type;
+
+	KASSERT(npf_iscached(npc, NPC_IP46));
+
+	/* ICMP type: the first field after the IPv4 header. */
+	CTASSERT(offsetof(struct icmp, icmp_type) == 0);
+	n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &icmp_type)) {
+		return false;
+	}
+
+	/* ICMP code. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, code_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &npc->npc_icmp_code)) {
+		return false;
+	}
+
+	/* Mark as cached. */
+	npc->npc_icmp_type = icmp_type;
+	npc->npc_info |= NPC_ICMP;
+	return true;
+}
+
+/*
+ * npf_fetch_tcpfl: fetch TCP flags into the cache.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header; npc_hlen must be set.
+ */
+static inline bool
+npf_fetch_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
+{
+	const u_int fl_off = npc->npc_hlen + offsetof(struct tcphdr, th_flags);
+
+	/* Skip the IPv4 header and step to th_flags in one go. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, fl_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	return nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t),
+	    &npc->npc_tcp_flags) == 0;
+}
+
+/*
+ * npf_cache_all_ip4: general routine to cache all relevant IPv4 and
+ * TCP, UDP or ICMP data.
+ *
+ * => For NPF_LAYER_2, the Ethernet header is matched and skipped first.
+ * => Returns false on any failure, or if the protocol is none of TCP,
+ *    UDP and ICMP.
+ */
+bool
+npf_cache_all_ip4(npf_cache_t *npc, nbuf_t *nbuf, const int layer)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+
+	if (layer == NPF_LAYER_2) {
+		u_int elen;
+
+		/* Ethernet: match if ETHERTYPE_IP and if so - advance. */
+		if (npf_match_ether(nbuf, 1, 0, ETHERTYPE_IP, &elen) != 0) {
+			return false;
+		}
+		n_ptr = nbuf_advance(&nbuf, n_ptr, elen);
+		if (n_ptr == NULL) {
+			return false;
+		}
+		/* Cache Ethernet header length. XXX */
+		npc->npc_elen = elen;
+	}
+
+	/* IPv4: get protocol, source and destination addresses. */
+	if (!npf_iscached(npc, NPC_IP46)) {
+		if (!npf_ip4_proto(npc, nbuf, n_ptr)) {
+			return false;
+		}
+	}
+	if (!npf_iscached(npc, NPC_ADDRS)) {
+		if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+			return false;
+		}
+	}
+
+	/* ICMP: fetch ICMP data. */
+	if (npc->npc_proto == IPPROTO_ICMP) {
+		return npf_fetch_icmp(npc, nbuf, n_ptr);
+	}
+
+	/* TCP: flags are needed as well.  Anything but UDP - give up. */
+	if (npc->npc_proto == IPPROTO_TCP) {
+		if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
+			return false;
+		}
+	} else if (npc->npc_proto != IPPROTO_UDP) {
+		return false;
+	}
+
+	/* Fetch TCP/UDP ports. */
+	return npf_fetch_ports(npc, nbuf, n_ptr, npc->npc_proto);
+}
+
+/*
+ * npf_rwrport: rewrite required TCP/UDP port and update checksum.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header; ports must be cached.
+ * => For PFIL_OUT the source port is rewritten, otherwise the destination.
+ * => The TCP/UDP checksum is adjusted for both the address change (old
+ *    cached address to 'naddr') and the port change.  A zero UDP checksum
+ *    is left untouched.
+ */
+bool
+npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
+    in_port_t port, in_addr_t naddr)
+{
+	const int proto = npc->npc_proto;
+	const bool is_tcp = (proto == IPPROTO_TCP);
+	const bool outgoing = (di == PFIL_OUT);
+	u_int port_off, sum_off;
+	in_addr_t oaddr;
+	in_port_t oport;
+	uint16_t cksum;
+
+	KASSERT(npf_iscached(npc, NPC_PORTS));
+	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
+
+	/* Offset to the source port is zero. */
+	CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
+	CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
+
+	/* Offsets of the port and the checksum within TCP/UDP header. */
+	if (is_tcp) {
+		port_off = outgoing ? 0 : offsetof(struct tcphdr, th_dport);
+		sum_off = offsetof(struct tcphdr, th_sum);
+	} else {
+		port_off = outgoing ? 0 : offsetof(struct udphdr, uh_dport);
+		sum_off = offsetof(struct udphdr, uh_sum);
+	}
+
+	/* Old values, which are being replaced. */
+	if (outgoing) {
+		oaddr = npc->npc_srcip;
+		oport = npc->npc_sport;
+	} else {
+		oaddr = npc->npc_dstip;
+		oport = npc->npc_dport;
+	}
+
+	/* Skip the IPv4 header, advance to the port and rewrite it. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen + port_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_port_t), &port)) {
+		return false;
+	}
+
+	/* Advance from the port to the TCP/UDP checksum and fetch it. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, sum_off - port_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum)) {
+		return false;
+	}
+
+	/* Zero checksum on a non-TCP (i.e. UDP) packet: nothing to update. */
+	if (__predict_false(cksum == 0 && proto != IPPROTO_TCP)) {
+		return true;
+	}
+
+	cksum = npf_fixup32_cksum(cksum, oaddr, naddr);
+	cksum = npf_fixup16_cksum(cksum, oport, port);
+	if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum)) {
+		return false;
+	}
+	return true;
+}
+
+/*
+ * npf_rwrip: rewrite required IP address and update checksum.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header; header and addresses
+ *    must be cached.
+ * => For PFIL_OUT the source address is rewritten, otherwise destination.
+ * => The new IP checksum is derived from the cached one (npc_ipsum); the
+ *    cached value is updated only if both stores succeed.
+ */
+bool
+npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
+    in_addr_t addr)
+{
+	const u_int sum_off = offsetof(struct ip, ip_sum);
+	u_int addr_off;
+	in_addr_t oaddr;
+	uint16_t cksum;
+
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+
+	/* Select the address to rewrite: source if outgoing. */
+	if (di == PFIL_OUT) {
+		addr_off = offsetof(struct ip, ip_src);
+		oaddr = npc->npc_srcip;
+	} else {
+		addr_off = offsetof(struct ip, ip_dst);
+		oaddr = npc->npc_dstip;
+	}
+
+	/* Advance to the checksum in IP header. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, sum_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+
+	/* Write new IP checksum (it is acceptable to do this earlier). */
+	cksum = npf_fixup32_cksum(npc->npc_ipsum, oaddr, addr);
+	if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum)) {
+		return false;
+	}
+
+	/* Advance from the checksum to the address and rewrite it. */
+	n_ptr = nbuf_advance(&nbuf, n_ptr, addr_off - sum_off);
+	if (n_ptr == NULL) {
+		return false;
+	}
+	if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_addr_t), &addr)) {
+		return false;
+	}
+
+	npc->npc_ipsum = cksum;
+	return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_instr.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,209 @@
+/*	$NetBSD: npf_instr.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF complex instructions.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/ethertypes.h>
+#include <net/if_ether.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#endif
+
+#include "npf_impl.h"
+
+/*
+ * Port range 'r' holds the lower bound in the upper 16 bits and the upper
+ * bound in the lower 16 bits; both bounds are inclusive.
+ * Note: 'p' is evaluated twice.
+ */
+#define	NPF_PORTRANGE_MATCH(r, p)	\
+	((p) >= ((r) >> 16) && (p) <= ((r) & 0xffff))
+
+/*
+ * npf_match_ether: find and check Ethernet and possible VLAN headers.
+ *
+ * => Stores in 'r' the number of bytes to advance from the start of the
+ *    frame to reach the layer 3 header (usually, IPv4).
+ * => At most one VLAN tag is skipped.
+ * => Returns zero on success or -1 on failure.
+ *
+ * NOTE(review): 'sd', '_res' and 'ethertype' are not used; the EtherType
+ * is always compared with ETHERTYPE_IP - confirm this is intended.
+ */
+int
+npf_match_ether(nbuf_t *nbuf, int sd, int _res, uint16_t ethertype, uint32_t *r)
+{
+	void *n_ptr = nbuf_dataptr(nbuf);
+	u_int step = offsetof(struct ether_header, ether_type);
+	bool seen_vlan = false;
+	uint16_t etype;
+
+	*r = 0;
+
+	for (;;) {
+		/* Step to the (next) EtherType field and fetch it. */
+		n_ptr = nbuf_advance(&nbuf, n_ptr, step);
+		if (n_ptr == NULL) {
+			return -1;
+		}
+		if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint16_t), &etype)) {
+			return -1;
+		}
+		etype = ntohs(etype);
+		*r += step;
+
+		/* Done, unless this is the first VLAN tag. */
+		if (etype != ETHERTYPE_VLAN || seen_vlan) {
+			break;
+		}
+		step = sizeof(uint32_t);
+		seen_vlan = true;
+	}
+
+	if (etype != ETHERTYPE_IP) {
+		return -1;
+	}
+
+	/* Account for the EtherType field itself. */
+	*r += ETHER_TYPE_LEN;
+	return 0;
+}
+
+/*
+ * npf_match_ip4table: match IPv4 address against NPF table.
+ *
+ * => Non-zero 'sd' selects the source address, zero the destination.
+ * => Returns -1, if the addresses cannot be fetched; otherwise the result
+ *    of the lookup in the table 'tid'.
+ */
+int
+npf_match_ip4table(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
+    const int sd, const u_int tid)
+{
+
+	/* Make sure that both addresses are in the cache. */
+	if (!npf_iscached(npc, NPC_ADDRS) &&
+	    !npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+		return -1;
+	}
+	KASSERT(npf_iscached(npc, NPC_ADDRS));
+
+	/* Match address against NPF table. */
+	if (sd) {
+		return npf_table_match_v4addr(tid, npc->npc_srcip);
+	}
+	return npf_table_match_v4addr(tid, npc->npc_dstip);
+}
+
+/*
+ * npf_match_ip4mask: match IPv4 address against netaddr/subnet.
+ *
+ * => Non-zero 'sd' selects the source address, zero the destination.
+ * => Returns zero, if the address masked with 'subnet' equals 'netaddr';
+ *    -1 otherwise or if the addresses cannot be fetched.
+ */
+int
+npf_match_ip4mask(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
+    const int sd, in_addr_t netaddr, in_addr_t subnet)
+{
+	in_addr_t masked;
+
+	/* Make sure that both addresses are in the cache. */
+	if (!npf_iscached(npc, NPC_ADDRS) &&
+	    !npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
+		return -1;
+	}
+	KASSERT(npf_iscached(npc, NPC_ADDRS));
+
+	masked = (sd ? npc->npc_srcip : npc->npc_dstip) & subnet;
+	if (masked != netaddr) {
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * npf_match_tcp_ports: match TCP port in header against the range.
+ *
+ * => Non-zero 'sd' selects the source port, zero the destination.
+ * => Returns zero on match; -1 otherwise or if ports cannot be fetched.
+ */
+int
+npf_match_tcp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
+    const int sd, const uint32_t prange)
+{
+	in_port_t port;
+
+	/* Make sure that the ports are in the cache. */
+	if (!npf_iscached(npc, NPC_PORTS) &&
+	    !npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_TCP)) {
+		return -1;
+	}
+	KASSERT(npf_iscached(npc, NPC_PORTS));
+
+	/* Match against the port range. */
+	port = sd ? npc->npc_sport : npc->npc_dport;
+	if (NPF_PORTRANGE_MATCH(prange, port)) {
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * npf_match_udp_ports: match UDP port in header against the range.
+ *
+ * => Non-zero 'sd' selects the source port, zero the destination.
+ * => Returns zero on match; -1 otherwise or if ports cannot be fetched.
+ */
+int
+npf_match_udp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
+    const int sd, const uint32_t prange)
+{
+	in_port_t port;
+
+	/* Make sure that the ports are in the cache. */
+	if (!npf_iscached(npc, NPC_PORTS) &&
+	    !npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_UDP)) {
+		return -1;
+	}
+	KASSERT(npf_iscached(npc, NPC_PORTS));
+
+	/* Match against the port range. */
+	port = sd ? npc->npc_sport : npc->npc_dport;
+	if (NPF_PORTRANGE_MATCH(prange, port)) {
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * npf_match_icmp4: match ICMPv4 packet.
+ *
+ * => 'n_ptr' points to the start of the IPv4 header.
+ * => 'type' and 'code' are compared with the values of the packet; ~0 in
+ *    either of them means "any".
+ * => Returns zero on match; -1 otherwise, or if the packet is not ICMP or
+ *    the data cannot be fetched.
+ */
+int
+npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
+    const int type, const int code)
+{
+
+	if (!npf_iscached(npc, NPC_ICMP)) {
+		/* Perform checks on the IPv4 header and the protocol. */
+		if (!npf_iscached(npc, NPC_IP46) &&
+		    !npf_ip4_proto(npc, nbuf, n_ptr)) {
+			return -1;
+		}
+		if (npc->npc_proto != IPPROTO_ICMP) {
+			return -1;
+		}
+		/*
+		 * Note: npf_fetch_icmp() skips the IPv4 header itself, thus
+		 * the pointer must not be advanced to the ICMP header here,
+		 * or the header length would be skipped twice.
+		 */
+		if (!npf_fetch_icmp(npc, nbuf, n_ptr)) {
+			return -1;
+		}
+		KASSERT(npf_iscached(npc, NPC_ICMP));
+	}
+	/* Match, if required. */
+	if (type != ~0 && type != npc->npc_icmp_type)
+		return -1;
+	if (code != ~0 && code != npc->npc_icmp_code)
+		return -1;
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_mbuf.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,227 @@
+/*	$NetBSD: npf_mbuf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF network buffer management interface.
+ *
+ * Network buffer in NetBSD is mbuf.  Internal mbuf structures are
+ * abstracted within this source.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+#endif
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include "npf_impl.h"
+
+/*
+ * nbuf_dataptr: return the start of the data area of the given nbuf,
+ * which is treated as an mbuf.
+ */
+void *
+nbuf_dataptr(nbuf_t *nbuf)
+{
+	const struct mbuf *mb = nbuf;
+	void *data;
+
+	data = mtod(mb, void *);
+	return data;
+}
+
+/*
+ * nbuf_advance: move forward by 'n' bytes from 'n_ptr' within the mbuf
+ * or the mbuf chain.
+ *
+ * => Returns the new data pointer, or NULL if the resulting offset is
+ *    beyond the end of the chain.
+ * => On success, *nbuf is set to the mbuf which contains the new pointer;
+ *    on failure, *nbuf is left untouched.
+ */
+void *
+nbuf_advance(nbuf_t **nbuf, void *n_ptr, u_int n)
+{
+	struct mbuf *cur = *nbuf;
+	u_int target, limit, base;
+
+	/* Target offset, relative to the data start of the first mbuf. */
+	target = (uintptr_t)n_ptr - mtod(cur, uintptr_t) + n;
+
+	/* 'limit' is the cumulative length up to the end of 'cur'. */
+	limit = cur->m_len;
+	while (__predict_false(limit <= target)) {
+		cur = cur->m_next;
+		if (__predict_false(cur == NULL)) {
+			/* Ran off the chain: offset exceeds the packet. */
+			return NULL;
+		}
+		limit += cur->m_len;
+	}
+
+	/* Cumulative offset at which the data of 'cur' begins. */
+	base = limit - cur->m_len;
+	KASSERT(target >= base);
+
+	*nbuf = (void *)cur;
+	return mtod(cur, uint8_t *) + (target - base);
+}
+
+/*
+ * nbuf_rw_datum: read or write a datum of specified length at current
+ * offset in the nbuf chain and copy datum into passed buffer.
+ *
+ * => Datum is allowed to overlap between two or more mbufs.
+ * => Note: all data in nbuf is in network byte order.
+ * => Returns 0 on success, error code on failure.
+ * => 'wr' selects the direction: NBUF_DATA_READ copies from the chain
+ *    into 'buf', NBUF_DATA_WRITE copies from 'buf' into the chain.
+ * => On failure (EINVAL), the bytes located before the end of the chain
+ *    have already been copied.
+ *
+ * Note: this function must be static inline with constant operation
+ * parameter - we expect constant propagation.
+ */
+
+#define	NBUF_DATA_READ		0
+#define	NBUF_DATA_WRITE		1
+
+static inline int
+nbuf_rw_datum(const int wr, nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
+{
+	/* 'd' walks the mbuf data, 'b' walks the caller's buffer. */
+	uint8_t *d = n_ptr, *b = buf;
+	struct mbuf *m = nbuf;
+	u_int off, wmark, end;
+
+	/* Current offset in mbuf. */
+	off = (uintptr_t)n_ptr - mtod(m, uintptr_t);
+	KASSERT(off < m->m_len);
+	/* Cumulative length up to the end of the current mbuf. */
+	wmark = m->m_len;
+
+	/* Is datum overlapping? */
+	end = off + len;
+	while (__predict_false(end > wmark)) {
+		u_int l;
+
+		/* Get the part of current mbuf. */
+		l = m->m_len - off;
+		KASSERT(l < len);
+		len -= l;
+		if (wr) {
+			while (l--)
+				*d++ = *b++;
+		} else {
+			while (l--)
+				*b++ = *d++;
+		}
+		KASSERT(len > 0);
+
+		/* Take next mbuf and continue. */
+		m = m->m_next;
+		if (__predict_false(m == NULL)) {
+			/*
+			 * If out of chain, then offset with datum
+			 * length exceed the packet length.
+			 */
+			return EINVAL;
+		}
+		wmark += m->m_len;
+		/* Remaining bytes start at the beginning of the next mbuf. */
+		d = mtod(m, uint8_t *);
+		off = 0;
+	}
+	/* Either no mbuf boundary was crossed, or 'd' is at an mbuf start. */
+	KASSERT(n_ptr == d || mtod(m, uint8_t *) == d);
+	KASSERT(len <= m->m_len);
+
+	/* Non-overlapping case: fetch the actual data. */
+	if (wr) {
+		while (len--)
+			*d++ = *b++;
+	} else {
+		while (len--)
+			*b++ = *d++;
+	}
+	return 0;
+}
+
+/*
+ * nbuf_{fetch|store}_datum: read/write abstraction calls on nbuf_rw_datum().
+ *
+ * => nbuf_fetch_datum() copies 'len' bytes at 'n_ptr' from the chain
+ *    into 'buf'; nbuf_store_datum() copies them from 'buf' into the chain.
+ * => Both return the value of nbuf_rw_datum(): 0 on success, otherwise
+ *    an error code.
+ */
+int
+nbuf_fetch_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
+{
+	const int op = NBUF_DATA_READ;
+
+	return nbuf_rw_datum(op, nbuf, n_ptr, len, buf);
+}
+
+int
+nbuf_store_datum(nbuf_t *nbuf, void *n_ptr, size_t len, void *buf)
+{
+	const int op = NBUF_DATA_WRITE;
+
+	return nbuf_rw_datum(op, nbuf, n_ptr, len, buf);
+}
+
+/*
+ * nbuf_add_tag: attach a PACKET_TAG_NPF tag carrying a 32-bit value
+ * to the specified network buffer.
+ *
+ * => Returns 0 on success, or ENOMEM if the tag cannot be allocated.
+ * => Note: 'key' is currently not stored or otherwise used.
+ */
+int
+nbuf_add_tag(nbuf_t *nbuf, uint32_t key, uint32_t val)
+{
+	struct mbuf *mb = nbuf;
+	struct m_tag *tag;
+
+	tag = m_tag_get(PACKET_TAG_NPF, sizeof(uint32_t), M_NOWAIT);
+	if (__predict_false(tag == NULL)) {
+		return ENOMEM;
+	}
+	/* The value is stored right after the tag header. */
+	*(uint32_t *)(tag + 1) = val;
+	m_tag_prepend(mb, tag);
+	return 0;
+}
+
+/*
+ * nbuf_find_tag: look up the PACKET_TAG_NPF tag in the network buffer.
+ *
+ * => Returns 0 and points *data at the area following the tag header,
+ *    or returns EINVAL (leaving *data untouched) if there is no such tag.
+ * => Note: 'key' is currently not used for the lookup.
+ */
+int
+nbuf_find_tag(nbuf_t *nbuf, uint32_t key, void **data)
+{
+	struct mbuf *mb = nbuf;
+	struct m_tag *tag = m_tag_find(mb, PACKET_TAG_NPF, NULL);
+
+	if (__predict_true(tag != NULL)) {
+		*data = (void *)(tag + 1);
+		return 0;
+	}
+	return EINVAL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_nat.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,550 @@
+/*	$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF network address port translation (NAPT).
+ * Described in RFC 2663, RFC 3022.  Commonly just "NAT".
+ *
+ * Overview
+ *
+ *	There are a few mechanisms: NAT policy, port map and translation.
+ *	NAT module has a separate ruleset, where rules contain associated
+ *	NAT policy, thus flexible filter criteria can be used.
+ *
+ * NAT policies and port maps
+ *
+ *	NAT policy is applied when a packet matches the rule.  Apart from
+ *	filter criteria, NAT policy has a translation (gateway) IP address
+ *	and associated port map.  Port map is a bitmap used to reserve and
+ *	use unique TCP/UDP ports for translation.  Port maps are unique to
+ *	the IP addresses, therefore multiple NAT policies with the same IP
+ *	will share the same port map.
+ *
+ * NAT sessions and translation entries
+ *
+ *	NAT module relies on session management module.  Each "NAT" session
+ *	has an associated translation entry (npf_nat_t).  It contains local
+ *	i.e. original IP address with port and translation port, allocated
+ *	from the port map.  Each NAT translation entry is associated with
+ *	the policy, which contains translation IP address.  Allocated port
+ *	is returned to the port map and translation entry destroyed when
+ *	"NAT" session expires.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#endif
+
+#include <sys/atomic.h>
+#include <sys/bitops.h>
+#include <sys/kmem.h>
+#include <sys/pool.h>
+#include <net/pfil.h>
+#include <netinet/in.h>
+
+#include "npf_impl.h"
+
+/*
+ * NPF portmap structure.
+ *
+ * A reference-counted bitmap, which may be shared by several NAT policies
+ * using the same translation address.  A set bit marks a port in use.
+ * The bitmap words are allocated together with the structure and follow
+ * it immediately in memory.
+ */
+typedef struct {
+	u_int				p_refcnt;
+	uint32_t			p_bitmap[0];
+} npf_portmap_t;
+
+/*
+ * Portmap range: [ 1024 .. 65535 ]
+ *
+ * PORTMAP_SIZE is the number of 32-bit words in the bitmap and
+ * PORTMAP_FILLED is a word with all ports taken.  The offset of a port
+ * from PORTMAP_FIRST is split into a word index (offset >> PORTMAP_SHIFT)
+ * and a bit number within the word (offset & PORTMAP_MASK).
+ */
+#define	PORTMAP_FIRST			(1024)
+#define	PORTMAP_SIZE			((65536 - PORTMAP_FIRST) / 32)
+#define	PORTMAP_FILLED			((uint32_t)~0)
+#define	PORTMAP_MASK			(31)
+#define	PORTMAP_SHIFT			(5)
+
+/* NAT policy structure. */
+struct npf_natpolicy {
+	/* Linkage in the global list of NAT policies. */
+	LIST_ENTRY(npf_natpolicy)	n_entry;
+	/* Translation (gateway) IP address. */
+	in_addr_t			n_gw_ip;
+	/* Port map, possibly shared with other policies. */
+	npf_portmap_t *			n_portmap;
+};
+
+/* NAT translation entry for a session. */ 
+struct npf_nat {
+	/* Policy this entry was created from. */
+	npf_natpolicy_t *		nt_natpolicy;
+	/* Local address and port (for backwards translation). */
+	in_addr_t			nt_laddr;
+	in_port_t			nt_lport;
+	/* Translation port (for forwards). */
+	in_port_t			nt_tport;
+	/* ALG (if any) associated with this NAT entry. */
+	npf_alg_t *			nt_alg;
+	uintptr_t			nt_alg_arg;
+};
+
+/* Active NAT ruleset, list of all NAT policies and cache of NAT entries. */
+static npf_ruleset_t *			nat_ruleset;
+static LIST_HEAD(, npf_natpolicy)	nat_policy_list;
+static pool_cache_t			nat_cache;
+
+/*
+ * npf_nat_sys{init,fini}: set up and tear down the NAT subsystem.
+ *
+ * => Init creates the pool cache for NAT entries, an initial NAT ruleset
+ *    and the (empty) list of NAT policies.
+ * => Fini swaps in a NULL ruleset, which destroys the active one, and
+ *    then destroys the pool cache.
+ */
+
+void
+npf_nat_sysinit(void)
+{
+
+	LIST_INIT(&nat_policy_list);
+
+	nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
+	    0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
+	KASSERT(nat_cache != NULL);
+
+	nat_ruleset = npf_ruleset_create();
+}
+
+void
+npf_nat_sysfini(void)
+{
+
+	/* Replacing the ruleset with NULL flushes all NAT policies. */
+	npf_nat_reload(NULL);
+	KASSERT(LIST_EMPTY(&nat_policy_list));
+
+	pool_cache_destroy(nat_cache);
+}
+
+/*
+ * npf_nat_newpolicy: create a NAT policy for the translation address 'gip'
+ * and insert it into the list of policies.
+ *
+ * => If a policy with the same translation address exists, its port map
+ *    is shared (reference count is incremented); otherwise a new, empty
+ *    port map is allocated.
+ * => Returns the new policy, or NULL on allocation failure.
+ * => XXX: serialise at upper layer.
+ */
+npf_natpolicy_t *
+npf_nat_newpolicy(in_addr_t gip)
+{
+	npf_natpolicy_t *policy, *cur;
+	npf_portmap_t *portmap = NULL;
+
+	policy = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
+	if (policy == NULL) {
+		return NULL;
+	}
+	policy->n_gw_ip = gip;
+
+	/* Look for an existing policy on the same translation address. */
+	LIST_FOREACH(cur, &nat_policy_list, n_entry) {
+		if (cur->n_gw_ip == policy->n_gw_ip) {
+			portmap = cur->n_portmap;
+			break;
+		}
+	}
+
+	if (portmap != NULL) {
+		/* Found: take another reference on the shared port map. */
+		portmap->p_refcnt++;
+	} else {
+		/* Not found: allocate the header and the bitmap words. */
+		portmap = kmem_zalloc(sizeof(npf_portmap_t) +
+		    (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP);
+		if (portmap == NULL) {
+			kmem_free(policy, sizeof(npf_natpolicy_t));
+			return NULL;
+		}
+		portmap->p_refcnt = 1;
+		KASSERT((uintptr_t)portmap->p_bitmap ==
+		    (uintptr_t)portmap + sizeof(*portmap));
+	}
+	policy->n_portmap = portmap;
+
+	/*
+	 * Note: while a reload is in progress, both old and new policies
+	 * may be present in the list; that is not an issue.
+	 */
+	LIST_INSERT_HEAD(&nat_policy_list, policy, n_entry);
+	return policy;
+}
+
+/*
+ * npf_nat_freepolicy: unlink and free the NAT policy; the port map is
+ * freed together with its last reference.
+ *
+ * => Called from npf_rule_free() during the reload via npf_nat_reload().
+ */
+void
+npf_nat_freepolicy(npf_natpolicy_t *np)
+{
+	npf_portmap_t *portmap = np->n_portmap;
+
+	LIST_REMOVE(np, n_entry);
+
+	/* Drop the reference held by this policy. */
+	portmap->p_refcnt--;
+	if (portmap->p_refcnt == 0) {
+		kmem_free(portmap, sizeof(npf_portmap_t) +
+		    (PORTMAP_SIZE * sizeof(uint32_t)));
+	}
+	kmem_free(np, sizeof(npf_natpolicy_t));
+}
+
+/*
+ * npf_nat_reload: atomically install 'nset' as the NAT ruleset and
+ * destroy the previous ruleset, if there was one.
+ *
+ * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy.
+ * => 'nset' may be NULL, in which case only the old ruleset is destroyed.
+ */
+void
+npf_nat_reload(npf_ruleset_t *nset)
+{
+	npf_ruleset_t *prev;
+
+	prev = atomic_swap_ptr(&nat_ruleset, nset);
+	if (prev == NULL) {
+		/* Nothing was installed before. */
+		return;
+	}
+	npf_ruleset_destroy(prev);
+}
+
+/*
+ * npf_nat_getport: allocate and return a port in the NAT policy portmap.
+ *
+ * => Returns in network byte-order.
+ * => Zero indicates failure (no free port found in the port map).
+ * => Starts at a random bitmap word and walks backwards (with wrap-around)
+ *    over filled words; the bit is claimed with compare-and-swap, which
+ *    is retried on the same word if it was changed concurrently.
+ */
+static in_port_t
+npf_nat_getport(npf_natpolicy_t *np)
+{
+	npf_portmap_t *pm = np->n_portmap;
+	u_int n = PORTMAP_SIZE, idx, bit;
+	uint32_t map, nmap;
+
+	idx = arc4random() % PORTMAP_SIZE;
+	for (;;) {
+		KASSERT(idx < PORTMAP_SIZE);
+		map = pm->p_bitmap[idx];
+		if (__predict_false(map == PORTMAP_FILLED)) {
+			if (n-- == 0) {
+				/* No space. */
+				return 0;
+			}
+			/* This bitmap word is filled, try the next one. */
+			idx = (idx ? idx : PORTMAP_SIZE) - 1;
+			continue;
+		}
+		/* Lowest clear bit; ffs32() counts bits starting from 1. */
+		bit = ffs32(~map) - 1;
+		/*
+		 * Shift an unsigned one: bit may be 31, and shifting
+		 * a signed int into the sign bit is undefined.
+		 */
+		nmap = map | ((uint32_t)1 << bit);
+		if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
+			/* Success. */
+			break;
+		}
+	}
+	return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
+}
+
+/*
+ * npf_nat_putport: return port as available in the NAT policy portmap.
+ *
+ * => Port should be in network byte-order.
+ * => Port must have been allocated from this port map, i.e. it is
+ *    within the portmap range and its bit is currently set.
+ */
+static void
+npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
+{
+	npf_portmap_t *pm = np->n_portmap;
+	uint32_t map, nmap, mask;
+	u_int idx, bit;
+
+	/* Convert to the offset from the first port in the map. */
+	port = ntohs(port) - PORTMAP_FIRST;
+	idx = port >> PORTMAP_SHIFT;
+	bit = port & PORTMAP_MASK;
+
+	/* A port below PORTMAP_FIRST would wrap and index out of range. */
+	KASSERT(idx < PORTMAP_SIZE);
+
+	/* Unsigned shift: bit may be 31. */
+	mask = (uint32_t)1 << bit;
+	do {
+		map = pm->p_bitmap[idx];
+		/* The port being released must be marked as allocated. */
+		KASSERT(map & mask);
+		nmap = map & ~mask;
+	} while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
+}
+
+/*
+ * npf_natout:
+ *	- Inspect packet for a NAT policy, unless session with NAT
+ *	  association already exists.
+ *	- Perform "forwards" translation: rewrite source address, etc.
+ *	- Establish sessions or if already exists, associate NAT policy.
+ *
+ * => Returns 0 on success, or if there is nothing to translate (required
+ *    data is not cached or no NAT policy matches); otherwise an errno.
+ * => On error, a NAT entry allocated by this call is destroyed (and its
+ *    port returned).  An entry obtained from an already linked session
+ *    is never destroyed here.
+ */
+int
+npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
+    struct ifnet *ifp, const int layer)
+{
+	const int proto = npc->npc_proto;
+	void *n_ptr = nbuf_dataptr(nbuf);
+	npf_session_t *nse = NULL;
+	npf_natpolicy_t *np;
+	npf_nat_t *nt;
+	npf_rule_t *rl;
+	in_addr_t gwip;
+	in_port_t tport;
+	int error;
+	bool new;
+
+	/* All relevant IPv4 data should be already cached. */
+	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
+		return 0;
+	}
+
+	/* Detect if there is a linked session pointing to the NAT entry. */
+	nt = se ? npf_session_retlinknat(se) : NULL;
+	if (nt) {
+		np = nt->nt_natpolicy;
+		new = false;
+		goto skip;
+	}
+
+	/* Inspect packet against NAT ruleset, return a policy. */
+	rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer);
+	np = rl ? npf_rule_getnat(rl) : NULL;
+	if (np == NULL) {
+		/* If packet does not match - done. */
+		return 0;
+	}
+
+	/* New NAT association. */
+	nt = pool_cache_get(nat_cache, PR_NOWAIT);
+	if (nt == NULL) {
+		return ENOMEM;
+	}
+	nt->nt_natpolicy = np;
+	nt->nt_alg = NULL;
+	new = true;
+
+	/* Save local (source) address. */
+	nt->nt_laddr = npc->npc_srcip;
+
+	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
+		/* Also, save local TCP/UDP port. */
+		KASSERT(npf_iscached(npc, NPC_PORTS));
+		nt->nt_lport = npc->npc_sport;
+		/* Get a new port for translation. */
+		nt->nt_tport = npf_nat_getport(np);
+		if (nt->nt_tport == 0) {
+			/*
+			 * Port map is exhausted.  Fail here instead of
+			 * translating to port zero; as nt_tport is zero,
+			 * npf_nat_expire() will only free the entry.
+			 */
+			error = ENOMEM;
+			goto out;
+		}
+	} else {
+		nt->nt_lport = 0;
+		nt->nt_tport = 0;
+	}
+
+	/* Match any ALGs. */
+	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
+
+	/* If there is no local session, establish one before translation. */
+	if (se == NULL) {
+		nse = npf_session_establish(npc, NULL, PFIL_OUT);
+		if (nse == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		se = nse;
+	}
+skip:
+	if (layer == NPF_LAYER_2 && /* XXX */
+	    (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL) {
+		/* Go through the cleanup: the entry may be a new one. */
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Execute ALG hooks first. */
+	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
+
+	gwip = np->n_gw_ip;
+	tport = nt->nt_tport;
+
+	/*
+	 * Perform translation: rewrite source address et al.
+	 * Note: cache may be used in npf_rwrport(), update only in the end.
+	 */
+	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) {
+		error = EINVAL;
+		goto out;
+	}
+	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
+		KASSERT(tport != 0);
+		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) {
+			error = EINVAL;
+			goto out;
+		}
+	}
+	/* Success: cache new address and port (if any). */
+	npc->npc_srcip = gwip;
+	npc->npc_sport = tport;
+	error = 0;
+
+	if (__predict_false(new)) {
+		npf_session_t *natse;
+		/*
+		 * Establish a new NAT session using translated address and
+		 * associate NAT translation data with this session.
+		 *
+		 * Note: packet now has a translated address in the cache.
+		 */
+		natse = npf_session_establish(npc, nt, PFIL_OUT);
+		if (natse == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		/*
+		 * Link local session with NAT session, if no link already.
+		 */
+		npf_session_link(se, natse);
+		npf_session_release(natse);
+	}
+out:
+	/*
+	 * Cleanup applies only to what this call has created.  An entry
+	 * found via the linked session was not allocated here and must
+	 * not be freed on error.
+	 */
+	if (new) {
+		if (error) {
+			/* XXX: expire local session if new? */
+
+			/* Will free the structure and return the port. */
+			npf_nat_expire(nt);
+		}
+		if (nse != NULL) {
+			/* Drop the reference, if local session was new. */
+			npf_session_release(nse);
+		}
+	}
+	return error;
+}
+
+/*
+ * npf_natin:
+ *	- Look for a NAT entry associated with the session.
+ *	- Perform "backwards" translation: rewrite the destination address
+ *	  (and the port for TCP/UDP) to the saved local values.
+ *
+ * => Returns 0 on success or if there is no NAT association, EINVAL if
+ *    advancing or rewriting fails, ENOTSUP for protocols other than
+ *    TCP, UDP and ICMP.
+ */
+int
+npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer)
+{
+	npf_nat_t *nt;
+	void *n_ptr;
+	in_addr_t laddr;
+	in_port_t lport;
+
+	nt = (se != NULL) ? npf_session_retnat(se) : NULL;
+	if (nt == NULL) {
+		/* No association - no translation. */
+		return 0;
+	}
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+
+	n_ptr = nbuf_dataptr(nbuf);
+	laddr = nt->nt_laddr;
+	lport = nt->nt_lport;
+
+	/* At layer 2, step over the link-level header first. */
+	if (layer == NPF_LAYER_2) {
+		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
+		if (n_ptr == NULL) {
+			return EINVAL;
+		}
+	}
+
+	/* ALG hooks run before the translation. */
+	npf_alg_exec(npc, nbuf, nt, PFIL_IN);
+
+	/*
+	 * Rewrite the destination address.  The cache is updated only at
+	 * the end, since npf_rwrport() will still use it.
+	 */
+	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) {
+		return EINVAL;
+	}
+
+	if (npc->npc_proto == IPPROTO_TCP || npc->npc_proto == IPPROTO_UDP) {
+		KASSERT(npf_iscached(npc, NPC_PORTS));
+		/* Rewrite destination port. */
+		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) {
+			return EINVAL;
+		}
+	} else if (npc->npc_proto != IPPROTO_ICMP) {
+		/* Neither TCP/UDP nor ICMP (which needs no port rewrite). */
+		return ENOTSUP;
+	}
+
+	/* Cache new address and port. */
+	npc->npc_dstip = laddr;
+	npc->npc_dport = lport;
+	return 0;
+}
+
+/*
+ * npf_nat_getlocal: copy out the local (original) IP address and port
+ * saved in the translation entry.
+ */
+void
+npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
+{
+
+	*port = nt->nt_lport;
+	*addr = nt->nt_laddr;
+}
+
+/*
+ * npf_nat_setalg: associate an ALG and its argument with the
+ * translation entry.
+ */
+void
+npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
+{
+
+	nt->nt_alg_arg = arg;
+	nt->nt_alg = alg;
+}
+
+/*
+ * npf_nat_expire: destroy the translation entry: return its translation
+ * port (if any) to the port map of the policy and put the entry back
+ * into the cache.
+ */
+void
+npf_nat_expire(npf_nat_t *nt)
+{
+	const in_port_t tport = nt->nt_tport;
+
+	/* Zero means that no port was allocated for this entry. */
+	if (tport != 0) {
+		npf_nat_putport(nt->nt_natpolicy, tport);
+	}
+	pool_cache_put(nat_cache, nt);
+}
+
+#if defined(DDB) || defined(_NPF_TESTING)
+
+/*
+ * npf_nat_dump: print NAT information (debugging/testing builds only).
+ *
+ * => If 'nt' is not NULL, print its policy and the translation entry.
+ * => If 'nt' is NULL, print the gateway address of every NAT policy.
+ */
+void
+npf_nat_dump(npf_nat_t *nt)
+{
+	npf_natpolicy_t *np;
+	struct in_addr ip;
+
+	if (nt) {
+		/* Single entry: jump straight into the loop body. */
+		np = nt->nt_natpolicy;
+		goto skip;
+	}
+	LIST_FOREACH(np, &nat_policy_list, n_entry) {
+skip:
+		ip.s_addr = np->n_gw_ip;
+		printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip));
+		if (nt == NULL) {
+			/* Listing all policies: proceed with the next one. */
+			continue;
+		}
+		ip.s_addr = nt->nt_laddr;
+		printf("\tNAT: original address %s, lport %d, tport = %d\n",
+		    inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport));
+		if (nt->nt_alg) {
+			printf("\tNAT ALG = %p, ARG = %p\n",
+			    nt->nt_alg, (void *)nt->nt_alg_arg);
+		}
+		/* Single entry was printed: do not iterate over the list. */
+		return;
+	}
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_ncode.h	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,107 @@
+/*	$NetBSD: npf_ncode.h,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF n-code interface.
+ *
+ * WARNING: Backwards compatibility is not _yet_ maintained and instructions
+ * or their codes may (or may not) change.  Expect ABI breakage.
+ */
+
+#ifndef _NPF_NCODE_H_
+#define _NPF_NCODE_H_
+
+#include "npf.h"
+
+/* N-code processing, validation & building. */
+int	npf_ncode_process(npf_cache_t *, const void *, nbuf_t *, const int);
+int	npf_ncode_validate(const void *, size_t, int *);
+
+void *	npf_ncode_alloc(size_t);
+void	npf_ncode_free(void *, size_t);
+
+/*
+ * Error codes.
+ *
+ * Note: negative values are parenthesised, so that the macros expand
+ * safely within any expression.
+ */
+#define	NPF_ERR_OPCODE		(-1)	/* Invalid instruction. */
+#define	NPF_ERR_JUMP		(-2)	/* Invalid jump (e.g. out of range). */
+#define	NPF_ERR_REG		(-3)	/* Invalid register. */
+#define	NPF_ERR_INVAL		(-4)	/* Invalid argument value. */
+#define	NPF_ERR_RANGE		(-5)	/* Processing out of range. */
+
+/* Number of registers: [0..N-1] */
+#define	NPF_NREGS		4
+
+/* Maximum loop count. */
+#define	NPF_LOOP_LIMIT		100
+
+/*
+ * Shift to check if CISC-like instruction: the result is non-zero for
+ * opcodes of 0x80 and above.
+ */
+#define	NPF_CISC_SHIFT		7
+#define	NPF_CISC_OPCODE(insn)	((insn) >> NPF_CISC_SHIFT)
+
+/*
+ * RISC-like n-code instructions.
+ */
+
+/* Return, advance, jump, tag and invalidate instructions. */
+#define	NPF_OPCODE_RET			0x00
+#define	NPF_OPCODE_ADVR			0x01
+#define	NPF_OPCODE_J			0x02
+#define	NPF_OPCODE_INVL			0x03
+#define	NPF_OPCODE_TAG			0x04
+
+/* Set and load instructions. */
+#define	NPF_OPCODE_MOV			0x10
+#define	NPF_OPCODE_LOAD			0x11
+
+/* Compare and jump instructions. */
+#define	NPF_OPCODE_CMP			0x21
+#define	NPF_OPCODE_CMPR			0x22
+#define	NPF_OPCODE_BEQ			0x23
+#define	NPF_OPCODE_BNE			0x24
+#define	NPF_OPCODE_BGT			0x25
+#define	NPF_OPCODE_BLT			0x26
+
+/* Bitwise instructions. */
+#define	NPF_OPCODE_AND			0x30
+
+/*
+ * CISC-like n-code instructions.
+ */
+
+#define	NPF_OPCODE_ETHER		0x80
+
+#define	NPF_OPCODE_IP4MASK		0x90
+#define	NPF_OPCODE_IP4TABLE		0x91
+#define	NPF_OPCODE_ICMP4		0x92
+
+#define	NPF_OPCODE_TCP_PORTS		0xa0
+#define	NPF_OPCODE_UDP_PORTS		0xa1
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_processor.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,533 @@
+/*	$NetBSD: npf_processor.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF n-code processor.
+ *	Inspired by the Berkeley Packet Filter.
+ *
+ * Few major design goals are:
+ *
+ * - Keep engine lightweight, well abstracted and simple.
+ * - Avoid knowledge of internal network buffer structures (e.g. mbuf).
+ * - Avoid knowledge of network protocols.
+ *
+ * There are two instruction sets: RISC-like and CISC-like.  The latter are
+ * instructions to cover most common filter cases, and reduce interpretation
+ * overhead.  These instructions use protocol knowledge and are supposed to
+ * be fully optimized.
+ *
+ * N-code memory address and thus instructions should be word aligned.
+ * All processing is done in 32 bit words, since both instructions (their
+ * codes) and arguments use 32 bits words.
+ *
+ * TODO:
+ * - There is some space for a better abstraction.  Duplicated opcode
+ *   maintenance in npf_ncode_process() and nc_insn_check() might be avoided.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+#endif
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/kmem.h>
+
+#include "npf_impl.h"
+#include "npf_ncode.h"
+
+/*
+ * nc_fetch_word: read one 32-bit word of n-code into 'a'.
+ *
+ * => Returns the instruction pointer advanced past the word.
+ * => Instruction pointer must be aligned to a 32-bit word.
+ */
+static inline const void *
+nc_fetch_word(const void *iptr, uint32_t *a)
+{
+	const uint32_t *words = iptr;
+
+	KASSERT(ALIGNED_POINTER(iptr, uint32_t));
+	*a = words[0];
+	return words + 1;
+}
+
+/*
+ * nc_fetch_double: read two consecutive 32-bit words of n-code into
+ * 'a' and 'b' (in this order).
+ *
+ * => Returns the instruction pointer advanced past both words.
+ * => Instruction pointer must be aligned to a 32-bit word.
+ */
+static inline const void *
+nc_fetch_double(const void *iptr, uint32_t *a, uint32_t *b)
+{
+	const uint32_t *words = iptr;
+
+	KASSERT(ALIGNED_POINTER(iptr, uint32_t));
+	*a = words[0];
+	*b = words[1];
+	return words + 2;
+}
+
+/*
+ * nc_jump: helper function to move the instruction pointer by 'n' lines
+ * (32-bit words), which may be negative.
+ *
+ * => Each jump consumes one unit of the loop budget in 'lcount'.
+ * => Returns the new instruction pointer, or NULL if the budget is
+ *    exhausted (protection against infinite loops).
+ */
+static inline const void *
+nc_jump(const void *iptr, int n, u_int *lcount)
+{
+	const uint32_t *from = iptr;
+
+	if (__predict_false(*lcount == 0)) {
+		/* Out of budget: treat as an infinite loop. */
+		return NULL;
+	}
+	(*lcount)--;
+	return from + n;
+}
+
+/*
+ * npf_ncode_{alloc,free}: allocate and free memory for the n-code.
+ *
+ * => Allocation uses KM_SLEEP.
+ * => Size passed to npf_ncode_free() is handed to kmem_free() as is.
+ */
+void *
+npf_ncode_alloc(size_t sz)
+{
+	void *nc;
+
+	nc = kmem_alloc(sz, KM_SLEEP);
+	return nc;
+}
+
+void
+npf_ncode_free(void *nc, size_t sz)
+{
+
+	kmem_free(nc, sz);
+}
+
+/*
+ * npf_ncode_process: process n-code using data of the specified packet.
+ *
+ * => Argument nbuf (network buffer) is opaque to this function.
+ * => Chain of nbufs (and their data) should be protected from any change.
+ * => N-code memory address and thus instructions should be aligned.
+ * => N-code should be protected from any change.
+ * => Routine prevents from infinite loop.
+ * => Register 0 is initialised with 'layer'; other registers are not
+ *    initialised.
+ * => Returns the value given by the RET instruction, or -1 on failure
+ *    (advance/fetch/tag error, exhausted jump budget, invalid opcode).
+ */
+int
+npf_ncode_process(npf_cache_t *npc, const void *ncode,
+    nbuf_t *nbuf0, const int layer)
+{
+	/* N-code instruction pointer. */
+	const void *	i_ptr;
+	/* Pointer of current nbuf in the chain. */
+	nbuf_t *	nbuf;
+	/* Data pointer in the current nbuf. */
+	void *		n_ptr;
+	/* Virtual registers. */
+	uint32_t	regs[NPF_NREGS];
+	/* Local, state variables. */
+	uint32_t d, i, n;
+	u_int lcount;
+	int cmpval;
+
+	i_ptr = ncode;
+	regs[0] = layer;
+
+	lcount = NPF_LOOP_LIMIT;	/* XXX */
+	cmpval = 0;
+
+	/* Note: offset = n_ptr - nbuf_dataptr(nbuf); */
+	nbuf = nbuf0;
+	n_ptr = nbuf_dataptr(nbuf);
+
+process_next:
+	/*
+	 * Loop must always start on instruction, therefore first word
+	 * should be an opcode.  Most used instructions are checked first.
+	 */
+	i_ptr = nc_fetch_word(i_ptr, &d);
+	if (__predict_true(NPF_CISC_OPCODE(d))) {
+		/* It is a CISC-like instruction. */
+		goto cisc_like;
+	}
+
+	/*
+	 * RISC-like instructions.
+	 *
+	 * - ADVR, LOAD, CMP, CMPR
+	 * - BEQ, BNE, BGT, BLT
+	 * - RET, TAG, MOV
+	 * - AND, J, INVL
+	 */
+	switch (d) {
+	case NPF_OPCODE_ADVR:
+		i_ptr = nc_fetch_word(i_ptr, &i);	/* Register */
+		KASSERT(i < NPF_NREGS);
+		n_ptr = nbuf_advance(&nbuf, n_ptr, regs[i]);
+		if (__predict_false(n_ptr == NULL)) {
+			goto fail;
+		}
+		break;
+	case NPF_OPCODE_LOAD:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Size, register */
+		KASSERT(i < NPF_NREGS);
+		KASSERT(n >= sizeof(uint8_t) && n <= sizeof(uint32_t));
+		if (nbuf_fetch_datum(nbuf, n_ptr, n, (uint32_t *)regs + i)) {
+			goto fail;
+		}
+		break;
+	case NPF_OPCODE_CMP:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
+		KASSERT(i < NPF_NREGS);
+		if (n != regs[i]) {
+			cmpval = (n > regs[i]) ? 1 : -1;
+		} else {
+			cmpval = 0;
+		}
+		break;
+	case NPF_OPCODE_CMPR:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Register, register */
+		/* Both operands index the register file. */
+		KASSERT(n < NPF_NREGS);
+		KASSERT(i < NPF_NREGS);
+		if (regs[n] != regs[i]) {
+			cmpval = (regs[n] > regs[i]) ? 1 : -1;
+		} else {
+			cmpval = 0;
+		}
+		break;
+	case NPF_OPCODE_BEQ:
+		i_ptr = nc_fetch_word(i_ptr, &n);	/* N-code line */
+		if (cmpval == 0)
+			goto make_jump;
+		break;
+	case NPF_OPCODE_BNE:
+		i_ptr = nc_fetch_word(i_ptr, &n);
+		if (cmpval != 0)
+			goto make_jump;
+		break;
+	case NPF_OPCODE_BGT:
+		i_ptr = nc_fetch_word(i_ptr, &n);
+		if (cmpval > 0)
+			goto make_jump;
+		break;
+	case NPF_OPCODE_BLT:
+		i_ptr = nc_fetch_word(i_ptr, &n);
+		if (cmpval < 0)
+			goto make_jump;
+		break;
+	case NPF_OPCODE_RET:
+		(void)nc_fetch_word(i_ptr, &n);		/* Return value */
+		return n;
+	case NPF_OPCODE_TAG:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Key, value */
+		/*
+		 * The tag goes on a network buffer, not on the current
+		 * data pointer (n_ptr).  Use the buffer passed by the
+		 * caller rather than the current one in the chain.
+		 * NOTE(review): assumes nbuf0 is the head of the chain
+		 * holding the packet header - confirm against callers.
+		 */
+		if (nbuf_add_tag(nbuf0, n, i)) {
+			goto fail;
+		}
+		break;
+	case NPF_OPCODE_MOV:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
+		KASSERT(i < NPF_NREGS);
+		regs[i] = n;
+		break;
+	case NPF_OPCODE_AND:
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
+		KASSERT(i < NPF_NREGS);
+		regs[i] = n & regs[i];
+		break;
+	case NPF_OPCODE_J:
+		i_ptr = nc_fetch_word(i_ptr, &n);	/* N-code line */
+make_jump:
+		/*
+		 * The instruction pointer is already two words (opcode
+		 * and operand) past the start of this instruction, thus
+		 * the jump is relative to the start of the instruction.
+		 */
+		i_ptr = nc_jump(i_ptr, n - 2, &lcount);
+		if (__predict_false(i_ptr == NULL)) {
+			goto fail;
+		}
+		break;
+	case NPF_OPCODE_INVL:
+		/* Invalidate all cached data. */
+		npc->npc_info = 0;
+		break;
+	default:
+		/*
+		 * Invalid instruction.  If assertions are compiled out,
+		 * fail instead of interpreting operands as opcodes.
+		 */
+		KASSERT(false);
+		goto fail;
+	}
+	goto process_next;
+
+cisc_like:
+	/*
+	 * CISC-like instructions.
+	 */
+	switch (d) {
+	case NPF_OPCODE_ETHER:
+		/* Source/destination, reserved, ethernet type. */
+		i_ptr = nc_fetch_word(i_ptr, &d);
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_ether(nbuf, d, n, i, &regs[NPF_NREGS - 1]);
+		break;
+	case NPF_OPCODE_IP4MASK:
+		/* Source/destination, network address, subnet mask. */
+		i_ptr = nc_fetch_word(i_ptr, &d);
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_ip4mask(npc, nbuf, n_ptr, d, n, i);
+		break;
+	case NPF_OPCODE_IP4TABLE:
+		/* Source/destination, NPF table ID. */
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_ip4table(npc, nbuf, n_ptr, n, i);
+		break;
+	case NPF_OPCODE_TCP_PORTS:
+		/* Source/destination, port range. */
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_tcp_ports(npc, nbuf, n_ptr, n, i);
+		break;
+	case NPF_OPCODE_UDP_PORTS:
+		/* Source/destination, port range. */
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_udp_ports(npc, nbuf, n_ptr, n, i);
+		break;
+	case NPF_OPCODE_ICMP4:
+		/* ICMP type, code. */
+		i_ptr = nc_fetch_double(i_ptr, &n, &i);
+		cmpval = npf_match_icmp4(npc, nbuf, n_ptr, n, i);
+		break;
+	default:
+		/* Invalid instruction: see the RISC-like case above. */
+		KASSERT(false);
+		goto fail;
+	}
+	goto process_next;
+fail:
+	/* Failure case. */
+	return -1;
+}
+
+/*
+ * nc_ptr_check: range-check the instruction pointer against the n-code
+ * block, then step over 'nargs' 32-bit words.
+ *
+ * => iptr: in/out; current position, advanced by nargs words on success.
+ * => nc, sz: start and size (in bytes) of the n-code block.
+ * => val, r: if val is not NULL, the r-th word (counting from 1) of the
+ *    words being skipped is stored in *val.
+ *
+ * Returns 0 on success, NPF_ERR_JUMP if the pointer is before the block,
+ * NPF_ERR_RANGE if the words would extend past its end.
+ */
+static int
+nc_ptr_check(uintptr_t *iptr, const void *nc, size_t sz,
+    u_int nargs, uint32_t *val, u_int r)
+{
+	const uintptr_t start = (uintptr_t)nc;
+	const uintptr_t cur = *iptr;
+	const uint32_t *words = (const uint32_t *)cur;
+
+	KASSERT(ALIGNED_POINTER(*iptr, uint32_t));
+	KASSERT(nargs > 0);
+
+	if (cur < start) {
+		return NPF_ERR_JUMP;
+	}
+	if (cur + (nargs * sizeof(uint32_t)) > start + sz) {
+		return NPF_ERR_RANGE;
+	}
+
+	/* Fetch the requested word, if it is one of those being skipped. */
+	if (val != NULL && r >= 1 && r <= nargs) {
+		*val = words[r - 1];
+	}
+	*iptr = (uintptr_t)(words + nargs);
+	return 0;
+}
+
+/*
+ * nc_insn_check: validate the instruction and its arguments.
+ *
+ * => optr: address of the instruction (its opcode word) to validate.
+ * => nc, sz: start and size (in bytes) of the n-code block.
+ * => adv: on success, set to the instruction length in bytes
+ *    (opcode word plus arguments).
+ * => jmp: set to the jump offset in bytes, relative to the start of
+ *    this instruction, for jump/branch instructions; zero otherwise.
+ * => ret: set to true if the instruction is RET, false otherwise.
+ *
+ * Note: 'jmp' and 'ret' are written only once the opcode word has been
+ * fetched successfully; 'adv' is written only on success.
+ *
+ * Returns 0 if the instruction is valid, otherwise an NPF_ERR_* code.
+ */
+static int
+nc_insn_check(const uintptr_t optr, const void *nc, size_t sz,
+    size_t *adv, size_t *jmp, bool *ret)
+{
+	uintptr_t iptr = optr;
+	uint32_t regidx, val;
+	int error;
+
+	/* Fetch the instruction code. */
+	error = nc_ptr_check(&iptr, nc, sz, 1, &val, 1);
+	if (error)
+		return error;
+
+	/*
+	 * Instructions without a register argument leave regidx at zero,
+	 * which passes the common register check at the end.
+	 */
+	regidx = 0;
+	*ret = false;
+	*jmp = 0;
+
+	/*
+	 * RISC-like instructions.
+	 */
+	switch (val) {
+	case NPF_OPCODE_ADVR:
+		error = nc_ptr_check(&iptr, nc, sz, 1, &regidx, 1);
+		break;
+	case NPF_OPCODE_LOAD:
+		/* First argument: size of the datum, 1 to 4 bytes. */
+		error = nc_ptr_check(&iptr, nc, sz, 1, &val, 1);
+		if (error || val < sizeof(uint8_t) || val > sizeof(uint32_t)) {
+			return error ? error : NPF_ERR_INVAL;
+		}
+		error = nc_ptr_check(&iptr, nc, sz, 1, &regidx, 1);
+		break;
+	case NPF_OPCODE_CMP:
+		error = nc_ptr_check(&iptr, nc, sz, 2, &regidx, 2);
+		break;
+	case NPF_OPCODE_BEQ:
+	case NPF_OPCODE_BNE:
+	case NPF_OPCODE_BGT:
+	case NPF_OPCODE_BLT:
+		error = nc_ptr_check(&iptr, nc, sz, 1, &val, 1);
+		/* Validate jump address. */
+		goto jmp_check;
+
+	case NPF_OPCODE_RET:
+		error = nc_ptr_check(&iptr, nc, sz, 1, NULL, 0);
+		*ret = true;
+		break;
+	case NPF_OPCODE_TAG:
+		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		break;
+	case NPF_OPCODE_MOV:
+		error = nc_ptr_check(&iptr, nc, sz, 2, &regidx, 2);
+		break;
+	case NPF_OPCODE_CMPR:
+		error = nc_ptr_check(&iptr, nc, sz, 1, &regidx, 1);
+		/*
+		 * Handle first register explicitly: both arguments index
+		 * the register file, but only the last one fetched is
+		 * covered by the common check below.  Reject the first
+		 * when it is out of range.
+		 */
+		if (error || (u_int)regidx >= NPF_NREGS) {
+			return error ? error : NPF_ERR_REG;
+		}
+		error = nc_ptr_check(&iptr, nc, sz, 1, &regidx, 1);
+		break;
+	case NPF_OPCODE_AND:
+		error = nc_ptr_check(&iptr, nc, sz, 2, &regidx, 2);
+		break;
+	case NPF_OPCODE_J:
+		error = nc_ptr_check(&iptr, nc, sz, 1, &val, 1);
+jmp_check:
+		/*
+		 * We must check for JMP 0 i.e. to oneself.  Pass the jump
+		 * address to the caller, it will validate if it is correct.
+		 */
+		if (error == 0 && val == 0) {
+			return NPF_ERR_JUMP;
+		}
+		/* Jump argument is in 32-bit words; report it in bytes. */
+		*jmp = val * sizeof(uint32_t);
+		break;
+	case NPF_OPCODE_INVL:
+		break;
+	/*
+	 * CISC-like instructions.
+	 */
+	case NPF_OPCODE_ETHER:
+		error = nc_ptr_check(&iptr, nc, sz, 3, NULL, 0);
+		break;
+	case NPF_OPCODE_IP4MASK:
+		error = nc_ptr_check(&iptr, nc, sz, 3, NULL, 0);
+		break;
+	case NPF_OPCODE_IP4TABLE:
+		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		break;
+	case NPF_OPCODE_TCP_PORTS:
+		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		break;
+	case NPF_OPCODE_UDP_PORTS:
+		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		break;
+	case NPF_OPCODE_ICMP4:
+		error = nc_ptr_check(&iptr, nc, sz, 2, NULL, 0);
+		break;
+	default:
+		/* Invalid instruction. */
+		return NPF_ERR_OPCODE;
+	}
+	if (error) {
+		return error;
+	}
+	if ((u_int)regidx >= NPF_NREGS) {
+		/* Invalid register. */
+		return NPF_ERR_REG;
+	}
+	*adv = iptr - optr;
+	return 0;
+}
+
+/*
+ * nc_jmp_check: validate that jump address points to the instruction.
+ *
+ * Walk the n-code from its beginning, one instruction at a time, until
+ * the walk lands exactly on the jump address (success, returns 0) or an
+ * instruction fails validation (returns that error).
+ */
+static inline int
+nc_jmp_check(const void *nc, size_t sz, const uintptr_t jaddr)
+{
+	uintptr_t cur = (uintptr_t)nc;
+	int error;
+
+	KASSERT(cur != jaddr);
+	for (;;) {
+		size_t step, jmp_unused;
+		bool ret_unused;
+
+		error = nc_insn_check(cur, nc, sz,
+		    &step, &jmp_unused, &ret_unused);
+		if (error != 0) {
+			return error;
+		}
+		cur += step;
+		if (cur == jaddr) {
+			return 0;
+		}
+	}
+}
+
+/*
+ * npf_ncode_validate: validate n-code.
+ * Performs the following operations:
+ *
+ * - Checks that each instruction is valid (i.e. existing opcode).
+ * - Validates registers i.e. that their indexes are correct.
+ * - Checks that jumps are within n-code and to the instructions.
+ * - Checks that n-code returns, and processing is within n-code memory.
+ *
+ * => nc, sz: start and size (in bytes) of the n-code block.
+ * => errat: always set on return to the index, in 32-bit words, of the
+ *    position where validation stopped (the offending instruction on
+ *    failure, the end of the n-code on success).
+ *
+ * Returns 0 if the n-code is valid, otherwise an NPF_ERR_* code.
+ */
+int
+npf_ncode_validate(const void *nc, size_t sz, int *errat)
+{
+	const uintptr_t nc_end = (uintptr_t)nc + sz;
+	uintptr_t iptr = (uintptr_t)nc;
+	bool ret = false;
+	int error;
+
+	do {
+		size_t jmp, adv;
+
+		/* Validate instruction and its arguments. */
+		error = nc_insn_check(iptr, nc, sz, &adv, &jmp, &ret);
+		if (error)
+			break;
+
+		/* If jumping, check that address points to the instruction. */
+		if (jmp && nc_jmp_check(nc, sz, iptr + jmp)) {
+			/*
+			 * Note: the actual error might be different.
+			 * Leave the loop rather than returning, so that
+			 * the error position is reported like any other.
+			 */
+			error = NPF_ERR_JUMP;
+			break;
+		}
+
+		/* Advance and check for the end of n-code memory block. */
+		iptr += adv;
+
+	} while (iptr != nc_end);
+
+	if (!error) {
+		/* The last instruction of the n-code must be RET. */
+		error = ret ? 0 : NPF_ERR_RANGE;
+	}
+	*errat = (iptr - (uintptr_t)nc) / sizeof(uint32_t);
+	return error;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_ruleset.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,467 @@
+/*	$NetBSD: npf_ruleset.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF ruleset module.
+ *
+ * Lock order:
+ *
+ *	ruleset_lock -> table_lock -> npf_table_t::t_lock
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#endif
+
+#include <sys/atomic.h>
+#include <sys/kmem.h>
+#include <sys/pool.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+#include <net/pfil.h>
+
+#include "npf_ncode.h"
+#include "npf_impl.h"
+
+/* Hook: a callback run by npf_rule_apply() when a "pass" rule is applied. */
+struct npf_hook {
+	/* Callback; invoked with the packet cache and hk_arg. */
+	void				(*hk_fn)(const npf_cache_t *, void *);
+	/* Opaque argument handed back to the callback. */
+	void *				hk_arg;
+	/* Entry in the owning rule's r_hooks list. */
+	LIST_ENTRY(npf_hook)		hk_entry;
+};
+
+/* Ruleset: a priority-ordered queue of rules plus an optional default. */
+struct npf_ruleset {
+	/* Rules, kept sorted by r_priority by npf_ruleset_insert(). */
+	TAILQ_HEAD(, npf_rule)		rs_queue;
+	/* Rule flagged NPF_RULE_DEFAULT; it is not linked into rs_queue. */
+	npf_rule_t *			rs_default;
+	/* Not referenced by the code in this file. */
+	int				_reserved;
+};
+
+/* Rule structure.  Allocated from rule_cache by npf_rule_alloc(). */
+struct npf_rule {
+	/* List entry in the ruleset. */
+	TAILQ_ENTRY(npf_rule)		r_entry;
+	/* Optional: sub-ruleset (embedded, not a pointer), NAT policy. */
+	npf_ruleset_t			r_subset;
+	npf_natpolicy_t *		r_nat;
+	/* Rule priority: (highest) 0, 1, 2 ... n (lowest). */
+	u_int				r_priority;
+	/* N-code to process (may be NULL) and its size in bytes. */
+	void *				r_ncode;
+	size_t				r_nc_size;
+	/* Attributes of this rule (NPF_RULE_* flags). */
+	int				r_attr;
+	/* Interface index; zero matches any interface. */
+	u_int				r_ifid;
+	/* Hit counter; updated only if NPF_RULE_COUNT is set. */
+	u_long				r_hitcount;
+	/* List of hooks to process on match. */
+	LIST_HEAD(, npf_hook)		r_hooks;
+};
+
+/*
+ * Global (active) ruleset, the lock protecting it and the rule hook
+ * lists, and the pool cache from which rules are allocated.
+ */
+static npf_ruleset_t *			ruleset;
+static krwlock_t			ruleset_lock;
+static pool_cache_t			rule_cache;
+
+/*
+ * npf_ruleset_sysinit: set up the rule pool cache, the ruleset lock and
+ * an empty global ruleset.
+ *
+ * => Returns 0 on success or ENOMEM if the pool cache cannot be created.
+ */
+int
+npf_ruleset_sysinit(void)
+{
+	pool_cache_t pc;
+
+	pc = pool_cache_init(sizeof(npf_rule_t), coherency_unit,
+	    0, 0, "npfrlpl", NULL, IPL_NONE, NULL, NULL, NULL);
+	rule_cache = pc;
+	if (pc != NULL) {
+		rw_init(&ruleset_lock);
+		ruleset = npf_ruleset_create();
+		return 0;
+	}
+	return ENOMEM;
+}
+
+/*
+ * npf_ruleset_sysfini: destroy the global ruleset, its lock and the
+ * rule pool cache.
+ */
+void
+npf_ruleset_sysfini(void)
+{
+
+	/* Rules go back to rule_cache, so it must be destroyed last. */
+	npf_ruleset_destroy(ruleset);
+	rw_destroy(&ruleset_lock);
+	pool_cache_destroy(rule_cache);
+}
+
+/*
+ * npf_ruleset_create: allocate a zeroed ruleset with an empty rule queue.
+ */
+npf_ruleset_t *
+npf_ruleset_create(void)
+{
+	npf_ruleset_t *rs = kmem_zalloc(sizeof(*rs), KM_SLEEP);
+
+	TAILQ_INIT(&rs->rs_queue);
+	return rs;
+}
+
+/*
+ * npf_ruleset_destroy: free all rules of the ruleset and the ruleset.
+ *
+ * => Must only be used on rulesets from npf_ruleset_create().
+ */
+void
+npf_ruleset_destroy(npf_ruleset_t *rlset)
+{
+	npf_rule_t *rl;
+
+	while ((rl = TAILQ_FIRST(&rlset->rs_queue)) != NULL) {
+		TAILQ_REMOVE(&rlset->rs_queue, rl, r_entry);
+		npf_rule_free(rl);
+	}
+	/*
+	 * The default rule is kept outside of the queue by
+	 * npf_ruleset_insert(), so it has to be released separately.
+	 */
+	if ((rl = rlset->rs_default) != NULL) {
+		rlset->rs_default = NULL;
+		npf_rule_free(rl);
+	}
+	kmem_free(rlset, sizeof(npf_ruleset_t));
+}
+
+/*
+ * npf_ruleset_insert: insert the rule into the specified ruleset.
+ *
+ * => A rule flagged NPF_RULE_DEFAULT becomes the default of the ruleset
+ *    and is not queued.
+ * => Other rules are queued in priority order; a rule is placed after
+ *    the existing rules of the same priority.
+ */
+void
+npf_ruleset_insert(npf_ruleset_t *rlset, npf_rule_t *rl)
+{
+	npf_rule_t *pos;
+
+	if ((rl->r_attr & NPF_RULE_DEFAULT) != 0) {
+		rlset->rs_default = rl;
+		return;
+	}
+
+	/* Rule priority: (highest) 0, 1, 2 ... n (lowest). */
+	pos = TAILQ_FIRST(&rlset->rs_queue);
+	while (pos != NULL && pos->r_priority <= rl->r_priority) {
+		pos = TAILQ_NEXT(pos, r_entry);
+	}
+
+	if (pos != NULL) {
+		TAILQ_INSERT_BEFORE(pos, rl, r_entry);
+		return;
+	}
+	TAILQ_INSERT_TAIL(&rlset->rs_queue, rl, r_entry);
+}
+
+/*
+ * npf_ruleset_reload: atomically load new ruleset and tableset,
+ * and destroy old structures.
+ *
+ * => nrlset: ruleset to become the global ruleset.
+ * => ntblset: tableset handed to npf_tableset_reload().
+ * => The previous ruleset and tableset are destroyed after the ruleset
+ *    lock has been released.
+ */
+void
+npf_ruleset_reload(npf_ruleset_t *nrlset, npf_tableset_t *ntblset)
+{
+	npf_ruleset_t *oldrlset;
+	npf_tableset_t *oldtblset;
+
+	/*
+	 * Swap old ruleset with the new.
+	 * XXX: Rework to be fully lock-less; later.
+	 */
+	rw_enter(&ruleset_lock, RW_WRITER);
+	oldrlset = atomic_swap_ptr(&ruleset, nrlset);
+
+	/*
+	 * Setup a new tableset.  It will lock the global tableset lock,
+	 * therefore ensures atomicity.  We shall free the old table-set.
+	 */
+	oldtblset = npf_tableset_reload(ntblset);
+	KASSERT(oldtblset != NULL);
+	/* Unlock.  Everything goes "live" now. */
+	rw_exit(&ruleset_lock);
+
+	/* Old structures are no longer reachable via the globals. */
+	npf_tableset_destroy(oldtblset);
+	npf_ruleset_destroy(oldrlset);
+}
+
+/*
+ * npf_rule_alloc: validate the n-code (if any) and allocate a rule.
+ *
+ * => attr, pri, ifidx: rule attributes, priority and interface index.
+ * => nc, sz: n-code and its size in bytes; nc may be NULL.  The n-code
+ *    is not copied: the pointer is stored in the rule and is released
+ *    by npf_rule_free().
+ * => Returns NULL if the n-code fails validation or the allocation
+ *    fails; the n-code buffer is left untouched in that case.
+ */
+npf_rule_t *
+npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz)
+{
+	npf_rule_t *rl;
+	int errat;
+
+	/* Perform validation & building of n-code. */
+	if (nc && npf_ncode_validate(nc, sz, &errat)) {
+		return NULL;
+	}
+	/* Allocate a rule structure. */
+	rl = pool_cache_get(rule_cache, PR_WAITOK);
+	if (rl == NULL) {
+		return NULL;
+	}
+	/*
+	 * The pool cache has no constructor, so every member of the
+	 * embedded sub-ruleset has to be initialised here.
+	 */
+	TAILQ_INIT(&rl->r_subset.rs_queue);
+	rl->r_subset.rs_default = NULL;
+	rl->r_subset._reserved = 0;
+	LIST_INIT(&rl->r_hooks);
+	rl->r_priority = pri;
+	rl->r_attr = attr;
+	rl->r_ifid = ifidx;
+	rl->r_ncode = nc;
+	rl->r_nc_size = sz;
+	rl->r_hitcount = 0;
+	rl->r_nat = NULL;
+	return rl;
+}
+#if 0
+/*
+ * npf_activate_rule: activate rule by inserting it into the global ruleset.
+ *
+ * Note: currently compiled out by the surrounding "#if 0".
+ */
+void
+npf_activate_rule(npf_rule_t *rl)
+{
+
+	rw_enter(&ruleset_lock, RW_WRITER);
+	npf_ruleset_insert(ruleset, rl);
+	rw_exit(&ruleset_lock);
+}
+
+/*
+ * npf_deactivate_rule: deactivate rule by removing it from the ruleset.
+ *
+ * => The rule must be queued in the global ruleset.
+ *
+ * Note: currently compiled out by the surrounding "#if 0".
+ */
+void
+npf_deactivate_rule(npf_rule_t *rl)
+{
+
+	rw_enter(&ruleset_lock, RW_WRITER);
+	TAILQ_REMOVE(&ruleset->rs_queue, rl, r_entry);
+	rw_exit(&ruleset_lock);
+}
+#endif
+
+/*
+ * npf_rule_free: free the specified rule.
+ *
+ * => Releases the rules queued in its sub-ruleset, its n-code and its
+ *    NAT policy (if any), then returns the rule to the pool cache.
+ * => The rule must not be linked into any ruleset queue.
+ * => Hooks on r_hooks are not released here.
+ */
+void
+npf_rule_free(npf_rule_t *rl)
+{
+	npf_rule_t *sub;
+
+	/*
+	 * The sub-ruleset is embedded in the rule, thus it cannot be
+	 * passed to npf_ruleset_destroy().  Drain its queue here, so the
+	 * nested rules are not leaked.
+	 */
+	while ((sub = TAILQ_FIRST(&rl->r_subset.rs_queue)) != NULL) {
+		TAILQ_REMOVE(&rl->r_subset.rs_queue, sub, r_entry);
+		npf_rule_free(sub);
+	}
+	if (rl->r_ncode) {
+		/* Free n-code (if any). */
+		npf_ncode_free(rl->r_ncode, rl->r_nc_size);
+	}
+	if (rl->r_nat) {
+		/* Free NAT policy (if associated). */
+		npf_nat_freepolicy(rl->r_nat);
+	}
+	pool_cache_put(rule_cache, rl);
+}
+
+/*
+ * npf_rule_subset: return sub-ruleset, if any.
+ * npf_rule_getnat: get NAT policy assigned to the rule.
+ * npf_rule_setnat: assign NAT policy to the rule.
+ */
+
+npf_ruleset_t *
+npf_rule_subset(npf_rule_t *rl)
+{
+	/* The sub-ruleset is embedded in the rule; never NULL. */
+	npf_ruleset_t *subset = &rl->r_subset;
+
+	return subset;
+}
+
+npf_natpolicy_t *
+npf_rule_getnat(const npf_rule_t *rl)
+{
+	/* NULL if no NAT policy has been assigned. */
+	npf_natpolicy_t *np = rl->r_nat;
+
+	return np;
+}
+
+void
+npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
+{
+	/* Any previously assigned policy is overwritten, not freed. */
+	rl->r_nat = np;
+}
+
+/*
+ * npf_hook_register: register action hook in the rule.
+ *
+ * => fn, arg: callback and the opaque argument passed to it.
+ * => The hook is linked at the head of the rule's hook list under the
+ *    ruleset write lock.
+ * => Returns the hook, or NULL if it could not be allocated.
+ */
+npf_hook_t *
+npf_hook_register(npf_rule_t *rl,
+    void (*fn)(const npf_cache_t *, void *), void *arg)
+{
+	npf_hook_t *hk = kmem_alloc(sizeof(npf_hook_t), KM_SLEEP);
+
+	if (hk == NULL) {
+		return NULL;
+	}
+	hk->hk_fn = fn;
+	hk->hk_arg = arg;
+
+	rw_enter(&ruleset_lock, RW_WRITER);
+	LIST_INSERT_HEAD(&rl->r_hooks, hk, hk_entry);
+	rw_exit(&ruleset_lock);
+
+	return hk;
+}
+
+/*
+ * npf_hook_unregister: unregister a specified hook.
+ *
+ * => Hook should have been registered in the rule.
+ * => The hook is unlinked under the ruleset write lock and then freed.
+ * => 'rl' is not referenced by this function.
+ */
+void
+npf_hook_unregister(npf_rule_t *rl, npf_hook_t *hk)
+{
+
+	rw_enter(&ruleset_lock, RW_WRITER);
+	LIST_REMOVE(hk, hk_entry);
+	rw_exit(&ruleset_lock);
+	kmem_free(hk, sizeof(npf_hook_t));
+}
+
+/*
+ * npf_ruleset_match: inspect the packet against the ruleset.
+ *
+ * Loop over each rule in the set and run the n-code processor of each
+ * rule against the packet (nbuf chain).  The last matching rule wins,
+ * unless a rule flagged NPF_RULE_FINAL matches, which ends the scan.
+ * If the chosen rule has a non-empty sub-ruleset, that is inspected in
+ * turn.  If nothing matches, the default rule of 'rlset0' is used.
+ *
+ * => Returns the matching rule, or NULL if there is neither a match
+ *    nor a default rule.
+ * => This function takes no locks itself; see npf_ruleset_inspect().
+ * => Caller should protect the nbuf chain.
+ *
+ * NOTE(review): if the default rule of 'rlset0' has a non-empty
+ * sub-ruleset in which nothing matches, control appears to return to
+ * the same default rule again - confirm this cannot loop forever.
+ */
+npf_rule_t *
+npf_ruleset_match(npf_ruleset_t *rlset0, npf_cache_t *npc, nbuf_t *nbuf,
+    struct ifnet *ifp, const int di, const int layer)
+{
+	npf_rule_t *final_rl = NULL, *rl;
+	npf_ruleset_t *rlset = rlset0;
+
+	/* Exactly one of PFIL_IN and PFIL_OUT must be set. */
+	KASSERT(((di & PFIL_IN) != 0) ^ ((di & PFIL_OUT) != 0));
+reinspect:
+	TAILQ_FOREACH(rl, &rlset->rs_queue, r_entry) {
+		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
+
+		/* Match the interface. */
+		if (rl->r_ifid && rl->r_ifid != ifp->if_index) {
+			continue;
+		}
+		/* Match the direction. */
+		if ((rl->r_attr & NPF_RULE_DIMASK) != NPF_RULE_DIMASK) {
+			const int di_mask =
+			    (di & PFIL_IN) ? NPF_RULE_IN : NPF_RULE_OUT;
+
+			if ((rl->r_attr & di_mask) == 0)
+				continue;
+		}
+		/* Process the n-code, if any.  Non-zero means no match. */
+		const void *nc = rl->r_ncode;
+		if (nc && npf_ncode_process(npc, nc, nbuf, layer)) {
+			continue;
+		}
+		/* Set the matching rule and check for "final". */
+		final_rl = rl;
+		if (rl->r_attr & NPF_RULE_FINAL) {
+			goto final;
+		}
+	}
+	/* Default, if no final rule. */
+	if (final_rl == NULL) {
+		rlset = rlset0;
+		final_rl = rlset->rs_default;
+	}
+	/* Inspect the sub-ruleset, if any. */
+	if (final_rl) {
+final:
+		if (TAILQ_EMPTY(&final_rl->r_subset.rs_queue)) {
+			return final_rl;
+		}
+		/* Descend: restart the scan on the sub-ruleset. */
+		rlset = &final_rl->r_subset;
+		final_rl = NULL;
+		goto reinspect;
+	}
+	return final_rl;
+}
+
+/*
+ * npf_ruleset_inspect: inspection of the main ruleset for filtering.
+ *
+ * => If a rule is returned, the ruleset read lock is still held; it is
+ *    released by npf_rule_apply().
+ * => If NULL is returned, the lock has been released.
+ */
+npf_rule_t *
+npf_ruleset_inspect(npf_cache_t *npc, nbuf_t *nbuf,
+    struct ifnet *ifp, const int di, const int layer)
+{
+	npf_rule_t *rl;
+
+	rw_enter(&ruleset_lock, RW_READER);
+	rl = npf_ruleset_match(ruleset, npc, nbuf, ifp, di, layer);
+	if (rl != NULL) {
+		/* Match: hand the rule back with the lock still held. */
+		return rl;
+	}
+	rw_exit(&ruleset_lock);
+	return NULL;
+}
+
+/*
+ * npf_rule_apply: apply the rule i.e. run hooks and return appropriate value.
+ *
+ * => Must be called with the ruleset lock held; releases it.
+ * => Returns ENETUNREACH if "block" and 0 if "pass".
+ * => 'keepstate' is written only in the "pass" case.
+ */
+int
+npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl, bool *keepstate)
+{
+	npf_hook_t *hk;
+	int error = 0;
+
+	KASSERT(rw_lock_held(&ruleset_lock));
+
+	/* Account the hit, if the rule asks for it. */
+	if ((rl->r_attr & NPF_RULE_COUNT) != 0) {
+		atomic_inc_ulong(&rl->r_hitcount);
+	}
+
+	if ((rl->r_attr & NPF_RULE_PASS) != 0) {
+		/* Passing.  Run the hooks while the lock is still held. */
+		for (hk = LIST_FIRST(&rl->r_hooks); hk != NULL;
+		    hk = LIST_NEXT(hk, hk_entry)) {
+			KASSERT(hk->hk_fn != NULL);
+			(*hk->hk_fn)(npc, hk->hk_arg);
+		}
+		*keepstate = (rl->r_attr & NPF_RULE_KEEPSTATE) != 0;
+	} else {
+		/* Not passing - the packet is to be dropped. */
+		error = ENETUNREACH;
+	}
+	rw_exit(&ruleset_lock);
+
+	return error;
+}
+
+#if defined(DDB) || defined(_NPF_TESTING)
+
+/*
+ * npf_rulenc_dump: print the n-code of the rule, one 32-bit word per
+ * line, followed by the rule's verdict ("pass" or "block").
+ *
+ * => Rules without n-code print only the verdict.
+ */
+void
+npf_rulenc_dump(npf_rule_t *rl)
+{
+	const uint32_t *op = rl->r_ncode;
+	size_t n = rl->r_nc_size;
+
+	/*
+	 * Test before the first access: n-code is optional, and a size
+	 * which is not a multiple of the word size must not underflow.
+	 */
+	while (op != NULL && n >= sizeof(*op)) {
+		printf("\t> |0x%02x|\n", (uint32_t)*op);
+		op++;
+		n -= sizeof(*op);
+	}
+
+	printf("-> %s\n", (rl->r_attr & NPF_RULE_PASS) ? "pass" : "block");
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_session.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,843 @@
+/*	$NetBSD: npf_session.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF session tracking for stateful filtering and translation.
+ *
+ * Overview
+ *
+ *	There are two types of sessions: "pass" and "NAT".  The former are
+ *	sessions created according to the rules with "keep state" attribute
+ *	and are used for stateful filtering.  Such sessions indicate that
+ *	packet of the "backwards" stream should be passed without inspection
+ *	of the ruleset.
+ *
+ *	NAT sessions are created according to the NAT policies.  Since they
+ *	are used to perform translation, such sessions have 1:1 relationship
+ *	with NAT translation structure via npf_session_t::s_nat.  Therefore,
+ *	non-NULL value of npf_session_t::s_nat indicates this session type.
+ *
+ * Session life-cycle
+ *
+ *	Sessions are established when packet matches said rule or NAT policy.
+ *	Established session is inserted into the hashed tree.  A garbage
+ *	collection thread periodically scans all sessions and depending on
+ *	their properties (e.g. last activity time, protocol) expires them.
+ *
+ *	Each session has a reference count, which is taken on lookup and
+ *	needs to be released by the caller.  Reference guarantees that
+ *	session will not be destroyed, although it might be expired.
+ *
+ * Linked sessions
+ *
+ *	Often NAT policies have overlapping stateful filtering rules.  In
+ *	order to avoid unnecessary lookups, "pass" session can be linked
+ *	with a "NAT" session (npf_session_t::s_nat_se pointer).  Such link
+ *	is used to detect translation on "forwards" stream.
+ *
+ *	Additional reference is held on linked "NAT" sessions to prevent
+ *	them from destruction while linked.  Link is broken and reference
+ *	is dropped when "pass" session expires.
+ *
+ * External session identifiers
+ *
+ *	Application-level gateways (ALGs) can inspect the packet and fill
+ *	the packet cache (npf_cache_t) representing the IDs.  It is done
+ *	via npf_alg_sessionid() call.  In such case, ALGs are responsible
+ *	for correct filling of protocol, addresses and ports/IDs.
+ *
+ * TODO:
+ * - Session monitoring via descriptor.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#endif
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/hash.h>
+#include <sys/kmem.h>
+#include <sys/kthread.h>
+#include <sys/mutex.h>
+#include <net/pfil.h>
+#include <sys/pool.h>
+#include <sys/rwlock.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+
+#include "npf_impl.h"
+
+/* Session types (npf_session_t::s_type); also index sess_expire_table. */
+#define	NPF_SESSION_TCP		1
+#define	NPF_SESSION_UDP		2
+#define	NPF_SESSION_ICMP	3
+
+/* Session entry, allocated from sess_cache. */
+struct npf_session {
+	/* Session node / list entry and reference count. */
+	union {
+		/* Node in the hash bucket's RB-tree. */
+		struct rb_node		rbnode;
+		/* Presumably the G/C list linkage - TODO confirm. */
+		LIST_ENTRY(npf_session)	gclist;
+	} se_entry;
+	u_int				s_refcnt;
+	/* Session type.  Supported: TCP, UDP, ICMP. */
+	int				s_type;
+	/* Direction (PFIL_*) of the packet which created the session. */
+	int				s_direction;
+	/* Session state, one of SE_*. */
+	int				s_state;
+	/* NAT data associated with this session (if any). */
+	npf_nat_t *			s_nat;
+	npf_session_t *			s_nat_se;
+	/* Source and destination addresses. */
+	in_addr_t			s_src_addr;
+	in_addr_t			s_dst_addr;
+	/* Source and destination ports (TCP / UDP) or generic IDs. */
+	union {
+		in_port_t		port;
+		uint32_t		id;
+	} s_src;
+	union {
+		in_port_t		port;
+		uint32_t		id;
+	} s_dst;
+	/* Last activity time (used to calculate expiration time). */
+	struct timespec 		s_atime;
+};
+
+/*
+ * Return pointer to npf_session_t from RB-tree node. (XXX fix rb-tree)
+ * The argument and the whole expansion are parenthesised, so that the
+ * macro is safe with any pointer expression and in any context.
+ */
+#define	NPF_RBN2SESENT(n)		\
+    ((npf_session_t *)((uintptr_t)(n) - \
+    offsetof(npf_session_t, se_entry.rbnode)))
+
+LIST_HEAD(npf_sesslist, npf_session);
+
+/*
+ * Number of hash buckets.  Must be a power of two, since the bucket
+ * index is taken by masking the hash with SESS_HASH_MASK.
+ */
+#define	SESS_HASH_BUCKETS		1024	/* XXX tune + make tunable */
+#define	SESS_HASH_MASK			(SESS_HASH_BUCKETS - 1)
+
+/* Hash bucket: an RB-tree of sessions with its own lock. */
+typedef struct {
+	/* Sessions of this bucket, ordered by sess_rbtree_ops. */
+	struct rb_tree			sh_tree;
+	/* Held as reader for lookups in npf_session_inspect(). */
+	krwlock_t			sh_lock;
+	/* Read without the lock as an "any entries?" pre-check. */
+	u_int				sh_count;
+} npf_sess_hash_t;
+
+/*
+ * Tracking state: 0 - disabled, 1 - enabled, -1 - flush requested
+ * (set temporarily by npf_session_tracking() on re-enable).
+ * XXX: give a separate cache-line to these.
+ */
+static int				sess_tracking;
+
+/* Session hash table, lock and session cache. */
+static npf_sess_hash_t *		sess_hashtbl;
+static pool_cache_t			sess_cache;
+
+/* Lock and condvar used to communicate with the G/C thread. */
+static kmutex_t				sess_lock;
+static kcondvar_t			sess_cv;
+/* G/C thread; NULL when it is not running. */
+static lwp_t *				sess_gc_lwp;
+
+#define	SESS_GC_INTERVAL		5		/* 5 sec */
+
+/*
+ * Session expiration table, in seconds, indexed by NPF_SESSION_* type.
+ * XXX: TCP close: 2 * tcp_msl (e.g. 120)?  Maybe.
+ */
+static const u_int sess_expire_table[ ] = {
+	[NPF_SESSION_TCP]		= 600,		/* 10 min */
+	[NPF_SESSION_UDP]		= 300,		/*  5 min */
+	[NPF_SESSION_ICMP]		= 30		/* 30 sec */
+};
+
+/*
+ * Session states (npf_session_t::s_state), see npf_session_pstate():
+ * OPENING -> OPENING2 on TCP SYN-ACK from the "backwards" side,
+ * OPENING2 -> ESTABLISHED on the following "forwards" ACK,
+ * OPENING -> ESTABLISHED on a "backwards" non-TCP packet,
+ * any -> CLOSING on TCP RST or FIN.
+ */
+#define	SE_OPENING		1
+#define	SE_OPENING2		2
+#define	SE_ESTABLISHED		3
+#define	SE_CLOSING		4
+
+static void	sess_tracking_stop(void);
+static void	npf_session_worker(void *);
+
+#ifdef DEBUG
+#define	DPRINTF(x)	printf x
+#else
+#define	DPRINTF(x)
+#endif
+
+/*
+ * npf_session_sys{init,fini}: initialise/destroy session handling structures.
+ *
+ * Session table and G/C thread are initialised when session tracking gets
+ * actually enabled via npf_session_tracking() interface.
+ */
+
+int
+npf_session_sysinit(void)
+{
+
+	/* Tracking starts disabled, with no G/C thread. */
+	sess_tracking = 0;
+	sess_gc_lwp = NULL;
+
+	/* Synchronisation objects shared with the G/C thread. */
+	cv_init(&sess_cv, "npfgccv");
+	mutex_init(&sess_lock, MUTEX_DEFAULT, IPL_NONE);
+
+	return 0;
+}
+
+/*
+ * npf_session_sysfini: stop session tracking (if enabled) and destroy
+ * the condvar and the mutex set up by npf_session_sysinit().
+ */
+void
+npf_session_sysfini(void)
+{
+	int error;
+
+	/* Disable tracking to destroy all structures. */
+	error = npf_session_tracking(false);
+	/* 'error' is only consumed by the assertion below. */
+	KASSERT(error == 0);
+	KASSERT(sess_tracking == 0);
+	KASSERT(sess_gc_lwp == NULL);
+
+	cv_destroy(&sess_cv);
+	mutex_destroy(&sess_lock);
+}
+
+/*
+ * Session hash table and RB-tree helper routines.
+ *
+ * Sessions are ordered lexicographically by the tuple (source ID,
+ * destination ID, source address, destination address).  Both of the
+ * comparators below use the same ordering and the same sign: positive
+ * if the node sorts before the other operand, negative if it sorts
+ * after, zero if all four fields are equal.
+ */
+
+/*
+ * sess_cmp_u32: three-way comparison of two values in the convention
+ * described above: 1 if a < b, -1 if a > b, 0 if equal.
+ */
+static inline signed int
+sess_cmp_u32(const uint32_t a, const uint32_t b)
+{
+
+	if (a < b)
+		return 1;
+	if (a > b)
+		return -1;
+	return 0;
+}
+
+/*
+ * sess_rbtree_cmp_nodes: compare two session nodes.
+ */
+static signed int
+sess_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
+{
+	const npf_session_t *se1 = NPF_RBN2SESENT(n1);
+	const npf_session_t *se2 = NPF_RBN2SESENT(n2);
+	signed int d;
+
+	/* Ports are the main criteria and are first. */
+	if ((d = sess_cmp_u32(se1->s_src.id, se2->s_src.id)) != 0)
+		return d;
+	if ((d = sess_cmp_u32(se1->s_dst.id, se2->s_dst.id)) != 0)
+		return d;
+
+	if ((d = sess_cmp_u32(se1->s_src_addr, se2->s_src_addr)) != 0)
+		return d;
+	return sess_cmp_u32(se1->s_dst_addr, se2->s_dst_addr);
+}
+
+/*
+ * sess_rbtree_cmp_key: compare a session node against a lookup key,
+ * which is the packet cache (npf_cache_t).
+ *
+ * => If the packet direction differs from the direction of the session,
+ *    the packet belongs to the "backwards" stream and its source and
+ *    destination are swapped before the comparison.
+ */
+static signed int
+sess_rbtree_cmp_key(const struct rb_node *n1, const void *key)
+{
+	const npf_session_t *se = NPF_RBN2SESENT(n1);
+	const npf_cache_t *npc = key;
+	in_port_t sport, dport;
+	in_addr_t src, dst;
+	signed int d;
+
+	if (se->s_direction == npc->npc_dir) {
+		/* Direction "forwards". */
+		src = npc->npc_srcip; sport = npc->npc_sport;
+		dst = npc->npc_dstip; dport = npc->npc_dport;
+	} else {
+		/* Direction "backwards". */
+		src = npc->npc_dstip; sport = npc->npc_dport;
+		dst = npc->npc_srcip; dport = npc->npc_sport;
+	}
+
+	/* Ports are the main criteria and are first. */
+	if ((d = sess_cmp_u32(se->s_src.id, sport)) != 0)
+		return d;
+	if ((d = sess_cmp_u32(se->s_dst.id, dport)) != 0)
+		return d;
+
+	/* Note that hash should minimise differentiation on these. */
+	if ((d = sess_cmp_u32(se->s_src_addr, src)) != 0)
+		return d;
+	return sess_cmp_u32(se->s_dst_addr, dst);
+}
+
+/* Comparators used by the RB-tree of every hash bucket. */
+static const struct rb_tree_ops sess_rbtree_ops = {
+	.rbto_compare_nodes = sess_rbtree_cmp_nodes,
+	.rbto_compare_key = sess_rbtree_cmp_key
+};
+
+/*
+ * sess_hash_bucket: return the hash bucket for the given key.
+ *
+ * => The addresses are summed, therefore both directions of a stream
+ *    hash to the same bucket.
+ */
+static inline npf_sess_hash_t *
+sess_hash_bucket(const npf_cache_t *key)
+{
+
+	KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS));
+
+	/* Sum addresses for both directions and mix in protocol. */
+	uint32_t mix = key->npc_srcip + key->npc_dstip + key->npc_proto;
+	const uint32_t hash = hash32_buf(&mix, sizeof(mix), HASH32_BUF_INIT);
+
+	return sess_hashtbl + (hash & SESS_HASH_MASK);
+}
+
+/*
+ * Session tracking routines.  Note: manages tracking structures.
+ */
+
+/*
+ * sess_tracking_start: create the session cache and the hash table,
+ * mark tracking as enabled and start the G/C thread.
+ *
+ * => Returns 0 on success, otherwise ENOMEM.
+ */
+static int
+sess_tracking_start(void)
+{
+	npf_sess_hash_t *sh;
+	u_int i;
+
+	sess_cache = pool_cache_init(sizeof(npf_session_t), coherency_unit,
+	    0, 0, "npfsespl", NULL, IPL_NET, NULL, NULL, NULL);
+	if (sess_cache == NULL)
+		return ENOMEM;
+
+	sess_hashtbl = kmem_alloc(SESS_HASH_BUCKETS * sizeof(*sh), KM_SLEEP);
+	if (sess_hashtbl == NULL) {
+		pool_cache_destroy(sess_cache);
+		return ENOMEM;
+	}
+
+	/* Each bucket gets an empty tree and its own lock. */
+	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
+		sh = &sess_hashtbl[i];
+		rb_tree_init(&sh->sh_tree, &sess_rbtree_ops);
+		rw_init(&sh->sh_lock);
+		sh->sh_count = 0;
+	}
+
+	/* Make it visible before thread start. */
+	sess_tracking = 1;
+
+	if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
+	    npf_session_worker, NULL, &sess_gc_lwp, "npfgc")) {
+		/*
+		 * Tear down what was set up above.  Note: the error of
+		 * kthread_create() is reported as ENOMEM.
+		 */
+		sess_tracking_stop();
+		return ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * sess_tracking_stop: mark tracking as disabled, wait until there is
+ * no G/C thread, then destroy the hash table and the session cache.
+ */
+static void
+sess_tracking_stop(void)
+{
+	npf_sess_hash_t *sh;
+	u_int i;
+
+	/* Notify G/C thread to flush all sessions, wait for the exit. */
+	mutex_enter(&sess_lock);
+	sess_tracking = 0;
+	cv_signal(&sess_cv);
+	/*
+	 * Presumably the worker clears sess_gc_lwp and signals sess_cv
+	 * on exit - that code is outside of this function; verify.
+	 */
+	while (sess_gc_lwp != NULL) {
+		cv_wait(&sess_cv, &sess_lock);
+	}
+	mutex_exit(&sess_lock);
+
+	/* Destroy and free the hash table with other structures. */
+	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
+		sh = &sess_hashtbl[i];
+		rw_destroy(&sh->sh_lock);
+	}
+	kmem_free(sess_hashtbl, SESS_HASH_BUCKETS * sizeof(*sh));
+	pool_cache_destroy(sess_cache);
+}
+
+/*
+ * npf_session_tracking: enable/disable session tracking.
+ *
+ * => track: true to enable (or, if already enabled, to flush the
+ *    existing sessions), false to disable.
+ * => Returns 0 on success, or the error of sess_tracking_start().
+ * => Called before ruleset reload.
+ * => XXX: serialize at upper layer; ignore for now.
+ */
+int
+npf_session_tracking(bool track)
+{
+
+	if (!sess_tracking && track) {
+		/* Disabled -> Enable. */
+		return sess_tracking_start();
+	}
+	if (sess_tracking && !track) {
+		/* Enabled -> Disable. */
+		sess_tracking_stop();
+		return 0;
+	}
+	if (sess_tracking && track) {
+		/*
+		 * Enabled -> Re-enable.
+		 * Flush existing entries.
+		 *
+		 * NOTE(review): the wait below is a single cv_wait()
+		 * without a predicate re-check; presumably the G/C
+		 * thread signals sess_cv once the flush is done -
+		 * confirm against npf_session_worker().
+		 */
+		mutex_enter(&sess_lock);
+		sess_tracking = -1;	/* XXX */
+		cv_signal(&sess_cv);
+		cv_wait(&sess_cv, &sess_lock);
+		sess_tracking = 1;
+		mutex_exit(&sess_lock);
+	} else {
+		/* Disabled -> Disable. */
+	}
+	return 0;
+}
+
+/*
+ * npf_session_pstate: handle session state according to protocol data.
+ *
+ * => npc: packet cache; the protocol and TCP flags are read from it.
+ * => se: session whose s_state may be updated.
+ * => dir: direction of the packet; it is "backwards" if it differs
+ *    from the direction of the session.
+ * => Currently always returns true.
+ */
+static inline bool
+npf_session_pstate(const npf_cache_t *npc, npf_session_t *se, const int dir)
+{
+	const bool backwards = (se->s_direction != dir);
+	const int proto = npc->npc_proto;
+
+	if (proto != IPPROTO_TCP) {
+		/* Handle UDP or ICMP response for opening session. */
+		if (se->s_state == SE_OPENING && backwards) {
+			se->s_state = SE_ESTABLISHED;
+		}
+		return true;
+	}
+
+	/* Only these four flags take part in the state tracking. */
+	const int tcpfl = npc->npc_tcp_flags & (TH_SYN|TH_ACK|TH_RST|TH_FIN);
+
+	switch (tcpfl) {
+	case TH_ACK:
+		/* Common case. */
+		if (__predict_true(se->s_state == SE_ESTABLISHED)) {
+			return true;
+		}
+		/* ACK seen after SYN-ACK: session fully established. */
+		if (se->s_state == SE_OPENING2 && !backwards) {
+			se->s_state = SE_ESTABLISHED;
+		}
+		break;
+	case TH_SYN | TH_ACK:
+		/* SYN-ACK seen, wait for ACK. */
+		if (se->s_state == SE_OPENING && backwards) {
+			se->s_state = SE_OPENING2;
+		}
+		break;
+	/*
+	 * NOTE(review): only a bare RST or a bare FIN matches here;
+	 * combinations such as FIN|ACK or RST|ACK fall through the
+	 * switch without a state change - confirm this is intended.
+	 */
+	case TH_RST:
+	case TH_FIN:
+		/* XXX/TODO: Handle TCP reset attacks; later. */
+		se->s_state = SE_CLOSING;
+		break;
+	}
+	return true;
+}
+
+/*
+ * npf_session_inspect: look if there is an established session (connection).
+ *
+ * => Returns NULL if tracking is disabled, the IPv4 data cannot be
+ *    cached, or no session matches the packet.
+ * => If found, we will hold a reference for caller, and the last
+ *    activity time of the session is updated.
+ * => 'ifp' is not referenced by this function.
+ */
+npf_session_t *
+npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
+    struct ifnet *ifp, const int di, const int layer)
+{
+	npf_sess_hash_t *sh;
+	struct rb_node *nd;
+	npf_session_t *se;
+
+	/* Attempt to fetch and cache all relevant IPv4 data. */
+	if (!sess_tracking || !npf_cache_all_ip4(npc, nbuf, layer)) {
+		return NULL;
+	}
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+	KASSERT(npf_iscached(npc, NPC_PORTS) || npf_iscached(npc, NPC_ICMP));
+
+	/*
+	 * Execute ALG session helpers.
+	 */
+	npf_cache_t algkey, *key;
+
+	if (npf_alg_sessionid(npc, nbuf, &algkey)) {
+		/* Unique IDs filled by ALG in a separate key cache. */
+		key = &algkey;
+	} else {
+		/* Default: original packet, pass its cache. */
+		key = npc;
+	}
+	/* The comparator reads the direction from the key. */
+	key->npc_dir = di;
+
+	/*
+	 * Get a hash bucket from the cached key data.
+	 * Pre-check if there are any entries in the hash table.
+	 * Note: the counter is read without holding the bucket lock.
+	 */
+	sh = sess_hash_bucket(key);
+	if (sh->sh_count == 0) {
+		return NULL;
+	}
+
+	/* Lookup the tree for a state entry. */
+	rw_enter(&sh->sh_lock, RW_READER);
+	nd = rb_tree_find_node(&sh->sh_tree, key);
+	if (nd == NULL) {
+		rw_exit(&sh->sh_lock);
+		return NULL;
+	}
+	se = NPF_RBN2SESENT(nd);
+
+	/*
+	 * Inspect the protocol data and handle state changes.
+	 * Note: the state is tracked using the cache of the original
+	 * packet (npc), not the ALG key.
+	 */
+	if (npf_session_pstate(npc, se, di)) {
+		/* Must update the last activity time. */
+		getnanouptime(&se->s_atime);
+		/* Hold a reference. */
+		atomic_inc_uint(&se->s_refcnt);
+	} else {
+		se = NULL;
+	}
+	rw_exit(&sh->sh_lock);
+
+	return se;
+}
+
+/*
+ * npf_session_establish: create a new session, insert it into the hash
+ * bucket selected by the packet cache.
+ *
+ * => Session is created with a held reference (for caller).
+ * => Returns NULL if session tracking is disabled, if the allocation
+ *    fails, if the protocol is not supported or if an equal session is
+ *    already present in the tree.
+ */
+npf_session_t *
+npf_session_establish(const npf_cache_t *npc, npf_nat_t *nt, const int di)
+{
+	npf_sess_hash_t *sh;
+	npf_session_t *se;
+	bool ok;
+
+	if (!sess_tracking)	/* XXX */
+		return NULL;
+
+	/* Allocate and initialise new state; the allocation does not sleep. */
+	se = pool_cache_get(sess_cache, PR_NOWAIT);
+	if (__predict_false(se == NULL)) {
+		return NULL;
+	}
+	/* Reference count (the one returned to caller) and direction. */
+	se->s_refcnt = 1;
+	se->s_direction = di;
+
+	/* NAT and backwards session; the link is set by npf_session_link(). */
+	se->s_nat = nt;
+	se->s_nat_se = NULL;
+
+	/* Unique IDs: IP addresses. */
+	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
+	se->s_src_addr = npc->npc_srcip;
+	se->s_dst_addr = npc->npc_dstip;
+
+	switch (npc->npc_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		KASSERT(npf_iscached(npc, NPC_PORTS));
+		se->s_type = (npc->npc_proto == IPPROTO_TCP) ?
+		    NPF_SESSION_TCP : NPF_SESSION_UDP;
+		/* Additional IDs: ports. */
+		se->s_src.id = npc->npc_sport;
+		se->s_dst.id = npc->npc_dport;
+		break;
+	case IPPROTO_ICMP:
+		if (npf_iscached(npc, NPC_ICMP_ID)) {
+			/* ICMP query ID, used for both sides. (XXX) */
+			se->s_type = NPF_SESSION_ICMP;
+			se->s_src.id = npc->npc_icmp_id;
+			se->s_dst.id = npc->npc_icmp_id;
+			break;
+		}
+		/* ICMP without a cached ID is not tracked. */
+		/* FALLTHROUGH */
+	default:
+		/* Unsupported: give the entry back to the cache. */
+		pool_cache_put(sess_cache, se);
+		return NULL;
+	}
+
+	/* Initial state and last activity time for a new session. */
+	se->s_state = SE_OPENING;
+	getnanouptime(&se->s_atime);
+
+	/* Find the hash bucket and insert the state into the tree. */
+	sh = sess_hash_bucket(npc);
+	rw_enter(&sh->sh_lock, RW_WRITER);
+	ok = rb_tree_insert_node(&sh->sh_tree, &se->se_entry.rbnode);
+	if (__predict_true(ok)) {
+		sh->sh_count++;
+		DPRINTF(("NPF: new se %p (link %p, nat %p)\n",
+		    se, se->s_nat_se, se->s_nat));
+	}
+	rw_exit(&sh->sh_lock);
+
+	if (__predict_false(!ok)) {
+		/* Race with duplicate packet: drop the new entry. */
+		pool_cache_put(sess_cache, se);
+		return NULL;
+	}
+	return se;
+}
+
+/*
+ * npf_session_pass: return true if session is "pass" one, otherwise false.
+ *
+ * => Caller must hold a reference on the session.
+ * => Not implemented yet: every session is currently reported as "pass".
+ */
+bool
+npf_session_pass(const npf_session_t *se)
+{
+
+	KASSERT(se->s_refcnt > 0);
+	return true;	/* FIXME */
+}
+
+/*
+ * npf_session_release: release a reference, which might allow G/C thread
+ * to destroy this session.
+ *
+ * => The session is never freed here; npf_sessions_free() destroys
+ *    sessions on the G/C list once their reference count is zero.
+ */
+void
+npf_session_release(npf_session_t *se)
+{
+
+	KASSERT(se->s_refcnt > 0);
+	atomic_dec_uint(&se->s_refcnt);
+}
+
+/*
+ * npf_session_retnat: fetch the NAT data attached to the session.
+ *
+ * => Caller must hold a reference on the session.
+ * => Returns NULL if the session has no NAT data.
+ */
+npf_nat_t *
+npf_session_retnat(const npf_session_t *se)
+{
+	npf_nat_t *nt;
+
+	KASSERT(se->s_refcnt > 0);
+	nt = se->s_nat;
+	return nt;
+}
+
+/*
+ * npf_session_link: link the session 'se' with the session 'natse'.
+ *
+ * => Caller must hold a reference on both sessions.
+ * => An extra reference is taken on 'natse' on behalf of the link; it is
+ *    dropped by npf_session_gc() when 'se' is moved to the G/C list.
+ */
+void
+npf_session_link(npf_session_t *se, npf_session_t *natse)
+{
+
+	KASSERT(se->s_refcnt > 0);
+	KASSERT(natse->s_refcnt > 0);
+
+	/* Reference first, publish the link afterwards. */
+	atomic_inc_uint(&natse->s_refcnt);
+	se->s_nat_se = natse;
+}
+
+/*
+ * npf_session_retlinknat: fetch the NAT data of the linked session.
+ *
+ * => Caller must hold a reference on 'se'.
+ * => Returns NULL if there is no linked session.
+ */
+npf_nat_t *
+npf_session_retlinknat(const npf_session_t *se)
+{
+	const npf_session_t *linked = se->s_nat_se;
+
+	KASSERT(se->s_refcnt > 0);
+	if (linked == NULL) {
+		return NULL;
+	}
+	/* The link itself holds a reference on the linked session. */
+	KASSERT(linked->s_refcnt > 0);
+	return linked->s_nat;
+}
+
+/*
+ * npf_session_expired: tell whether the session has been idle for longer
+ * than the limit which applies to its current state.
+ *
+ * => 'tsnow' is the current uptime, to be compared with the last
+ *    activity time of the session.
+ */
+static inline bool
+npf_session_expired(const npf_session_t *se, const struct timespec *tsnow)
+{
+	struct timespec idle;
+	int limit;
+
+	timespecsub(tsnow, &se->s_atime, &idle);
+
+	switch (se->s_state) {
+	case SE_OPENING:
+	case SE_OPENING2:
+	case SE_CLOSING:
+		/* XXX: figure out reasonable time */
+		limit = 10;
+		break;
+	case SE_ESTABLISHED:
+		/* Per session type limit. */
+		limit = sess_expire_table[se->s_type];
+		break;
+	default:
+		KASSERT(false);
+		limit = 0;
+		break;
+	}
+	return (idle.tv_sec > limit);
+}
+
+/*
+ * npf_session_gc: scan all sessions, insert into G/C list all expired ones.
+ *
+ * => If 'flushall' is true, every session is moved, expired or not.
+ * => Sessions are only unlinked from the trees here; they are destroyed
+ *    by npf_sessions_free() once nobody references them.
+ */
+static void
+npf_session_gc(struct npf_sesslist *gc_list, bool flushall)
+{
+	struct timespec tsnow;
+	npf_session_t *se;
+	u_int i;
+
+	getnanouptime(&tsnow);
+
+	/* Scan each session in the hash table. */
+	for (i = 0; i < SESS_HASH_BUCKETS; i++) {
+		npf_sess_hash_t *sh;
+		struct rb_node *nd;
+
+		sh = &sess_hashtbl[i];
+		if (sh->sh_count == 0) {	/* checked without the lock */
+			continue;
+		}
+		rw_enter(&sh->sh_lock, RW_WRITER);
+		/* For each (left -> right) ... */
+		nd = rb_tree_iterate(&sh->sh_tree, NULL, RB_DIR_LEFT);
+		while (nd != NULL) {
+			/*
+			 * Get item, pre-iterate, skip if not expired.  The
+			 * successor is fetched first, because the current
+			 * node may be removed from the tree below.
+			 */
+			se = NPF_RBN2SESENT(nd);
+			nd = rb_tree_iterate(&sh->sh_tree, nd, RB_DIR_RIGHT);
+			if (!npf_session_expired(se, &tsnow) && !flushall) {
+				continue;
+			}
+
+			/* Expired - move to G/C list. */
+			rb_tree_remove_node(&sh->sh_tree, &se->se_entry.rbnode);
+			LIST_INSERT_HEAD(gc_list, se, se_entry.gclist);
+			sh->sh_count--;
+
+			/* If linked, drop the reference taken by the link. */
+			DPRINTF(("NPF: se %p expired\n", se));
+			if (se->s_nat_se) {
+				npf_session_release(se->s_nat_se);
+				DPRINTF(("NPF: se %p unlinked %p\n",
+				    se, se->s_nat_se));
+				se->s_nat_se = NULL;
+			}
+		}
+		KASSERT(!flushall || sh->sh_count == 0);
+		rw_exit(&sh->sh_lock);
+	}
+}
+
+/*
+ * npf_sessions_free: walk the G/C list and destroy every session which
+ * has no references left.
+ *
+ * => Sessions which are still referenced stay on the list for a later
+ *    pass; the caller inspects the list to find out if it was drained.
+ */
+static void
+npf_sessions_free(struct npf_sesslist *gc_list)
+{
+	npf_session_t *cur, *next;
+
+	for (cur = LIST_FIRST(gc_list); cur != NULL; cur = next) {
+		/* Fetch the successor first, 'cur' may get unlinked. */
+		next = LIST_NEXT(cur, se_entry.gclist);
+		if (cur->s_refcnt != 0) {
+			/* Still in use, retry on the next pass. */
+			continue;
+		}
+		LIST_REMOVE(cur, se_entry.gclist);
+		if (cur->s_nat) {
+			/* Release any NAT related structures. */
+			npf_nat_expire(cur->s_nat);
+		}
+		DPRINTF(("NPF: se %p destroyed\n", cur));
+		pool_cache_put(sess_cache, cur);
+	}
+}
+
+/*
+ * npf_session_worker: G/C worker thread.
+ *
+ * => Loops while session tracking is enabled: waits on sess_cv for up to
+ *    SESS_GC_INTERVAL, then collects and frees the expired sessions.
+ * => When the tracking flag is found to be other than 1, all sessions
+ *    are flushed; if the loop continues, the waiter is signalled at the
+ *    start of the next iteration.
+ * => On exit, waits until every session on the G/C list is released,
+ *    clears sess_gc_lwp and signals sess_cv.
+ */
+static void
+npf_session_worker(void *arg)
+{
+	struct npf_sesslist gc_list;
+	bool flushreq = false;
+
+	LIST_INIT(&gc_list);
+	do {
+		/* Periodically wake up, unless get notified. */
+		mutex_enter(&sess_lock);
+		if (flushreq) {
+			/* Flush was performed, notify waiter. */
+			cv_signal(&sess_cv);
+		}
+		(void)cv_timedwait(&sess_cv, &sess_lock, SESS_GC_INTERVAL);
+		flushreq = (sess_tracking != 1);	/* XXX */
+		mutex_exit(&sess_lock);
+
+		/* Flush all if session tracking got disabled. */
+		npf_session_gc(&gc_list, flushreq);
+		npf_sessions_free(&gc_list);
+
+	} while (sess_tracking);	/* read without holding sess_lock */
+
+	/* Wait for any referenced sessions to be released; poll for it. */
+	while (!LIST_EMPTY(&gc_list)) {
+		kpause("npfgcfr", false, 1, NULL);
+		npf_sessions_free(&gc_list);
+	}
+
+	/* Notify that we are done. */
+	mutex_enter(&sess_lock);
+	sess_gc_lwp = NULL;
+	cv_signal(&sess_cv);
+	mutex_exit(&sess_lock);
+
+	kthread_exit(0);
+}
+
+#if defined(DDB) || defined(_NPF_TESTING)
+/*
+ * npf_sessions_dump: print all tracked sessions, bucket by bucket.
+ *
+ * => Only built for DDB or _NPF_TESTING.
+ * => Does nothing if session tracking is disabled.
+ * => The trees are walked without taking the bucket locks.
+ */
+void
+npf_sessions_dump(void)
+{
+	npf_sess_hash_t *sh;
+	struct rb_node *nd;
+	npf_session_t *se;
+	struct timespec tsnow;
+
+	if (!sess_tracking) {
+		return;
+	}
+
+	getnanouptime(&tsnow);
+	for (u_int i = 0; i < SESS_HASH_BUCKETS; i++) {
+		sh = &sess_hashtbl[i];
+		if (sh->sh_count == 0) {
+			/* A zero counter must mean an empty tree. */
+			KASSERT(rb_tree_iterate(&sh->sh_tree,
+			    NULL, RB_DIR_RIGHT) == NULL);
+			continue;
+		}
+		printf("s_bucket %d (count = %d)\n", i, sh->sh_count);
+		RB_TREE_FOREACH(nd, &sh->sh_tree) {
+			struct timespec tsdiff;
+			struct in_addr ip;
+			int etime;
+
+			se = NPF_RBN2SESENT(nd);
+
+			/* Idle time and the limit it is compared against. */
+			timespecsub(&tsnow, &se->s_atime, &tsdiff);
+			etime = (se->s_state == SE_ESTABLISHED) ?
+			    sess_expire_table[se->s_type] : 10;
+
+			printf("\t%p: type(%d) di = %d, tsdiff = %d, "
+			    "etime = %d\n", se, se->s_type, se->s_direction,
+			    (int)tsdiff.tv_sec, etime);
+			ip.s_addr = se->s_src_addr;
+			printf("\tsrc (%s, %d) ",
+			    inet_ntoa(ip), ntohs(se->s_src.port));
+			ip.s_addr = se->s_dst_addr;
+			printf("dst (%s, %d)\n", 
+			    inet_ntoa(ip), ntohs(se->s_dst.port));
+			if (se->s_nat_se != NULL) {
+				printf("\tlinked with %p\n", se->s_nat_se);
+			}
+			if (se->s_nat != NULL) {
+				npf_nat_dump(se->s_nat);
+			}
+		}
+	}
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_tableset.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,559 @@
+/*	$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF table module.
+ *
+ *	table_lock ->
+ *		npf_table_t::t_lock
+ *
+ * TODO:
+ * - Currently, code is modeled to handle IPv4 CIDR blocks.
+ * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
+ * - Dynamic array resize.
+ */
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $");
+#endif
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <sys/atomic.h>
+#include <sys/hash.h>
+#include <sys/kmem.h>
+#include <sys/pool.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+
+#include "npf_impl.h"
+
+/*
+ * Table entry structure.  An entry is linked either into a hash bucket
+ * or into the RB-tree, depending on the type of the table it belongs to.
+ */
+struct npf_tblent {
+	/* IPv4 CIDR block: address and mask, stored as passed by caller. */
+	in_addr_t			te_addr;
+	in_addr_t			te_mask;
+	union {
+		LIST_ENTRY(npf_tblent)	hashq;	/* NPF_TABLE_HASH */
+		struct rb_node		rbnode;	/* NPF_TABLE_RBTREE */
+	} te_entry;
+};
+
+/*
+ * Return pointer to npf_tblent_t from RB-tree node. (XXX fix rb-tree)
+ *
+ * The argument and the whole expansion are parenthesised, so that the
+ * macro can be used with any expression and as part of a larger one.
+ * Note: a NULL node does not yield NULL; check the node before use.
+ */
+#define	NPF_RBN2TBLENT(n)		\
+    ((npf_tblent_t *)((uintptr_t)(n) -	\
+    offsetof(npf_tblent_t, te_entry.rbnode)))
+
+LIST_HEAD(npf_hashl, npf_tblent);
+
+/*
+ * Table structure.  Only one of the two storages (hash lists or RB-tree)
+ * is initialised by npf_table_create(), according to the table type.
+ */
+struct npf_table {
+	char				t_name[16];	/* not set in this file */
+	/* Lock and reference count. */
+	krwlock_t			t_lock;
+	u_int				t_refcnt;
+	/* Table ID: index of the table in a tableset. */
+	u_int				t_id;
+	/* The storage type can be: 1. Hash 2. RB-tree. */
+	u_int				t_type;
+	struct npf_hashl *		t_hashl;	/* hash buckets */
+	u_long				t_hashmask;	/* bucket index mask */
+	struct rb_tree			t_rbtree;
+};
+
+/*
+ * Global table array and its lock, which protects the table_array
+ * pointer itself; pool cache for the table entries of all tables.
+ */
+static npf_tableset_t *		table_array;
+static krwlock_t		table_lock;
+static pool_cache_t		tblent_cache;
+
+/*
+ * npf_tableset_sysinit: set up the global state of the table module.
+ *
+ * => Creates the pool cache for table entries, the global (initially
+ *    empty) tableset and the lock of the global tableset pointer.
+ * => Returns 0 on success or ENOMEM, in which case nothing is left
+ *    allocated.
+ */
+int
+npf_tableset_sysinit(void)
+{
+	int error = ENOMEM;
+
+	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
+	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
+	if (tblent_cache != NULL) {
+		table_array = npf_tableset_create();
+		if (table_array != NULL) {
+			rw_init(&table_lock);
+			error = 0;
+		} else {
+			/* Undo the first step. */
+			pool_cache_destroy(tblent_cache);
+		}
+	}
+	return error;
+}
+
+/*
+ * npf_tableset_sysfini: release the global state of the table module.
+ */
+void
+npf_tableset_sysfini(void)
+{
+	npf_tableset_t *tblset = table_array;
+
+	/*
+	 * Tables go first: destroying them returns their entries to the
+	 * pool cache, which therefore has to be destroyed afterwards.
+	 */
+	npf_tableset_destroy(tblset);
+	pool_cache_destroy(tblent_cache);
+	rw_destroy(&table_lock);
+}
+
+/*
+ * npf_tableset_create: allocate a tableset, that is, a zeroed array of
+ * NPF_TABLE_SLOTS table pointers.
+ */
+npf_tableset_t *
+npf_tableset_create(void)
+{
+
+	return kmem_zalloc(NPF_TABLE_SLOTS * sizeof(npf_table_t *), KM_SLEEP);
+}
+
+/*
+ * npf_tableset_destroy: destroy all tables of the tableset and free the
+ * tableset itself.
+ *
+ * => No references should be held on the tables, as the ruleset should
+ *    be destroyed before.
+ */
+void
+npf_tableset_destroy(npf_tableset_t *tblset)
+{
+	u_int slot;
+
+	for (slot = 0; slot < NPF_TABLE_SLOTS; slot++) {
+		npf_table_t *t = tblset[slot];
+
+		if (t == NULL) {
+			/* Unused slot. */
+			continue;
+		}
+		npf_table_destroy(t);
+	}
+	kmem_free(tblset, NPF_TABLE_SLOTS * sizeof(npf_table_t *));
+}
+
+/*
+ * npf_tableset_insert: put the table into the slot of the tableset which
+ * is selected by the table ID.
+ *
+ * => Returns 0 on success or EEXIST if the slot is already in use.
+ */
+int
+npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
+{
+	const u_int slot = t->t_id;
+
+	KASSERT(slot < NPF_TABLE_SLOTS);
+
+	if (tblset[slot] != NULL) {
+		return EEXIST;
+	}
+	tblset[slot] = t;
+	return 0;
+}
+
+/*
+ * npf_tableset_reload: make the given tableset the global one.
+ *
+ * => Called from npf_ruleset_reload() with a global ruleset lock held.
+ * => The swap is done with table_lock write-held.
+ * => Returns the previous global tableset; caller will destroy it.
+ */
+npf_tableset_t *
+npf_tableset_reload(npf_tableset_t *tblset)
+{
+	npf_tableset_t *prev;
+
+	rw_enter(&table_lock, RW_WRITER);
+	prev = table_array;
+	table_array = tblset;
+	rw_exit(&table_lock);
+
+	return prev;
+}
+
+/*
+ * Red-black tree storage.
+ */
+
+/*
+ * table_rbtree_cmp_nodes: order two entries by their masked address
+ * (te_addr & te_mask), compared as plain integers.
+ *
+ * => Returns 1 if the first is smaller, -1 if it is greater, 0 if equal.
+ */
+static signed int
+table_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
+{
+	const npf_tblent_t *a = NPF_RBN2TBLENT(n1);
+	const npf_tblent_t *b = NPF_RBN2TBLENT(n2);
+	const in_addr_t net_a = a->te_addr & a->te_mask;
+	const in_addr_t net_b = b->te_addr & b->te_mask;
+
+	if (net_a == net_b) {
+		return 0;
+	}
+	return (net_a < net_b) ? 1 : -1;
+}
+
+/*
+ * table_rbtree_cmp_key: compare the masked address of an entry with a
+ * key, which points to an in_addr_t.
+ *
+ * => Returns 1 if the entry is smaller, -1 if it is greater, 0 if equal.
+ */
+static signed int
+table_rbtree_cmp_key(const struct rb_node *n1, const void *key)
+{
+	const npf_tblent_t *ent = NPF_RBN2TBLENT(n1);
+	const in_addr_t net = ent->te_addr & ent->te_mask;
+	const in_addr_t wanted = *(const in_addr_t *)key;
+
+	if (net == wanted) {
+		return 0;
+	}
+	return (net < wanted) ? 1 : -1;
+}
+
+/* Comparators used by the RB-tree storage of a table. */
+static const struct rb_tree_ops table_rbtree_ops = {
+	.rbto_compare_key = table_rbtree_cmp_key,
+	.rbto_compare_nodes = table_rbtree_cmp_nodes
+};
+
+/*
+ * Hash helper routine.
+ */
+
+/*
+ * table_hash_bucket: hash 'sz' bytes at 'buf' and return the bucket of
+ * the table which the hash value selects.
+ */
+static inline struct npf_hashl *
+table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
+{
+	uint32_t hval;
+
+	hval = hash32_buf(buf, sz, HASH32_BUF_INIT);
+	return t->t_hashl + (hval & t->t_hashmask);
+}
+
+/*
+ * npf_table_create: create table with a specified ID.
+ *
+ * => 'type' selects the storage: NPF_TABLE_HASH or NPF_TABLE_RBTREE.
+ * => 'hsize' is passed to hashinit() and only used for hash tables.
+ * => The table is returned with one reference held.
+ * => Returns NULL if the hash lists cannot be allocated or if the type
+ *    is not known.
+ */
+npf_table_t *
+npf_table_create(u_int tid, int type, size_t hsize)
+{
+	npf_table_t *t;
+
+	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
+
+	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
+	switch (type) {
+	case NPF_TABLE_RBTREE:
+		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
+		break;
+	case NPF_TABLE_HASH:
+		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
+		if (t->t_hashl == NULL) {
+			kmem_free(t, sizeof(npf_table_t));
+			return NULL;
+		}
+		break;
+	default:
+		/*
+		 * Unknown type.  Where KASSERT() is compiled out, do not
+		 * hand out a table which has no storage initialised.
+		 */
+		KASSERT(false);
+		kmem_free(t, sizeof(npf_table_t));
+		return NULL;
+	}
+	rw_init(&t->t_lock);
+	t->t_type = type;
+	t->t_refcnt = 1;
+	t->t_id = tid;
+	return t;
+}
+
+/*
+ * npf_table_destroy: return all entries of the table to the pool cache,
+ * release the storage and free the table itself.
+ */
+void
+npf_table_destroy(npf_table_t *t)
+{
+	struct npf_hashl *bucket;
+	struct rb_node *nd;
+	npf_tblent_t *e;
+	u_int i;
+
+	switch (t->t_type) {
+	case NPF_TABLE_RBTREE:
+		/* Keep removing the first node returned by the iterator. */
+		for (;;) {
+			nd = rb_tree_iterate(&t->t_rbtree, NULL, RB_DIR_RIGHT);
+			if (nd == NULL) {
+				break;
+			}
+			e = NPF_RBN2TBLENT(nd);
+			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
+			pool_cache_put(tblent_cache, e);
+		}
+		break;
+	case NPF_TABLE_HASH:
+		/* Drain each bucket, then free the array of buckets. */
+		for (i = 0; i <= t->t_hashmask; i++) {
+			bucket = &t->t_hashl[i];
+			while (!LIST_EMPTY(bucket)) {
+				e = LIST_FIRST(bucket);
+				LIST_REMOVE(e, te_entry.hashq);
+				pool_cache_put(tblent_cache, e);
+			}
+		}
+		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
+		break;
+	default:
+		KASSERT(false);
+	}
+	rw_destroy(&t->t_lock);
+	kmem_free(t, sizeof(npf_table_t));
+}
+
+/*
+ * npf_table_ref: holds the reference on table.
+ *
+ * => Table must be locked (as reader or writer), e.g. by npf_table_get().
+ * => The reference is dropped with npf_table_unref().
+ */
+void
+npf_table_ref(npf_table_t *t)
+{
+
+	KASSERT(rw_lock_held(&t->t_lock));
+	atomic_inc_uint(&t->t_refcnt);
+}
+
+/*
+ * npf_table_unref: drop a reference on the table.
+ *
+ * => The table is destroyed when the last reference goes away.
+ */
+void
+npf_table_unref(npf_table_t *t)
+{
+
+	if (atomic_dec_uint_nv(&t->t_refcnt) == 0) {
+		npf_table_destroy(t);
+	}
+}
+
+/*
+ * npf_table_get: look up the table by ID and "get it" by read-locking it.
+ *
+ * => If 'tset' is NULL, the global tableset is used; table_lock is held
+ *    as reader while the global array is accessed.
+ * => Returns NULL if the ID is out of range or the slot is empty.
+ * => On success, the caller unlocks the table with npf_table_put().
+ */
+npf_table_t *
+npf_table_get(npf_tableset_t *tset, u_int tid)
+{
+	const bool global = (tset == NULL);
+	npf_table_t *t;
+
+	if (tid >= NPF_TABLE_SLOTS) {
+		return NULL;
+	}
+	if (global) {
+		rw_enter(&table_lock, RW_READER);
+		tset = table_array;
+	}
+	t = tset[tid];
+	if (t != NULL) {
+		rw_enter(&t->t_lock, RW_READER);
+	}
+	if (global) {
+		rw_exit(&table_lock);
+	}
+	return t;
+}
+
+/*
+ * npf_table_put: "put table back" by unlocking it.
+ *
+ * => Releases the table lock taken by npf_table_get().
+ */
+void
+npf_table_put(npf_table_t *t)
+{
+
+	rw_exit(&t->t_lock);
+}
+
+/*
+ * npf_table_check: validate ID and type of a table to be created.
+ *
+ * => Returns EINVAL if the ID is out of range, EEXIST if the slot of the
+ *    tableset is already in use, EINVAL if the type is not known, and
+ *    0 otherwise.  The checks are made in this order.
+ */
+int
+npf_table_check(npf_tableset_t *tset, u_int tid, int type)
+{
+	const bool known = (type == NPF_TABLE_RBTREE || type == NPF_TABLE_HASH);
+
+	if (tid >= NPF_TABLE_SLOTS) {
+		return EINVAL;
+	}
+	if (tset[tid] != NULL) {
+		return EEXIST;
+	}
+	return known ? 0 : EINVAL;
+}
+
+/*
+ * npf_table_add_v4cidr: add an IPv4 CIDR block into the table.
+ *
+ * => If 'tset' is NULL, the table is looked up in the global tableset.
+ * => Returns 0 on success, EINVAL if there is no such table and EEXIST
+ *    if the table already has an equal entry.
+ * => NOTE(review): npf_table_get() takes the table lock as reader, yet
+ *    the table is modified here - confirm that callers serialise
+ *    the updates by other means.
+ */
+int
+npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
+    in_addr_t addr, in_addr_t mask)
+{
+	struct npf_hashl *bucket;
+	npf_tblent_t *ent, *dup;
+	npf_table_t *t;
+	in_addr_t net;
+	int error;
+
+	/* Prepare the entry before the table gets locked. */
+	ent = pool_cache_get(tblent_cache, PR_WAITOK);
+	if (ent == NULL) {
+		return ENOMEM;
+	}
+	ent->te_addr = addr;
+	ent->te_mask = mask;
+
+	/* Locks the table. */
+	t = npf_table_get(tset, tid);
+	if (__predict_false(t == NULL)) {
+		error = EINVAL;
+		goto fail;
+	}
+
+	error = 0;
+	switch (t->t_type) {
+	case NPF_TABLE_RBTREE:
+		/* The tree refuses a node with an already present key. */
+		if (!rb_tree_insert_node(&t->t_rbtree, &ent->te_entry.rbnode)) {
+			error = EEXIST;
+		}
+		break;
+	case NPF_TABLE_HASH:
+		/* The bucket is selected by: address & mask. */
+		net = addr & mask;
+		bucket = table_hash_bucket(t, &net, sizeof(in_addr_t));
+
+		/* Look for an entry with the same address and mask. */
+		LIST_FOREACH(dup, bucket, te_entry.hashq) {
+			if (dup->te_addr == addr && dup->te_mask == mask)
+				break;
+		}
+		if (__predict_false(dup != NULL)) {
+			error = EEXIST;
+		} else {
+			LIST_INSERT_HEAD(bucket, ent, te_entry.hashq);
+		}
+		break;
+	default:
+		KASSERT(false);
+	}
+	npf_table_put(t);
+
+	if (__predict_true(error == 0)) {
+		return 0;
+	}
+fail:
+	/* The entry was not inserted, give it back. */
+	pool_cache_put(tblent_cache, ent);
+	return error;
+}
+
+/*
+ * npf_table_rem_v4cidr: remove an IPv4 CIDR block from the table.
+ *
+ * => If 'tset' is NULL, the table is looked up in the global tableset.
+ * => Returns 0 on success, EINVAL if there is no such table and ESRCH
+ *    if the entry was not found (previously a bare -1 was returned,
+ *    while the computed error code was never used).
+ * => Hash tables match on the exact address and mask; the RB-tree is
+ *    searched by (address & mask) only.
+ */
+int
+npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
+    in_addr_t addr, in_addr_t mask)
+{
+	struct npf_hashl *htbl;
+	struct rb_node *nd;
+	npf_tblent_t *e = NULL;
+	npf_table_t *t;
+	in_addr_t val;
+
+	/* Locks the table. */
+	t = npf_table_get(tset, tid);
+	if (__predict_false(t == NULL)) {
+		return EINVAL;
+	}
+
+	/* Both storages are keyed by: (address & mask). */
+	val = addr & mask;
+
+	/* Lookup & remove. */
+	switch (t->t_type) {
+	case NPF_TABLE_HASH:
+		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
+		LIST_FOREACH(e, htbl, te_entry.hashq) {
+			if (e->te_addr == addr && e->te_mask == mask)
+				break;
+		}
+		if (__predict_true(e != NULL)) {
+			LIST_REMOVE(e, te_entry.hashq);
+		}
+		break;
+	case NPF_TABLE_RBTREE:
+		nd = rb_tree_find_node(&t->t_rbtree, &val);
+		if (__predict_true(nd != NULL)) {
+			e = NPF_RBN2TBLENT(nd);
+			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
+		}
+		break;
+	default:
+		KASSERT(false);
+	}
+	npf_table_put(t);
+
+	if (__predict_false(e == NULL)) {
+		/* Nothing was removed. */
+		return ESRCH;
+	}
+
+	/* Free the table entry. */
+	pool_cache_put(tblent_cache, e);
+	return 0;
+}
+
+/*
+ * npf_table_match_v4addr: find the table according to ID, lookup and
+ * match the contents with specified IPv4 address.
+ *
+ * => The table is looked up in the global tableset.
+ * => Returns 0 if the address matches an entry, -1 if it does not and
+ *    EINVAL if there is no such table.
+ * => Both storages are searched with the address itself as the key.
+ */
+int
+npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
+{
+	struct npf_hashl *htbl;
+	struct rb_node *nd;
+	npf_tblent_t *e;
+	npf_table_t *t;
+
+	e = NULL;
+
+	/* Locks the table. */
+	t = npf_table_get(NULL, tid);
+	if (__predict_false(t == NULL)) {
+		return EINVAL;
+	}
+	switch (t->t_type) {
+	case NPF_TABLE_HASH:
+		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
+		LIST_FOREACH(e, htbl, te_entry.hashq) {
+			if ((ip4addr & e->te_mask) == e->te_addr) {
+				break;
+			}
+		}
+		break;
+	case NPF_TABLE_RBTREE:
+		/*
+		 * The node must be checked before it is converted: the
+		 * conversion of a NULL node yields a bogus non-NULL entry,
+		 * which would be dereferenced and reported as a match.
+		 */
+		nd = rb_tree_find_node(&t->t_rbtree, &ip4addr);
+		if (nd != NULL) {
+			e = NPF_RBN2TBLENT(nd);
+			KASSERT((ip4addr & e->te_mask) == e->te_addr);
+		}
+		break;
+	default:
+		KASSERT(false);
+	}
+	npf_table_put(t);
+
+	return e ? 0 : -1;
+}
--- a/usr.sbin/Makefile	Sun Aug 22 18:01:01 2010 +0000
+++ b/usr.sbin/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.245 2010/08/04 17:13:33 christos Exp $
+#	$NetBSD: Makefile,v 1.246 2010/08/22 18:56:23 rmind Exp $
 #	from: @(#)Makefile	5.20 (Berkeley) 6/12/93
 
 .include <bsd.own.mk>
@@ -52,6 +52,11 @@
 SUBDIR+= racoon racoonctl
 .endif
 
+# NPF
+.if (${MKNPF} != "no")
+SUBDIR+=npf
+.endif
+
 # IP Filter
 .if (${MKIPFILTER} != "no")
 SUBDIR+=ipf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,5 @@
+# $NetBSD: Makefile,v 1.1 2010/08/22 18:56:23 rmind Exp $
+
+SUBDIR=		npfctl
+
+.include <bsd.subdir.mk>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/Makefile.inc	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,7 @@
+# $NetBSD: Makefile.inc,v 1.1 2010/08/22 18:56:23 rmind Exp $
+
+WARNS?=		4
+
+.if exists(${.CURDIR}/../../Makefile.inc)
+.include "${.CURDIR}/../../Makefile.inc"
+.endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/Makefile	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,14 @@
+# $NetBSD: Makefile,v 1.1 2010/08/22 18:56:23 rmind Exp $
+
+PROG=		npfctl
+MAN=		npfctl.8 npf.conf.8
+
+SRCS=		npfctl.c npf_parser.c npf_data.c npf_ncgen.c
+
+LDADD+=		-lprop
+DPADD+=		${LIBPROP}
+
+WARNS?=		4
+NOLINT=		# defined (note: deliberately)
+
+.include <bsd.prog.mk>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npf.conf.8	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,169 @@
+.\"	$NetBSD: npf.conf.8,v 1.1 2010/08/22 18:56:23 rmind Exp $
+.\"
+.\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This material is based upon work partially supported by The
+.\" NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 22, 2010
+.Dt NPF.CONF 8
+.Os
+.Sh NAME
+.Nm npf.conf
+.Nd NPF packet filter configuration file
+.\" -----
+.Sh DESCRIPTION
+.Nm
+is the default configuration file for the NPF packet filter.
+It can contain definitions, grouped rules, and tables.
+.Sh DEFINITIONS
+Definitions are general purpose keywords which can be used in the
+ruleset to make it more flexible and easier to manage.
+Most commonly, definitions are used to define one of the following:
+IP addresses, networks, ports, or interfaces.
+Definitions can contain multiple elements.
+.Sh GROUPS
+Having one huge ruleset for all interfaces or directions might be
+inefficient; therefore, NPF requires that all rules be defined within groups.
+Groups can be thought of as higher level rules which have subrules.
+The main properties of a group are its interface and traffic direction.
+Packets matching group criteria are passed to the ruleset of that group.
+If a packet does not match any group, it is passed to the default group.
+The default group must always be defined.
+.Sh RULES
+Rules, which are the main part of NPF configuration, describe the criteria
+used to inspect and make decisions about packets.
+Currently, NPF supports filtering on the following criteria: interface,
+traffic direction, protocol, IPv4 address or network, and TCP/UDP port
+or range.
+Supported actions are blocking or passing the packet.
+.Pp
+Each rule has a priority, which is set according to its order in the ruleset.
+Rules defined first are accordingly inspected first.
+All rules in the group are inspected sequentially, and the last matching
+rule dictates the action to be taken.
+Rules, however, may be explicitly marked as final (that is, "quick").
+In such cases, processing stops after encountering the first matching rule
+marked as final.
+If there is no matching rule in the custom group, then rules in the default
+group will be inspected.
+.Pp
+Definitions (prefixed with "$") and tables (specified by an ID within
+"\*[Lt]\*[Gt]" marks) can be used in the filter options of rules.
+.Sh TABLES
+Certain configurations might use very large sets of IP addresses or change
+sets frequently.
+Storing large IP sets in the configuration file or performing frequent
+reloads can have a significant performance cost.
+.Pp
+In order to achieve high performance, NPF has tables.
+NPF tables provide separate storage designed for large IP sets and frequent
+updates without reloading the entire ruleset.
+Tables can be managed dynamically or loaded from a separate file, which
+is useful for large static tables.
+There are two types of storage: "tree" (red-black tree is used) and
+"hash".
+.Sh NAT
+Special rules for Network Address Translation (NAT) can be added.
+Translation is performed on the specified interface, assigning a specified
+address of said interface.
+Minimal filtering criteria on local network and destination are provided.
+.\" -----
+.Sh GRAMMAR
+.Bd -literal
+line		= ( def | table | nat | group )
+
+def		= ( "{ a, b, ... }" | "text" | "$\*[Lt]interface\*[Gt]" )
+iface		= ( \*[Lt]interface\*[Gt] | def )
+
+table		= "table" \*[Lt]tid\*[Gt] "type" ( "hash" | "tree" )
+		  ( "dynamic" | "file" \*[Lt]path\*[Gt] )
+
+nat		= "nat" iface "from" \*[Lt]addr/mask\*[Gt] "to" \*[Lt]addr/mask\*[Gt] "->" \*[Lt]addr\*[Gt]
+
+group		= "group" "(" ( "default" | group-opts ) ")" ruleset
+group-opts	= "interface" iface "," [ "in" | "out" ]
+
+ruleset		= "{" rule1 \*[Lt]newline\*[Gt], rule2 \*[Lt]newline\*[Gt], ... "}"
+
+rule		= ( "block" | "pass" ) [ "in" | "out" ] rule-opts
+		  [ "on" iface ] [ "inet" | "inet6" ] [ "proto" \*[Lt]protocol\*[Gt] ]
+		  ( "all" | filt-opts )
+
+rule-opts	= [ "log" ] [ "count" ] [ "quick" ]
+filt-opts	= [ "from" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
+		  [ "to" ( iface | def | \*[Lt]addr/mask\*[Gt] | \*[Lt]tid\*[Gt] ) port-opts ]
+port-opts	= [ "port" ( \*[Lt]port-num\*[Gt] | \*[Lt]port-from\*[Gt] ":" \*[Lt]port-to\*[Gt] | def ) ]
+.Ed
+.\" -----
+.Sh FILES
+.Bl -tag -width /dev/npf.conf -compact
+.It Pa /dev/npf
+control device
+.It Pa /etc/npf.conf
+default configuration file
+.El
+.\" -----
+.Sh EXAMPLES
+.Bd -literal
+ext_if = "wm0"
+int_if = "wm1"
+
+services_tcp = "{ http, https, smtp, domain, 6000 }"
+services_udp = "{ domain, ntp, 6000 }"
+
+table "1" type "hash" file "/etc/npf_blacklist"
+table "2" type "tree" dynamic
+
+nat $ext_if from 192.168.0.0/24 to 0.0.0.0/0 -> $ext_if
+
+group (name "external", interface $ext_if) {
+	block in quick from \*[Lt]1\*[Gt]
+	pass out quick from $ext_if keep state
+
+	pass in log quick inet proto tcp to $ext_if port ssh
+	pass in quick proto tcp to $ext_if port $services_tcp
+	pass in quick proto udp to $ext_if port $services_udp
+	pass in quick proto tcp to $ext_if port 49151:65535	# Passive FTP
+	pass in quick proto udp to $ext_if port 33434:33600	# Traceroute
+}
+
+group (name "internal", interface $int_if) {
+	block in all
+	pass in quick from \*[Lt]2\*[Gt]
+	pass out quick all
+}
+
+group (default) {
+        block all
+}
+.Ed
+.\" -----
+.Sh SEE ALSO
+.Xr npfctl 8 ,
+.Xr npf_ncode 9
+.Sh HISTORY
+NPF first appeared in
+.Nx 6.0 .
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npf_data.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,579 @@
+/*	$NetBSD: npf_data.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF proplib(9) dictionary producer.
+ *
+ * XXX: Needs some clean-up.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+
+#include <arpa/inet.h>
+#include <prop/proplib.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <err.h>
+#include <ifaddrs.h>
+#include <netdb.h>
+#include <assert.h>
+
+#include "npfctl.h"
+
+/* Interface address list, filled by getifaddrs(3) in npfctl_init_data(). */
+static struct ifaddrs *		ifs_list = NULL;
+
+/* Root configuration dictionary and its "settings" sub-dictionary. */
+static prop_dictionary_t	npf_dict, settings_dict;
+/* Arrays stored in npf_dict under the "nat", "tables" and "rules" keys. */
+static prop_array_t		nat_arr, tables_arr, rules_arr;
+
+/* Next priority to hand out to a group, an in-group rule and a NAT policy. */
+static pri_t			gr_prio_counter = 1;
+static pri_t			rl_prio_counter = 1;
+static pri_t			nat_prio_counter = 1;
+
+/*
+ * npfctl_init_data: fetch the interface address list and build the
+ * skeleton of the configuration dictionary.
+ *
+ * => Exits if getifaddrs(3) fails.
+ */
+void
+npfctl_init_data(void)
+{
+	prop_number_t version;
+
+	if (getifaddrs(&ifs_list) == -1) {
+		err(EXIT_FAILURE, "getifaddrs");
+	}
+
+	/* Root dictionary, stamped with the NPF version. */
+	npf_dict = prop_dictionary_create();
+	version = prop_number_create_integer(NPF_VERSION);
+	prop_dictionary_set(npf_dict, "version", version);
+
+	/* Empty containers, to be filled in while parsing. */
+	nat_arr = prop_array_create();
+	settings_dict = prop_dictionary_create();
+	tables_arr = prop_array_create();
+	rules_arr = prop_array_create();
+
+	/* Attach them to the root dictionary. */
+	prop_dictionary_set(npf_dict, "nat", nat_arr);
+	prop_dictionary_set(npf_dict, "settings", settings_dict);
+	prop_dictionary_set(npf_dict, "tables", tables_arr);
+	prop_dictionary_set(npf_dict, "rules", rules_arr);
+}
+
+/*
+ * npfctl_ioctl_send: submit the constructed configuration dictionary
+ * using the IOC_NPF_RELOAD ioctl on the given descriptor, then release
+ * the dictionary.
+ *
+ * => With DEBUG defined, the dictionary is externalized to
+ *    /tmp/npf.plist instead of being sent.
+ * => Exits if the ioctl fails; returns 0 otherwise.
+ */
+int
+npfctl_ioctl_send(int fd)
+{
+#ifdef DEBUG
+	(void)fd;
+	prop_dictionary_externalize_to_file(npf_dict, "/tmp/npf.plist");
+#else
+	int errval;
+
+	errval = prop_dictionary_send_ioctl(npf_dict, fd, IOC_NPF_RELOAD);
+	if (errval) {
+		/* errx(3) appends the newline itself and does not return. */
+		errx(EXIT_FAILURE, "npf_ioctl_send: %s", strerror(errval));
+	}
+#endif
+	prop_object_release(npf_dict);
+	return 0;
+}
+
+/*
+ * Helper routines:
+ *
+ *	npfctl_getif() - get interface addresses and index number from name.
+ *	npfctl_servname2port() - get service ports from name.
+ *	npfctl_parse_v4mask() - parse address/mask integers from CIDR block.
+ */
+
+/*
+ * npfctl_getif: find the first AF_INET entry of the named interface in
+ * the list obtained from getifaddrs(3).
+ *
+ * => On success, stores the interface index in if_idx and returns the entry.
+ * => Returns NULL (if_idx untouched) if there is no such entry.
+ */
+static struct ifaddrs *
+npfctl_getif(char *ifname, unsigned int *if_idx)
+{
+	struct ifaddrs *ifent;
+
+	for (ifent = ifs_list; ifent != NULL; ifent = ifent->ifa_next) {
+		/* getifaddrs(3) may return entries without an address. */
+		if (ifent->ifa_addr == NULL)
+			continue;
+		if (ifent->ifa_addr->sa_family != AF_INET)
+			continue;
+		if (strcmp(ifent->ifa_name, ifname) == 0)
+			break;
+	}
+	if (ifent) {
+		*if_idx = if_nametoindex(ifname);
+	}
+	return ifent;
+}
+
+/*
+ * npfctl_servname2port: look up a service by name, for any protocol.
+ *
+ * => Returns the s_port field of the entry found by getservbyname(3),
+ *    or -1 if there is no such service.
+ */
+static int
+npfctl_servname2port(char *name)
+{
+	struct servent *ent = getservbyname(name, NULL);
+
+	if (ent == NULL) {
+		return -1;
+	}
+	return ent->s_port;
+}
+
+/*
+ * npfctl_parse_v4mask: parse "a.b.c.d" or "a.b.c.d/bits" into an IPv4
+ * address and mask, both in network byte order.
+ *
+ * => Without a "/bits" part, the mask is all-ones.
+ * => The prefix length must be a decimal number in the range 0-32,
+ *    optionally followed by white space.
+ * => The string is left unmodified on return, so that values shared
+ *    between several rules can be parsed more than once.
+ * => Returns true on success, false on a malformed address or prefix.
+ */
+bool
+npfctl_parse_v4mask(char *str, in_addr_t *addr, in_addr_t *mask)
+{
+	char *slash = strchr(str, '/');
+	bool ok;
+
+	if (slash != NULL) {
+		unsigned long bits;
+		char *end;
+
+		if (!isdigit((unsigned char)slash[1]))
+			return false;
+		bits = strtoul(slash + 1, &end, 10);
+		/* Shifting by a negative or >= 32 amount is undefined. */
+		if (bits > 32)
+			return false;
+		if (*end != '\0' && !isspace((unsigned char)*end))
+			return false;
+		*mask = bits ? htonl(0xffffffffU << (32 - bits)) : 0;
+	} else {
+		*mask = 0xffffffff;
+	}
+
+	/* Temporarily cut off the prefix for inet_aton(3), then restore. */
+	if (slash != NULL)
+		*slash = '\0';
+	ok = inet_aton(str, (struct in_addr *)addr) != 0;
+	if (slash != NULL)
+		*slash = '/';
+	return ok;
+}
+
+/*
+ * npfctl_parse_cidr: resolve a rule address into an IPv4 address/mask pair.
+ *
+ * => A string starting with a letter is taken as an interface name; the
+ *    address of that interface is used with an all-ones mask.
+ * => Anything else is handed to npfctl_parse_v4mask().
+ * => Exits on an unknown interface or an invalid CIDR.
+ */
+static void
+npfctl_parse_cidr(char *str, in_addr_t *addr, in_addr_t *mask)
+{
+	struct sockaddr_in *sin;
+	struct ifaddrs *ifa;
+	u_int idx;
+
+	if (!isalpha((unsigned char)*str)) {
+		/* Numeric form. */
+		if (!npfctl_parse_v4mask(str, addr, mask)) {
+			errx(EXIT_FAILURE, "invalid CIDR '%s'\n", str);
+		}
+		return;
+	}
+
+	/* Interface name: take the interface address. */
+	ifa = npfctl_getif(str, &idx);
+	if (ifa == NULL) {
+		errx(EXIT_FAILURE, "invalid interface '%s'", str);
+	}
+	sin = (struct sockaddr_in *)ifa->ifa_addr;
+	*addr = sin->sin_addr.s_addr;
+	*mask = 0xffffffff;
+}
+
+/*
+ * NPF table creation and construction routines.
+ */
+
+/*
+ * npfctl_lookup_table: find a previously added table by its ID, which
+ * is given as a decimal string.
+ *
+ * => Returns the table dictionary, or NULL if there is no such table.
+ * => Exits if the iterator cannot be created.
+ */
+prop_dictionary_t
+npfctl_lookup_table(char *tidstr)
+{
+	prop_dictionary_t tl;
+	prop_object_iterator_t it;
+	prop_object_t obj;
+	u_int tid;
+
+	if ((it = prop_array_iterator(tables_arr)) == NULL)
+		err(EXIT_FAILURE, "prop_array_iterator");
+
+	tid = atoi(tidstr);
+	while ((tl = prop_object_iterator_next(it)) != NULL) {
+		obj = prop_dictionary_get(tl, "id");
+		if (tid == prop_number_integer_value(obj))
+			break;
+	}
+	/* The iterator is ours; the table stays owned by the array. */
+	prop_object_iterator_release(it);
+	return tl;
+}
+
+/*
+ * npfctl_mk_table: create a table dictionary holding an empty
+ * "entries" array.
+ */
+prop_dictionary_t
+npfctl_mk_table(void)
+{
+	prop_dictionary_t table = prop_dictionary_create();
+	prop_array_t entries = prop_array_create();
+
+	prop_dictionary_set(table, "entries", entries);
+	return table;
+}
+
+/*
+ * npfctl_table_setup: set the "id" and "type" of a table dictionary.
+ *
+ * => idstr is the table ID as a decimal string.
+ * => typestr is "hash" or "tree"; a leading double quote is skipped.
+ * => Exits on any other type.
+ */
+void
+npfctl_table_setup(prop_dictionary_t tl, char *idstr, char *typestr)
+{
+	prop_number_t typenum;
+	unsigned int id;
+
+	id = atoi(idstr);
+	/* TODO: 1. check ID range 2. check if not a duplicate */
+	prop_dictionary_set(tl, "id", prop_number_create_integer(id));
+
+	/* The type may arrive with its opening quote still attached. */
+	if (*typestr == '"') {
+		typestr++;
+	}
+
+	/* Note: strcmp(3) returns zero on a match. */
+	if (strcmp(typestr, "hash") == 0) {
+		typenum = prop_number_create_integer(NPF_TABLE_HASH);
+	} else if (strcmp(typestr, "tree") == 0) {
+		typenum = prop_number_create_integer(NPF_TABLE_RBTREE);
+	} else {
+		errx(EXIT_FAILURE, "invalid table type '%s'", typestr);
+	}
+	prop_dictionary_set(tl, "type", typenum);
+}
+
+/*
+ * npfctl_construct_table: fill the "entries" array of a table from a
+ * file containing one IPv4 address or CIDR block per line.
+ *
+ * => Lines starting with '#' and empty lines are skipped.
+ * => Exits if the file cannot be opened or an entry is invalid; the
+ *    reported line number counts every line of the file.
+ */
+void
+npfctl_construct_table(prop_dictionary_t tl, char *fname)
+{
+	prop_dictionary_t entdict;
+	prop_array_t tblents;
+	char *buf = NULL;
+	size_t n = 0;
+	FILE *fp;
+	int l = 0;
+
+	tblents = prop_dictionary_get(tl, "entries");
+	assert(tblents != NULL);
+
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		err(EXIT_FAILURE, "fopen");
+	}
+	/* getline(3) allocates and grows the buffer as needed. */
+	while (getline(&buf, &n, fp) != -1) {
+		in_addr_t addr, mask;
+
+		l++;
+		if (*buf == '\n' || *buf == '#')
+			continue;
+
+		/* IPv4 CIDR: a.b.c.d/mask */
+		if (!npfctl_parse_v4mask(buf, &addr, &mask))
+			errx(EXIT_FAILURE, "invalid table entry at line %d", l);
+
+		/* Create and add table entry. */
+		entdict = prop_dictionary_create();
+		prop_dictionary_set(entdict, "addr",
+		    prop_number_create_integer(addr));
+		prop_dictionary_set(entdict, "mask",
+		    prop_number_create_integer(mask));
+		prop_array_add(tblents, entdict);
+	}
+	free(buf);
+	fclose(fp);
+}
+
+/*
+ * npfctl_add_table: append a table dictionary to the "tables" array.
+ */
+void
+npfctl_add_table(prop_dictionary_t tl)
+{
+	(void)prop_array_add(tables_arr, tl);
+}
+
+/*
+ * npfctl_mk_rule: create a dictionary for a rule or, if 'group' is
+ * true, for a group.
+ *
+ * => A group gets an empty "subrules" array and the next group priority;
+ *    creating one restarts the rule priority numbering at 1.
+ * => A plain rule gets the next rule priority.
+ */
+prop_dictionary_t
+npfctl_mk_rule(bool group)
+{
+	prop_dictionary_t rule = prop_dictionary_create();
+	pri_t prio;
+
+	if (!group) {
+		prio = rl_prio_counter++;
+	} else {
+		prop_array_t subrules = prop_array_create();
+
+		prop_dictionary_set(rule, "subrules", subrules);
+		prio = gr_prio_counter++;
+		rl_prio_counter = 1;
+	}
+	prop_dictionary_set(rule, "priority", prop_number_create_integer(prio));
+
+	return rule;
+}
+
+/*
+ * npfctl_add_rule: append a rule to the "subrules" array of its parent
+ * group or, if there is no parent, to the top-level "rules" array.
+ */
+void
+npfctl_add_rule(prop_dictionary_t rl, prop_dictionary_t parent)
+{
+	prop_array_t target = rules_arr;
+
+	if (parent != NULL) {
+		target = prop_dictionary_get(parent, "subrules");
+		assert(target != NULL);
+	}
+	prop_array_add(target, rl);
+}
+
+/*
+ * npfctl_rule_setattr: store the attribute flags of a rule and, if an
+ * interface name is given, the index of that interface.
+ *
+ * => Exits if npfctl_getif() does not find the interface.
+ */
+void
+npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface)
+{
+	unsigned int ifindex;
+
+	prop_dictionary_set(rl, "attributes", prop_number_create_integer(attr));
+	if (iface == NULL) {
+		return;
+	}
+
+	if (npfctl_getif(iface, &ifindex) == NULL) {
+		errx(EXIT_FAILURE, "invalid interface '%s'", iface);
+	}
+	prop_dictionary_set(rl, "interface",
+	    prop_number_create_integer(ifindex));
+}
+
+/*
+ * Main rule generation routines.
+ */
+
+/*
+ * npfctl_rulenc_v4cidr: emit address matching n-code for a value.
+ *
+ * => A table value produces a single table lookup block, accounted for
+ *    in nblocks[0].
+ * => Otherwise, one CIDR block is produced per element, each accounted
+ *    for in nblocks[1].
+ * => The counter is decremented before the failure offset is computed.
+ */
+static void
+npfctl_rulenc_v4cidr(void **nc, int nblocks[], var_t *dat, bool sd)
+{
+	element_t *item;
+
+	if (dat->v_type == VAR_TABLE) {
+		const u_int tid = atoi(dat->v_elements->e_data);
+
+		nblocks[0]--;
+		npfctl_gennc_tbl(nc, npfctl_failure_offset(nblocks), tid, sd);
+		return;
+	}
+
+	item = dat->v_elements;
+	while (item != NULL) {
+		in_addr_t addr, mask;
+		int foff;
+
+		npfctl_parse_cidr(item->e_data, &addr, &mask);
+
+		nblocks[1]--;
+		foff = npfctl_failure_offset(nblocks);
+		npfctl_gennc_v4cidr(nc, foff, addr, mask, sd);
+
+		item = item->e_next;
+	}
+}
+
+/*
+ * npfctl_rulenc_ports: emit one port matching block per element.
+ *
+ * => An element is a service name, a port number or a "from:to" range.
+ * => Each block is accounted for in nblocks[0], which is decremented
+ *    before the failure offset is computed.
+ * => Element strings are left unmodified, since a value may be shared
+ *    by several rules.
+ * => Exits on an unknown service name.
+ */
+static void
+npfctl_rulenc_ports(void **nc, int nblocks[], var_t *dat, bool tcpudp, bool sd)
+{
+	element_t *el;
+	int foff;
+
+	assert(dat->v_type != VAR_TABLE);
+
+	/* Generate TCP/UDP port matching blocks. */
+	for (el = dat->v_elements; el != NULL; el = el->e_next) {
+		int pfrom, pto;
+		char *sep;
+
+		if ((sep = strchr(el->e_data, ':')) != NULL) {
+			/* Port range (only numeric). */
+			*sep = '\0';
+		}
+		if (isalpha((unsigned char)*el->e_data)) {
+			pfrom = npfctl_servname2port(el->e_data);
+			if (pfrom == -1) {
+				errx(EXIT_FAILURE, "invalid service '%s'",
+				    el->e_data);
+			}
+		} else {
+			pfrom = htons(atoi(el->e_data));
+		}
+		pto = sep ? htons(atoi(sep + 1)) : pfrom;
+		if (sep != NULL) {
+			/* Put the separator back for the next user. */
+			*sep = ':';
+		}
+
+		nblocks[0]--;
+		foff = npfctl_failure_offset(nblocks);
+		npfctl_gennc_ports(nc, foff, pfrom, pto, tcpudp, sd);
+	}
+}
+
+/*
+ * npfctl_rulenc_block: emit address matching followed by the optional
+ * port matching for one direction of a rule.
+ *
+ * => If 'both' is set, the ports are emitted a second time for the
+ *    other protocol.
+ */
+static void
+npfctl_rulenc_block(void **nc, int nblocks[], var_t *cidr, var_t *ports,
+    bool both, bool tcpudp, bool sd)
+{
+
+	npfctl_rulenc_v4cidr(nc, nblocks, cidr, sd);
+	if (ports != NULL) {
+		npfctl_rulenc_ports(nc, nblocks, ports, tcpudp, sd);
+		if (both) {
+			npfctl_rulenc_ports(nc, nblocks, ports, !tcpudp, sd);
+		}
+	}
+}
+
+/*
+ * npfctl_rule_protodata: generate the n-code of a rule from its
+ * protocol, addresses and ports, and store it in the rule dictionary
+ * under the "ncode" key.
+ *
+ * => proto may be NULL; "icmp" discards any port filters.
+ * => from, fports, to and tports may each be NULL.
+ * => Exits on allocation failure, or if the generated size differs from
+ *    the size calculated in advance.
+ */
+void
+npfctl_rule_protodata(prop_dictionary_t rl, char *proto, var_t *from,
+    var_t *fports, var_t *to, var_t *tports)
+{
+	prop_data_t ncdata;
+	bool icmp, tcpudp, both;
+	int nblocks[2] = { 0, 0 };
+	void *ncptr, *nc;
+	size_t sz;
+
+	/*
+	 * Defaults: TCP port matching, no ICMP block.
+	 * Note: 'both' is never enabled by this function.
+	 */
+	icmp = false;
+	tcpudp = true;
+	both = false;
+	if (proto == NULL) {
+		goto skip_proto;
+	}
+
+	if (strcmp(proto, "icmp") == 0) {
+		/* ICMP case. */
+		fports = NULL;
+		tports = NULL;
+		icmp = true;
+		nblocks[0] += 1;
+
+	} else if (strcmp(proto, "tcp") == 0) {
+		/* Just TCP. */
+		tcpudp = true;
+
+	} else if (strcmp(proto, "udp") == 0) {
+		/* Just UDP. */
+		tcpudp = false;
+
+	} else {
+		/* Default. */
+	}
+skip_proto:
+
+	/* Count the blocks, which determine the size of the n-code. */
+	if (from && from->v_count) {
+		if (from->v_type == VAR_TABLE)
+			nblocks[0] += 1;
+		else
+			nblocks[1] += from->v_count;
+		if (fports && fports->v_count)
+			nblocks[0] += fports->v_count * (both ? 2 : 1);
+	}
+	if (to && to->v_count) {
+		if (to->v_type == VAR_TABLE)
+			nblocks[0] += 1;
+		else
+			nblocks[1] += to->v_count;
+		if (tports && tports->v_count)
+			nblocks[0] += tports->v_count * (both ? 2 : 1);
+	}
+
+	/* Allocate memory for the n-code. */
+	sz = npfctl_calc_ncsize(nblocks);
+	ncptr = malloc(sz);
+	if (ncptr == NULL) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+	nc = ncptr;
+
+	/* Ethernet fragment (ETHERTYPE_IP), XXX. */
+	npfctl_gennc_ether(&nc, npfctl_failure_offset(nblocks), htons(0x0800));
+
+	/* Generate v4 CIDR matching blocks and TCP/UDP port matching. */
+	if (from) {
+		npfctl_rulenc_block(&nc, nblocks, from, fports,
+		    both, tcpudp, true);
+	}
+	if (to) {
+		npfctl_rulenc_block(&nc, nblocks, to, tports,
+		    both, tcpudp, false);
+	}
+	/* ICMP case. */
+	if (icmp) {
+		int foff;
+
+		/*
+		 * Like the other generators, take this block out of the
+		 * count first: the offset covers only what follows it.
+		 */
+		nblocks[0]--;
+		foff = npfctl_failure_offset(nblocks);
+		npfctl_gennc_icmp(&nc, foff, -1, -1);
+	}
+	npfctl_gennc_complete(&nc);
+
+	if ((uintptr_t)nc - (uintptr_t)ncptr != sz)
+		errx(EXIT_FAILURE, "n-code size got wrong (%zu != %zu)",
+		    (size_t)((uintptr_t)nc - (uintptr_t)ncptr), sz);
+
+#ifdef DEBUG
+	uint32_t *op = ncptr;
+	size_t n = sz;
+	do {
+		DPRINTF(("\t> |0x%02x|\n", (u_int)*op));
+		op++;
+		n -= sizeof(*op);
+	} while (n);
+#endif
+
+	/* Create a final memory block of data, ready to send. */
+	ncdata = prop_data_create_data(ncptr, sz);
+	if (ncdata == NULL) {
+		perror("prop_data_create_data");
+		exit(EXIT_FAILURE);
+	}
+	prop_dictionary_set(rl, "ncode", ncdata);
+	free(ncptr);
+}
+
+/*
+ * NAT policy construction routines.
+ */
+
+/*
+ * npfctl_mk_nat: create a dictionary for a NAT policy, which is a rule
+ * with extra information, and assign it the next NAT priority.
+ */
+prop_dictionary_t
+npfctl_mk_nat(void)
+{
+	prop_dictionary_t policy = prop_dictionary_create();
+	const pri_t prio = nat_prio_counter++;
+
+	prop_dictionary_set(policy, "priority",
+	    prop_number_create_integer(prio));
+	return policy;
+}
+
+/*
+ * npfctl_add_nat: append a NAT policy dictionary to the "nat" array.
+ */
+void
+npfctl_add_nat(prop_dictionary_t nat)
+{
+
+	(void)prop_array_add(nat_arr, nat);
+}
+
+/*
+ * npfctl_nat_setup: set the attributes, interface and gateway address
+ * of a NAT policy.
+ *
+ * => The policy is always a final, outgoing "pass" rule.
+ * => gwip is resolved by npfctl_parse_cidr(); its mask is not used.
+ */
+void
+npfctl_nat_setup(prop_dictionary_t rl, char *iface, char *gwip)
+{
+	in_addr_t gwaddr, gwmask;
+
+	/* Interface and attributes. */
+	npfctl_rule_setattr(rl,
+	    NPF_RULE_PASS | NPF_RULE_OUT | NPF_RULE_FINAL, iface);
+
+	/* Gateway IP, XXX should be no mask. */
+	npfctl_parse_cidr(gwip, &gwaddr, &gwmask);
+	prop_dictionary_set(rl, "gateway_ip",
+	    prop_number_create_integer(gwaddr));
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npf_ncgen.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,218 @@
+/*	$NetBSD: npf_ncgen.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * N-code generation.
+ *
+ * WARNING: Update npfctl_calc_ncsize() and npfctl_failure_offset()
+ * calculations, when changing generation routines.
+ */
+
+#include <sys/types.h>
+
+#include "npfctl.h"
+
+/*
+ * npfctl_calc_ncsize: calculate the size of the n-code in bytes, given
+ * the number of blocks of each kind.
+ *
+ * Fragments, in 32-bit words:
+ * - 13 for the single initial block of npfctl_gennc_ether().
+ * - 5 for each block counted in nblocks[0] (ports or table).
+ * - 6 for each block counted in nblocks[1] (IPv4 CIDR).
+ * - 4 for the single last fragment of npfctl_gennc_complete().
+ */
+size_t
+npfctl_calc_ncsize(int nblocks[])
+{
+	const size_t word = sizeof(uint32_t);
+	size_t sz;
+
+	sz = nblocks[0] * 5 * word;
+	sz += nblocks[1] * 6 * word;
+	sz += 13 * word;
+	sz += 4 * word;
+	return sz;
+}
+
+/*
+ * npfctl_failure_offset: calculate the offset, in words, to the failure
+ * block for the given numbers of blocks.
+ *
+ * => Each block is its operation (3 or 4 words) plus 2 words for the
+ *    comparison.
+ * => Additional 4 words skip the last comparison and the success path.
+ */
+size_t
+npfctl_failure_offset(int nblocks[])
+{
+	const size_t tblport_words = (3 + 2) * nblocks[0];
+	const size_t v4cidr_words = (4 + 2) * nblocks[1];
+
+	return tblport_words + v4cidr_words + 4;
+}
+
+/*
+ * npfctl_gennc_ether: emit the initial fragment (13 words), which checks
+ * the Ethernet frame type unless processing is already at layer 3.
+ *
+ * => foff is the offset to the failure block, as seen past this fragment.
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_ether(void **ncptr, int foff, uint16_t ethertype)
+{
+	uint32_t *nc = *ncptr;
+	unsigned int i = 0;
+
+	/* NPF handler will set REG_0 to either NPF_LAYER_2 or NPF_LAYER_3. */
+	nc[i++] = NPF_OPCODE_CMP;
+	nc[i++] = NPF_LAYER_3;
+	nc[i++] = 0;
+
+	/* If layer 3, skip the rest of this fragment. */
+	nc[i++] = NPF_OPCODE_BEQ;
+	nc[i++] = 0x0a;
+
+	/* Otherwise, assume layer 2 and perform NPF_OPCODE_ETHER. */
+	nc[i++] = NPF_OPCODE_ETHER;
+	nc[i++] = 0x00;		/* reserved */
+	nc[i++] = 0x00;		/* reserved */
+	nc[i++] = ethertype;
+
+	/* Fail (+ 2 words of ADVR) or advance to layer 3 (IPv4) header. */
+	nc[i++] = NPF_OPCODE_BNE;
+	nc[i++] = foff + 2;
+	/* Offset to the header is returned by NPF_OPCODE_ETHER in REG_3. */
+	nc[i++] = NPF_OPCODE_ADVR;
+	nc[i++] = 3;
+
+	/* + 13 words. */
+	*ncptr = (void *)(nc + i);
+}
+
+/*
+ * npfctl_gennc_v4cidr: emit a fragment (6 words) matching an IPv4
+ * address against a network address and mask.
+ *
+ * => 'sd' selects the direction word: 0x01 if true, 0x00 otherwise.
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_v4cidr(void **ncptr, int foff,
+    in_addr_t netaddr, in_addr_t subnet, bool sd)
+{
+	uint32_t *w = *ncptr;
+
+	/* Operation: opcode, direction, address and mask. */
+	w[0] = NPF_OPCODE_IP4MASK;
+	w[1] = (sd ? 0x01 : 0x00);
+	w[2] = netaddr;
+	w[3] = subnet;
+
+	/* Branch to the failure block on mismatch. */
+	w[4] = NPF_OPCODE_BNE;
+	w[5] = foff;
+
+	*ncptr = (void *)&w[6];
+}
+
+/*
+ * npfctl_gennc_ports: emit a fragment (5 words) matching a TCP or UDP
+ * port range.
+ *
+ * => 'tcpudp' selects the TCP opcode if true, the UDP one otherwise.
+ * => The range is packed in one word: pfrom in the upper 16 bits.
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_ports(void **ncptr, int foff,
+    in_port_t pfrom, in_port_t pto, bool tcpudp, bool sd)
+{
+	uint32_t *w = *ncptr;
+
+	/* Operation: opcode, direction and port range. */
+	if (tcpudp) {
+		w[0] = NPF_OPCODE_TCP_PORTS;
+	} else {
+		w[0] = NPF_OPCODE_UDP_PORTS;
+	}
+	w[1] = (sd ? 0x01 : 0x00);
+	w[2] = ((uint32_t)pfrom << 16) | pto;
+
+	/* Branch to the failure block on mismatch. */
+	w[3] = NPF_OPCODE_BNE;
+	w[4] = foff;
+
+	*ncptr = (void *)&w[5];
+}
+
+/*
+ * npfctl_gennc_icmp: emit a fragment (5 words) with the ICMP operation,
+ * taking the code and type as its operands.
+ *
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_icmp(void **ncptr, int foff, int code, int type)
+{
+	uint32_t *w = *ncptr;
+
+	/* Operation: opcode, code and type. */
+	w[0] = NPF_OPCODE_ICMP4;
+	w[1] = code;
+	w[2] = type;
+
+	/* Branch to the failure block on mismatch. */
+	w[3] = NPF_OPCODE_BNE;
+	w[4] = foff;
+
+	*ncptr = (void *)&w[5];
+}
+
+/*
+ * npfctl_gennc_tbl: emit a fragment (5 words) matching the IPv4 source
+ * or destination address of the packet against the table given by ID.
+ *
+ * => 'sd' selects the direction word: 0x01 if true, 0x00 otherwise.
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_tbl(void **ncptr, int foff, u_int tid, bool sd)
+{
+	uint32_t *w = *ncptr;
+
+	/* Operation: opcode, direction and table ID. */
+	w[0] = NPF_OPCODE_IP4TABLE;
+	w[1] = (sd ? 0x01 : 0x00);
+	w[2] = tid;
+
+	/* Branch to the failure block on mismatch. */
+	w[3] = NPF_OPCODE_BNE;
+	w[4] = foff;
+
+	*ncptr = (void *)&w[5];
+}
+
+/*
+ * npfctl_gennc_complete: append the final fragment (4 words), that is
+ * the success path followed by the failure path.
+ *
+ * => Advances *ncptr past the words written.
+ */
+void
+npfctl_gennc_complete(void **ncptr)
+{
+	uint32_t *w = *ncptr;
+
+	/* Success path: return 0x0. */
+	w[0] = NPF_OPCODE_RET;
+	w[1] = 0x0;
+
+	/* Failure path: return 0xff. */
+	w[2] = NPF_OPCODE_RET;
+	w[3] = 0xff;
+
+	*ncptr = (void *)&w[4];
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npf_parser.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,621 @@
+/*	$NetBSD: npf_parser.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * XXX: This needs clean-up!
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <err.h>
+
+#include "npfctl.h"
+
+/*
+ * A few ugly helpers for tokenizing on spaces and tabs.
+ *
+ * They expect 'char *p' and 'char *sptr' in the scope of the caller,
+ * where p receives the token and sptr is the strtok_r(3) state.
+ */
+
+/* Value returned to signal a parse error. */
+#define	PARSE_ERR()			(-1)
+
+/*
+ * Fetch a token into p; makes the calling function return PARSE_ERR()
+ * if there is none.  Note: expands to a bare 'if' statement.
+ */
+#define	PARSE_TOKEN(_arg_)					\
+	if ((p = strtok_r(_arg_, " \t", &sptr)) == NULL)	\
+		return PARSE_ERR();
+
+/* Start tokenizing the string p points to, or continue with it. */
+#define	PARSE_FIRST_TOKEN()		PARSE_TOKEN(p)
+#define	PARSE_NEXT_TOKEN()		PARSE_TOKEN(NULL)
+/* Continue, but leave p as NULL at the end of input instead of returning. */
+#define	PARSE_NEXT_TOKEN_NOCHECK()	p = strtok_r(NULL, " \t", &sptr)
+
+/*
+ * Global list of the defined variables.
+ *
+ * npfctl_lookup_varlist(): find a variable by its key; returns NULL
+ * if there is no variable with such key.
+ */
+
+static var_t *			var_list = NULL;
+
+static var_t *
+npfctl_lookup_varlist(char *key)
+{
+	var_t *vr = var_list;
+
+	while (vr != NULL && strcmp(vr->v_key, key) != 0) {
+		vr = vr->v_next;
+	}
+	return vr;
+}
+
+/*
+ * npfctl_parsevalue: helper function to parse a value.
+ *
+ * => "$name" is a reference: the existing variable itself is returned.
+ * => "{ a, b, ... }" is an array; elements are separated by commas,
+ *    spaces or tabs, and the braces are not part of any element.
+ * => "<id>" is a table, which must already be defined.
+ * => Anything else is a single element.
+ * => Returns NULL on an empty array or an unterminated table reference;
+ *    exits on an unknown variable or table.
+ * => Note: the array and table cases modify the buffer.
+ */
+static var_t *
+npfctl_parsevalue(char *buf)
+{
+	var_t *vr = NULL;
+	element_t *el = NULL, *it = NULL;
+	char *p = buf, *tend, *sptr;
+
+	switch (*p) {
+	case '$':
+		/* Definition - lookup. */
+		vr = npfctl_lookup_varlist(++p);
+		if (vr == NULL) {
+			errx(EXIT_FAILURE, "invalid variable '%s'", p);
+		}
+		break;
+	case '{':
+		/* Array: tokenize what follows the opening brace. */
+		vr = zalloc(sizeof(var_t));
+		p = strtok_r(buf + 1, ", \t", &sptr);
+		while (p) {
+			char *brace;
+
+			if (*p == '}')
+				break;
+			/* The closing brace may be glued to the last element. */
+			if ((brace = strchr(p, '}')) != NULL)
+				*brace = '\0';
+
+			/* Prepend: elements end up in reverse order. */
+			el = zalloc(sizeof(element_t));
+			el->e_data = xstrdup(p);
+			el->e_next = it;
+			vr->v_count++;
+			it = el;
+
+			if (brace != NULL)
+				break;
+			p = strtok_r(NULL, ", \t", &sptr);
+		}
+		if (el) {
+			vr->v_type = VAR_ARRAY;
+			vr->v_elements = el;
+		} else {
+			free(vr);
+			vr = NULL;
+		}
+		break;
+	case '<':
+		/* Table. */
+		if ((tend = strchr(++p, '>')) == NULL) {
+			return NULL;
+		}
+		*tend = '\0';
+		if (npfctl_lookup_table(p) == NULL) {
+			errx(EXIT_FAILURE, "invalid table '%s'", p);
+		}
+		vr = zalloc(sizeof(var_t));
+		vr->v_type = VAR_TABLE;
+		/* FALLTHROUGH */
+	default:
+		/* Data. */
+		el = zalloc(sizeof(element_t));
+		el->e_data = xstrdup(p);
+		if (vr == NULL) {
+			vr = zalloc(sizeof(var_t));
+			vr->v_type = VAR_SINGLE;
+		}
+		vr->v_elements = el;
+		vr->v_count = 1;
+	}
+	return vr;
+}
+
+/*
+ * npfctl_parserule: main routine to parse a rule.  Syntax:
+ *
+ *	{ pass | block } [ in | out ] [ log ] [ count ] [ quick ]
+ *	    [ on <if> ] [ inet | inet6 ] [ proto <proto> ]
+ *	    { all | [ from <addr/mask> [ port <port(s)|range> ] ]
+ *	            [ to <addr/mask> [ port <port(s)|range> ] ] }
+ *	    [ keep state ]
+ *
+ * => Sets the attributes, interface and n-code of the rule dictionary.
+ * => Returns 0 on success and PARSE_ERR() on a syntax error; exits on
+ *    invalid values.
+ */
+static inline int
+npfctl_parserule(char *buf, prop_dictionary_t rl)
+{
+	var_t *from_cidr = NULL, *fports = NULL;
+	var_t *to_cidr = NULL, *tports = NULL;
+	char *proto = NULL;
+	char *p, *sptr, *iface;
+	int ret, attr = 0;
+
+	DPRINTF(("rule\t|%s|\n", buf));
+
+	p = buf;
+	PARSE_FIRST_TOKEN();
+
+	/* pass or block (mandatory) */
+	if (strcmp(p, "block") == 0) {
+		attr = 0;
+	} else if (strcmp(p, "pass") == 0) {
+		attr = NPF_RULE_PASS;
+	} else {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+
+	/* in or out */
+	if (strcmp(p, "in") == 0) {
+		attr |= NPF_RULE_IN;
+		PARSE_NEXT_TOKEN();
+	} else if (strcmp(p, "out") == 0) {
+		attr |= NPF_RULE_OUT;
+		PARSE_NEXT_TOKEN();
+	} else {
+		attr |= (NPF_RULE_IN | NPF_RULE_OUT);
+	}
+
+	/* log (XXX: NOP) */
+	if (strcmp(p, "log") == 0) {
+		attr |= NPF_RULE_LOG;
+		PARSE_NEXT_TOKEN();
+	}
+
+	/* count */
+	if (strcmp(p, "count") == 0) {
+		attr |= NPF_RULE_COUNT;
+		PARSE_NEXT_TOKEN();
+	}
+
+	/* quick */
+	if (strcmp(p, "quick") == 0) {
+		attr |= NPF_RULE_FINAL;
+		PARSE_NEXT_TOKEN();
+	}
+
+	/* on <interface> */
+	if (strcmp(p, "on") == 0) {
+		var_t *ifvar;
+		element_t *el;
+
+		PARSE_NEXT_TOKEN();
+		if ((ifvar = npfctl_parsevalue(p)) == NULL)
+			return PARSE_ERR();
+		if (ifvar->v_type != VAR_SINGLE) {
+			errx(EXIT_FAILURE, "invalid interface value '%s'", p);
+		}
+		el = ifvar->v_elements;
+		iface = el->e_data;
+
+		PARSE_NEXT_TOKEN();
+	} else {
+		iface = NULL;
+	}
+
+	/* inet, inet6 (TODO) */
+	if (strcmp(p, "inet") == 0) {
+		PARSE_NEXT_TOKEN();
+	} else if (strcmp(p, "inet6") == 0) {
+		PARSE_NEXT_TOKEN();
+	}
+
+	/* proto <proto> */
+	if (strcmp(p, "proto") == 0) {
+		var_t *pvar;
+
+		PARSE_NEXT_TOKEN();
+		/* The value may fail to parse, e.g. an empty array. */
+		if ((pvar = npfctl_parsevalue(p)) == NULL)
+			return PARSE_ERR();
+		proto = pvar->v_elements->e_data;
+		PARSE_NEXT_TOKEN();
+	}
+
+	/*
+	 * Can be: "all", "from", "to" or "from + to".
+	 */
+
+	if (strcmp(p, "all") == 0) {
+		/* Should be no "from"/"to" after it. */
+		PARSE_NEXT_TOKEN_NOCHECK();
+		goto last;
+	}
+
+	ret = PARSE_ERR();
+
+	/* from <addr> port <port | range> */
+	if (strcmp(p, "from") == 0) {
+		PARSE_NEXT_TOKEN();
+		if ((from_cidr = npfctl_parsevalue(p)) == NULL)
+			return PARSE_ERR();
+
+		PARSE_NEXT_TOKEN_NOCHECK();
+		if (p && strcmp(p, "port") == 0) {
+			PARSE_NEXT_TOKEN();
+			if ((fports = npfctl_parsevalue(p)) == NULL)
+				return PARSE_ERR();
+			PARSE_NEXT_TOKEN_NOCHECK();
+		}
+		ret = 0;
+	}
+
+	/* to <addr> port <port | range> */
+	if (p && strcmp(p, "to") == 0) {
+		PARSE_NEXT_TOKEN();
+		if ((to_cidr = npfctl_parsevalue(p)) == NULL)
+			return PARSE_ERR();
+
+		PARSE_NEXT_TOKEN_NOCHECK();
+		if (p && strcmp(p, "port") == 0) {
+			PARSE_NEXT_TOKEN();
+			if ((tports = npfctl_parsevalue(p)) == NULL)
+				return PARSE_ERR();
+			PARSE_NEXT_TOKEN_NOCHECK();
+		}
+		ret = 0;
+	}
+
+	/* Neither "from" nor "to" was seen. */
+	if (ret) {
+		return ret;
+	}
+last:
+	/* keep state */
+	if (p && strcmp(p, "keep") == 0) {
+		PARSE_NEXT_TOKEN();
+		if (strcmp(p, "state") != 0)
+			return PARSE_ERR();
+		attr |= NPF_RULE_KEEPSTATE;
+	}
+
+	/* Set the rule attributes and interface, if any. */
+	npfctl_rule_setattr(rl, attr, iface);
+
+	/*
+	 * Generate all protocol data.
+	 */
+	npfctl_rule_protodata(rl, proto, from_cidr, fports, to_cidr, tports);
+	return 0;
+}
+
+/*
+ * npfctl_parsegroup: parse group definition.  Syntax:
+ *
+ *	group (name <name>, interface <if>, [ in | out ]) { <rules> }
+ *	group (default) { <rules> }
+ */
+
+/* Attributes shared by all groups. */
+#define	GROUP_ATTRS	(NPF_RULE_PASS | NPF_RULE_FINAL)
+
+/*
+ * Parse the options between the parentheses of a group definition and
+ * set the attributes and interface of the group dictionary.
+ *
+ * => The line must also contain the opening '{' of the rule set.
+ * => Returns 0 on success and -1 on a syntax error; exits on an
+ *    invalid interface.
+ */
+static inline int
+npfctl_parsegroup(char *buf, prop_dictionary_t rl)
+{
+	char *p = buf, *end, *sptr, *iface;
+	int attr_dir;
+
+	DPRINTF(("group\t|%s|\n", buf));
+
+	/* Isolate the text between the parentheses. */
+	p = strchr(p, '(');
+	if (p == NULL)
+		return -1;
+	*p = '\0';
+	end = strchr(++p, ')');
+	if (end == NULL)
+		return -1;
+	*end = '\0';
+	if (strchr(++end, '{') == NULL)
+		return -1;
+	while (isspace((unsigned char)*p))
+		p++;
+
+	/*
+	 * If default group - no other options.
+	 */
+	if (strcmp(p, "default") == 0) {
+		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
+		npfctl_rule_setattr(rl,
+		    GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL);
+		return 0;
+	}
+
+	PARSE_FIRST_TOKEN();
+
+	/* Name of the group (mandatory). */
+	if (strcmp(p, "name") == 0) {
+		PARSE_NEXT_TOKEN()
+		if (*p != '"')
+			return -1;
+		if ((end = strchr(++p, '"')) == NULL)
+			return -1;
+		*end = '\0';
+		/* TODO: p == name */
+		PARSE_NEXT_TOKEN_NOCHECK();
+	}
+
+	/* Interface for this group (optional). */
+	if (p && strcmp(p, "interface") == 0) {
+		var_t *ifvar;
+		element_t *el;
+		size_t len;
+
+		PARSE_NEXT_TOKEN();
+		/* Drop the option separator, if glued to the value. */
+		len = strlen(p);
+		if (len > 0 && p[len - 1] == ',')
+			p[len - 1] = '\0';
+
+		if ((ifvar = npfctl_parsevalue(p)) == NULL)
+			return -1;
+		if (ifvar->v_type != VAR_SINGLE) {
+			/* Report the token: the value may have no key. */
+			errx(EXIT_FAILURE, "invalid interface value '%s'", p);
+		}
+		el = ifvar->v_elements;
+		iface = el->e_data;
+		PARSE_NEXT_TOKEN_NOCHECK();
+	} else {
+		iface = NULL;
+	}
+
+	/* Direction (optional). */
+	if (p == NULL) {
+		attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
+	} else {
+		if (strcmp(p, "in") == 0)
+			attr_dir = NPF_RULE_IN;
+		else if (strcmp(p, "out") == 0)
+			attr_dir = NPF_RULE_OUT;
+		else
+			return -1;
+	}
+	npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface);
+	return 0;
+}
+
+/*
+ * npfctl_parsetable: parse table definition.
+ *
+ *	table "<id>" type "<t>" { dynamic | file "<path>" }
+ *
+ * => The ID, type and path must be enclosed in double quotes.
+ * => Returns 0 on success and PARSE_ERR() on a syntax error.
+ */
+static inline int
+npfctl_parsetable(char *buf, prop_dictionary_t tl)
+{
+	char *p, *sptr;
+	char *id_ptr, *type_ptr, *fname;
+
+	DPRINTF(("table\t|%s|\n", buf));
+
+	/* Name of the set. */
+	if ((p = strchr(buf, '"')) == NULL) {
+		return PARSE_ERR();
+	}
+	id_ptr = ++p;
+	if ((p = strchr(p, '"')) == NULL) {
+		return PARSE_ERR();
+	}
+	*p++ = '\0';
+
+	PARSE_FIRST_TOKEN();
+
+	/* Table type (mandatory). */
+	if (strcmp(p, "type") != 0) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN_NOCHECK();
+	if (p == NULL || *p != '"') {
+		return PARSE_ERR();
+	}
+	/* Note: type_ptr still points at the opening quote. */
+	type_ptr = p;
+	if ((p = strchr(++p, '"')) == NULL) {
+		return PARSE_ERR();
+	}
+	*p = '\0';
+
+	/*
+	 * Setup the table.
+	 */
+	npfctl_table_setup(tl, id_ptr, type_ptr);
+	PARSE_NEXT_TOKEN();
+
+	/* Dynamic. */
+	if (strcmp(p, "dynamic") == 0) {
+		/* No other options. */
+		return 0;
+	}
+
+	/* File. */
+	if (strcmp(p, "file") != 0) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+	if (*p != '"') {
+		return PARSE_ERR();
+	}
+	fname = ++p;
+	if ((p = strchr(p, '"')) == NULL) {
+		return PARSE_ERR();
+	}
+	*p = '\0';
+
+	/* Construct the table. */
+	npfctl_construct_table(tl, fname);
+	return 0;
+}
+
+/*
+ * npfctl_parse_nat: parse NAT policy definition.
+ *
+ *	nat <if> from <localnet> to <filter> -> <ip>
+ *
+ * => The interface must be a single value.
+ * => Returns 0 on success and PARSE_ERR() on a syntax error or a value
+ *    which cannot be parsed; exits on invalid values.
+ */
+static inline int
+npfctl_parse_nat(char *buf, prop_dictionary_t nat)
+{
+	var_t *ifvar, *from_cidr, *to_cidr, *ip;
+	element_t *iface, *cidr;
+	char *p, *sptr;
+
+	DPRINTF(("nat\t|%s|\n", buf));
+
+	/* Skip the keyword: tokenize from the first space on. */
+	if ((p = strchr(buf, ' ')) == NULL) {
+		return PARSE_ERR();
+	}
+	PARSE_FIRST_TOKEN();
+
+	/* <interface> */
+	if ((ifvar = npfctl_parsevalue(p)) == NULL) {
+		return PARSE_ERR();
+	}
+	if (ifvar->v_type != VAR_SINGLE) {
+		errx(EXIT_FAILURE, "invalid interface value '%s'", p);
+	} else {
+		iface = ifvar->v_elements;
+	}
+	PARSE_NEXT_TOKEN();
+
+	/* from <addr> */
+	if (strcmp(p, "from") != 0) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+	if ((from_cidr = npfctl_parsevalue(p)) == NULL) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+
+	/* to <addr> */
+	if (strcmp(p, "to") != 0) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+	if ((to_cidr = npfctl_parsevalue(p)) == NULL) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+
+	/* -> <ip> */
+	if (strcmp(p, "->") != 0) {
+		return PARSE_ERR();
+	}
+	PARSE_NEXT_TOKEN();
+	if ((ip = npfctl_parsevalue(p)) == NULL) {
+		return PARSE_ERR();
+	}
+	cidr = ip->v_elements;
+
+	/* Setup NAT policy (rule as filter and extra info). */
+	npfctl_rule_protodata(nat, NULL, from_cidr, NULL, to_cidr, NULL);
+	npfctl_nat_setup(nat, iface->e_data, cidr->e_data);
+	return 0;
+}
+
+/*
+ * npfctl_parsevar: parse defined variable.
+ *
+ * => The key ends at the first '=', space or tab.
+ * => Assigned value should be with double quotes (").
+ * => Value can be an array, use npfctl_parsevalue().
+ * => Insert variable into the global list.
+ * => Returns 0 on success and -1 on a syntax error, a duplicate key or
+ *    a value which cannot be parsed.
+ */
+static inline int
+npfctl_parsevar(char *buf)
+{
+	char *s = buf, *p, *key;
+	var_t *vr;
+
+	DPRINTF(("def\t|%s|\n", buf));
+
+	if ((p = strpbrk(s, "= \t")) == NULL)
+		return -1;
+
+	/* Validation of '='. */
+	if (*p != '=' && strchr(p, '=') == NULL)
+		return -1;
+	*p = '\0';
+	key = s;
+
+	/* Check for duplicates. */
+	if (npfctl_lookup_varlist(key))
+		return -1;
+
+	/* Parse quotes before. */
+	if ((s = strchr(p + 1, '"')) == NULL)
+		return -1;
+	if ((p = strchr(++s, '"')) == NULL)
+		return -1;
+	*p = '\0';
+
+	if ((vr = npfctl_parsevalue(s)) == NULL)
+		return -1;
+	if (*s == '$') {
+		var_t *ref = vr;
+
+		/*
+		 * A reference yields the node of the existing variable.
+		 * Re-keying and re-linking it would corrupt the list, so
+		 * create a new node sharing the elements.
+		 */
+		vr = zalloc(sizeof(var_t));
+		vr->v_type = ref->v_type;
+		vr->v_count = ref->v_count;
+		vr->v_elements = ref->v_elements;
+	}
+	vr->v_key = xstrdup(key);
+	vr->v_next = var_list;
+	var_list = vr;
+	return 0;
+}
+
+/*
+ * npfctl_iskeyword: check whether the line starts with the keyword,
+ * rather than with a longer identifier having it as a prefix.
+ */
+static int
+npfctl_iskeyword(const char *p, const char *kw)
+{
+	const size_t len = strlen(kw);
+
+	if (strncmp(p, kw, len) != 0)
+		return 0;
+	return !isalnum((unsigned char)p[len]) && p[len] != '_';
+}
+
+/*
+ * npf_parseline: main function parsing a single configuration line.
+ *
+ * => Distinguishes 'group', rule (in-group), 'table', 'nat' and
+ *    definitions.
+ * => Tracks begin-end of the group i.e. in-group state; a group is only
+ *    entered if its definition was parsed successfully.
+ * => Returns 0 on success (also for empty lines and comments) and
+ *    non-zero on a syntax error.
+ */
+int
+npf_parseline(char *buf)
+{
+	static prop_dictionary_t curgr = NULL;
+	char *p = buf;
+	int ret;
+
+	/* Skip empty lines and comments. */
+	while (isspace((unsigned char)*p))
+		p++;
+	if (*p == '\0' || *p == '\n' || *p == '#')
+		return 0;
+
+	/* At first, check if inside the group. */
+	if (curgr) {
+		prop_dictionary_t rl;
+
+		/* End of the group. */
+		if (*p == '}') {
+			curgr = NULL;
+			return 0;
+		}
+		/* Rule. */
+		rl = npfctl_mk_rule(false);
+		ret = npfctl_parserule(p, rl);
+		if (ret)
+			return ret;
+		npfctl_add_rule(rl, curgr);
+
+	} else if (npfctl_iskeyword(p, "group")) {
+		prop_dictionary_t gr;
+
+		/* Group. */
+		gr = npfctl_mk_rule(true);
+		ret = npfctl_parsegroup(p, gr);
+		if (ret)
+			return ret;
+		npfctl_add_rule(gr, NULL);
+		curgr = gr;
+
+	} else if (npfctl_iskeyword(p, "table")) {
+		prop_dictionary_t tl;
+
+		/* Table. */
+		tl = npfctl_mk_table();
+		ret = npfctl_parsetable(p, tl);
+		if (ret)
+			return ret;
+		npfctl_add_table(tl);
+
+	} else if (npfctl_iskeyword(p, "nat")) {
+		prop_dictionary_t nat;
+
+		/* NAT policy. */
+		nat = npfctl_mk_nat();
+		ret = npfctl_parse_nat(p, nat);
+		if (ret)
+			return ret;
+		npfctl_add_nat(nat);
+
+	} else {
+		/* Defined variable or syntax error. */
+		ret = npfctl_parsevar(p);
+	}
+	return ret;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.8	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,123 @@
+.\"	$NetBSD: npfctl.8,v 1.1 2010/08/22 18:56:24 rmind Exp $
+.\"
+.\" Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This material is based upon work partially supported by The
+.\" NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 22, 2010
+.Dt NPFCTL 8
+.Os
+.Sh NAME
+.Nm npfctl
+.Nd control NPF packet filter
+.Sh SYNOPSIS
+.Nm npfctl
+.Ar command
+.Op Ar arguments
+.\" -----
+.Sh DESCRIPTION
+The
+.Nm
+command can be used to control the NPF packet filter.
+For a description of NPF's configuration file, see
+.Xr npf.conf 8 .
+.Pp
+The first argument,
+.Ar command ,
+specifies the action to take.
+Valid commands are:
+.Bl -tag -width reload
+.It start
+Enable packet inspection using the currently loaded configuration, if any.
+Note that this command does not load or reload the configuration.
+.It stop
+Disable packet inspection.
+This command does not change the currently loaded configuration.
+.It reload Op Ar path
+Load or reload configuration from file.
+The configuration file at
+.Pa /etc/npf.conf
+will be used unless a file is specified by
+.Ar path .
+The reload operation (i.e., replacing the ruleset) is atomic.
+.It flush
+Flush configuration.
+That is, remove all rules and tables.
+This command does not disable packet inspection.
+.It table Ar tid
+List all entries in the currently loaded table specified by
+.Ar tid .
+Fail if
+.Ar tid
+does not exist.
+.It table Ar tid Aq Ar addr/mask
+Query the table
+.Ar tid
+for a specific IPv4 CIDR, specified by
+.Ar addr/mask .
+If no mask is specified, a single host is assumed.
+.It table Ar tid Ar [ add | rem ] Aq Ar addr/mask
+In table
+.Ar tid ,
+add or remove the IPv4 CIDR specified by
+.Aq Ar addr/mask .
+.El
+.\" -----
+.Sh PERFORMANCE
+Reloading the configuration is a relatively expensive operation.
+Therefore, frequent reloads should be avoided.
+Use of tables should be considered as an alternative design.
+See
+.Xr npf.conf 8
+for details.
+.\" -----
+.Sh FILES
+.Bl -tag -width /etc/npf.conf -compact
+.It Pa /dev/npf
+control device
+.It Pa /etc/npf.conf
+default configuration file
+.El
+.\" -----
+.Sh EXAMPLES
+Starting the NPF packet filter:
+.Bd -literal -offset indent
+# npfctl reload
+# npfctl start
+.Ed
+.Pp
+Addition and removal of entries in the table whose ID is 2:
+.Bd -literal -offset indent
+# npfctl table 2 add 10.0.0.1
+# npfctl table 2 rem 192.168.0.0/24
+.Ed
+.\" -----
+.Sh SEE ALSO
+.Xr npf.conf 8 ,
+.Xr npf_ncode 9
+.Sh HISTORY
+NPF first appeared in
+.Nx 6.0 .
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.c	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,229 @@
+/*	$NetBSD: npfctl.c,v 1.1 2010/08/22 18:56:24 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "npfctl.h"
+
+/* Action codes which main() hands over to npfctl(). */
+#define	NPFCTL_START		1
+#define	NPFCTL_STOP		2
+#define	NPFCTL_RELOAD		3
+#define	NPFCTL_FLUSH		4
+#define	NPFCTL_TABLE		5
+
+/*
+ * Command name to action code mapping.  The table is scanned linearly
+ * and ends with an entry whose command name is NULL.
+ */
+static struct operations_s {
+	const char *		cmd;
+	int			action;
+} operations[] = {
+	/* Packet inspection switch and configuration handling. */
+	{ .cmd = "start",	.action = NPFCTL_START	},
+	{ .cmd = "stop",	.action = NPFCTL_STOP	},
+	{ .cmd = "reload",	.action = NPFCTL_RELOAD	},
+	{ .cmd = "flush",	.action = NPFCTL_FLUSH	},
+	/* Table operations. */
+	{ .cmd = "table",	.action = NPFCTL_TABLE	},
+	/* Terminator. */
+	{ .cmd = NULL,		.action = 0		}
+};
+
+/*
+ * zalloc: allocate sz bytes of zero-filled memory.
+ *
+ * => Never returns NULL: on allocation failure the error is reported
+ *    with perror() and the program exits with EXIT_FAILURE.
+ */
+void *
+zalloc(size_t sz)
+{
+	void *mem = calloc(1, sz);
+
+	if (mem != NULL) {
+		return mem;
+	}
+	perror("zalloc");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * xstrdup: duplicate the string s into newly allocated memory.
+ *
+ * => Never returns NULL: if strdup() fails, the error is reported with
+ *    perror() and the program exits with EXIT_FAILURE.
+ */
+char *
+xstrdup(const char *s)
+{
+	char *copy = strdup(s);
+
+	if (copy != NULL) {
+		return copy;
+	}
+	perror("xstrdup");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * usage: print the command synopsis to stderr and exit with EXIT_FAILURE.
+ *
+ * => Lists every command in the operations table, using the argument
+ *    forms described in npfctl(8).
+ */
+static void
+usage(void)
+{
+	const char *progname = getprogname();
+
+	fprintf(stderr,
+	    "usage:\t%s { start | stop | flush }\n",
+	    progname);
+	fprintf(stderr,
+	    "\t%s reload [ <path> ]\n",
+	    progname);
+	fprintf(stderr,
+	    "\t%s table <tid> <address/mask>\n",
+	    progname);
+	fprintf(stderr,
+	    "\t%s table <tid> { add | rem } <address/mask>\n",
+	    progname);
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * npfctl_parsecfg: read the configuration file and parse it line by line.
+ *
+ * => Each line is cut at the first '#' or newline character and then
+ *    passed to npf_parseline().
+ * => Exits with EXIT_FAILURE if the file cannot be opened or read, or at
+ *    the first line which fails to parse (its line number is reported).
+ */
+static void
+npfctl_parsecfg(const char *cfg)
+{
+	char *buf = NULL, *p;
+	size_t n = 0;
+	FILE *fp;
+	int l = 0;
+
+	fp = fopen(cfg, "r");
+	if (fp == NULL) {
+		err(EXIT_FAILURE, "fopen");
+	}
+	/* With a NULL buffer and zero size getline() allocates the buffer. */
+	while (getline(&buf, &n, fp) != -1) {
+		l++;
+		p = strpbrk(buf, "#\n");
+		if (p != NULL) {
+			*p = '\0';
+		}
+		if (npf_parseline(buf)) {
+			fprintf(stderr, "invalid syntax at line %d\n", l);
+			exit(EXIT_FAILURE);
+		}
+	}
+	/*
+	 * getline() returns -1 on end-of-file as well as on error; do not
+	 * carry on with a partially read configuration.
+	 */
+	if (ferror(fp)) {
+		err(EXIT_FAILURE, "getline");
+	}
+	free(buf);
+	fclose(fp);
+}
+
+/*
+ * npfctl: perform the requested action on the NPF control device.
+ *
+ * => action is one of the NPFCTL_* codes; argc and argv are the
+ *    unmodified arguments of main().
+ * => Opens NPF_DEV_PATH, verifies the interface version and issues the
+ *    ioctl(s) needed for the action.
+ * => Exits with EXIT_FAILURE on any error; calls usage() if the 'table'
+ *    command lacks arguments.
+ */
+static void
+npfctl(int action, int argc, char **argv)
+{
+	int fd = -1, ret, ver, boolval;
+	npf_ioctl_table_t tbl;
+	char *arg;
+
+#ifdef DEBUG
+	/*
+	 * Debug build: parse ./npf.conf without opening the device, thus
+	 * fd is still -1 here.
+	 * NOTE(review): what npfctl_ioctl_send() does with the descriptor
+	 * in a DEBUG build is not visible from this file - confirm.
+	 */
+	npfctl_init_data();
+	npfctl_parsecfg("npf.conf");
+	ret = npfctl_ioctl_send(fd);
+	return;
+#endif
+	fd = open(NPF_DEV_PATH, O_RDONLY);
+	if (fd == -1) {
+		err(EXIT_FAILURE, "cannot open " NPF_DEV_PATH);
+	}
+	/* Do not inspect ver unless the ioctl has actually filled it in. */
+	ret = ioctl(fd, IOC_NPF_VERSION, &ver);
+	if (ret == -1) {
+		err(EXIT_FAILURE, "ioctl");
+	}
+	if (ver != NPF_VERSION) {
+		errx(EXIT_FAILURE, "incompatible npf interface version "
+		    "(%d, kernel %d)", NPF_VERSION, ver);
+	}
+	switch (action) {
+	case NPFCTL_START:
+		boolval = true;
+		ret = ioctl(fd, IOC_NPF_SWITCH, &boolval);
+		break;
+	case NPFCTL_STOP:
+		boolval = false;
+		ret = ioctl(fd, IOC_NPF_SWITCH, &boolval);
+		break;
+	case NPFCTL_RELOAD:
+		npfctl_init_data();
+		npfctl_parsecfg(argc < 3 ? NPF_CONF_PATH : argv[2]);
+		ret = npfctl_ioctl_send(fd);
+		break;
+	case NPFCTL_FLUSH:
+		/* Pass empty configuration to flush. */
+		npfctl_init_data();
+		ret = npfctl_ioctl_send(fd);
+		break;
+	case NPFCTL_TABLE:
+		/* Shortest valid form: table <tid> <addr/mask>. */
+		if (argc < 4) {
+			usage();
+		}
+		/* Do not hand uninitialised stack bytes to the kernel. */
+		memset(&tbl, 0, sizeof(tbl));
+		tbl.nct_tid = atoi(argv[2]);
+		if (strcmp(argv[3], "add") == 0) {
+			tbl.nct_action = NPF_IOCTL_TBLENT_ADD;
+			arg = argv[4];
+		} else if (strcmp(argv[3], "rem") == 0) {
+			tbl.nct_action = NPF_IOCTL_TBLENT_REM;
+			arg = argv[4];
+		} else {
+			/*
+			 * Neither "add" nor "rem": the third argument is
+			 * the address itself and the action is zero.
+			 * NOTE(review): npfctl(8) describes this form as
+			 * a table query - confirm the kernel side treats
+			 * a zero action that way.
+			 */
+			tbl.nct_action = 0;
+			arg = argv[3];
+		}
+		/* argv[argc] is NULL: "add" or "rem" without an address. */
+		if (arg == NULL) {
+			usage();
+		}
+		if (!npfctl_parse_v4mask(arg,
+		    &tbl.nct_addr, &tbl.nct_mask)) {
+			errx(EXIT_FAILURE, "invalid CIDR '%s'", arg);
+		}
+		ret = ioctl(fd, IOC_NPF_TABLE, &tbl);
+		break;
+	default:
+		/* Unknown action code; main() is not expected to pass one. */
+		usage();
+		break;
+	}
+	if (ret == -1) {
+		err(EXIT_FAILURE, "ioctl");
+	}
+	close(fd);
+}
+
+/*
+ * main: look up the command given as the first argument in the
+ * operations table and run the matching action.
+ *
+ * => Prints the usage and exits with failure if the command is missing
+ *    or is not a known one.
+ */
+int
+main(int argc, char **argv)
+{
+	char *cmd;
+	int n;
+
+	if (argc < 2) {
+		usage();
+	}
+	cmd = argv[1];
+
+	/* Find the subroutine; stops at the NULL terminator if unknown. */
+	for (n = 0; operations[n].cmd != NULL; n++) {
+		if (strcmp(cmd, operations[n].cmd) == 0)
+			break;
+	}
+	if (operations[n].cmd == NULL) {
+		/* Unknown command: fail loudly rather than exit with success. */
+		usage();
+	}
+	npfctl(operations[n].action, argc, argv);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.sbin/npf/npfctl/npfctl.h	Sun Aug 22 18:56:18 2010 +0000
@@ -0,0 +1,109 @@
+/*	$NetBSD: npfctl.h,v 1.1 2010/08/22 18:56:24 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NPFCTL_H_
+#define _NPFCTL_H_
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#ifndef _NPF_TESTING
+#include <net/npf.h>
+#include <net/npf_ncode.h>
+#else
+#include "npf.h"
+#include "npf_ncode.h"
+#endif
+
+#ifdef DEBUG
+#define	DPRINTF(x)	printf x
+#else
+#define	DPRINTF(x)
+#endif
+
+#define	NPF_DEV_PATH	"/dev/npf"
+#define	NPF_CONF_PATH	"/etc/npf.conf"
+
+/*
+ * List element holding one character string.
+ * NOTE(review): e_next is untyped; presumably it points to the next
+ * element_t of a singly-linked list - confirm against the users.
+ */
+typedef struct {
+	char *		e_data;		/* element data */
+	void *		e_next;		/* next entry (see the note above) */
+} element_t;
+
+/* Variable kinds; presumably the values of v_type below - TODO confirm. */
+#define	VAR_SINGLE	1
+#define	VAR_ARRAY	2
+#define	VAR_TABLE	3
+
+/*
+ * Variable defined in the configuration file.
+ * NOTE(review): v_next is untyped; the variable parser appears to chain
+ * the entries through it into its list of variables - confirm.
+ */
+typedef struct {
+	char *		v_key;		/* variable name */
+	element_t *	v_elements;	/* value element(s) */
+	int		v_type;		/* presumably one of VAR_* */
+	int		v_count;	/* presumably the number of elements */
+	void *		v_next;		/* next variable entry */
+} var_t;
+
+/*
+ * Allocation helpers (npfctl.c).  Neither returns NULL: on failure they
+ * report the error and exit the program.  zalloc() returns zeroed memory.
+ */
+void *		zalloc(size_t);
+char *		xstrdup(const char *);
+
+/*
+ * Configuration data set-up and submission; npfctl_ioctl_send() is
+ * called with the descriptor of the opened control device.
+ */
+void		npfctl_init_data(void);
+int		npfctl_ioctl_send(int);
+
+/* Parse an IPv4 "addr/mask" string; false means the string is invalid. */
+bool		npfctl_parse_v4mask(char *, in_addr_t *, in_addr_t *);
+
+/* Rules and groups (the boolean is true when creating a group). */
+prop_dictionary_t npfctl_mk_rule(bool);
+void		npfctl_add_rule(prop_dictionary_t, prop_dictionary_t);
+void		npfctl_rule_setattr(prop_dictionary_t, int, char *);
+void		npfctl_rule_protodata(prop_dictionary_t, char *, var_t *,
+		    var_t *, var_t *, var_t *);
+void		npfctl_rule_icmpdata(prop_dictionary_t, var_t *, var_t *);
+
+/* Tables. */
+prop_dictionary_t npfctl_lookup_table(char *);
+prop_dictionary_t npfctl_mk_table(void);
+void		npfctl_table_setup(prop_dictionary_t, char *, char *);
+void		npfctl_construct_table(prop_dictionary_t, char *);
+void		npfctl_add_table(prop_dictionary_t);
+
+/* NAT policies. */
+prop_dictionary_t npfctl_mk_nat(void);
+void		npfctl_add_nat(prop_dictionary_t);
+void		npfctl_nat_setup(prop_dictionary_t, char *, char *);
+
+/* NOTE(review): judging by the names, n-code size calculation - confirm. */
+size_t		npfctl_calc_ncsize(int []);
+size_t		npfctl_failure_offset(int []);
+
+/* NOTE(review): judging by the names, n-code generators - confirm. */
+void		npfctl_gennc_ether(void **, int, uint16_t);
+void		npfctl_gennc_v4cidr(void **, int,
+		    in_addr_t, in_addr_t, bool);
+void		npfctl_gennc_icmp(void **, int, int, int);
+void		npfctl_gennc_ports(void **, int,
+		    in_port_t, in_port_t, bool, bool);
+void		npfctl_gennc_tbl(void **, int, u_int , bool);
+void		npfctl_gennc_complete(void **);
+
+/* Parse a single configuration line; returns zero on success. */
+int		npf_parseline(char *);
+
+#endif
+
+#endif