NPF: partially rewrite the connection tracking mechanism: trunk
author rmind <rmind@NetBSD.org>
Sat, 19 Jul 2014 18:24:16 +0000
branch trunk
changeset 228579 813f06cf2d4f
parent 228578 5f4e418a4d72
child 228580 7f2d700c9d7e
NPF: partially rewrite the connection tracking mechanism: - Separate the tracking interface from the storage (state table) and thus prepare to use a new data structure for the storage. - Fix some race conditions in NAT association logic.
sys/modules/npf/Makefile
sys/net/npf/files.npf
sys/net/npf/npf.c
sys/net/npf/npf.h
sys/net/npf/npf_alg.c
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_conn.c
sys/net/npf/npf_conndb.c
sys/net/npf/npf_ctl.c
sys/net/npf/npf_handler.c
sys/net/npf/npf_if.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_nat.c
sys/net/npf/npf_session.c
sys/net/npf/npf_state.c
sys/net/npf/npf_state_tcp.c
sys/rump/net/lib/libnpf/Makefile
--- a/sys/modules/npf/Makefile	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/modules/npf/Makefile	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.16 2013/11/08 00:38:26 rmind Exp $
+# $NetBSD: Makefile,v 1.17 2014/07/19 18:24:17 rmind Exp $
 #
 # Public Domain.
 #
@@ -11,9 +11,9 @@
 
 SRCS=		npf.c npf_alg.c npf_conf.c npf_ctl.c npf_handler.c
 SRCS+=		npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
-SRCS+=		npf_ruleset.c npf_rproc.c npf_sendpkt.c npf_session.c
+SRCS+=		npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c
 SRCS+=		npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+=		npf_tableset_ptree.c npf_worker.c
+SRCS+=		npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
 
 CPPFLAGS+=	-DINET6
 
--- a/sys/net/npf/files.npf	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/files.npf	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.16 2013/11/08 00:38:26 rmind Exp $
+# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
 #
 # Public Domain.
 #
@@ -22,7 +22,8 @@
 file	net/npf/npf_tableset_ptree.c		npf
 file	net/npf/npf_if.c			npf
 file	net/npf/npf_inet.c			npf
-file	net/npf/npf_session.c			npf
+file	net/npf/npf_conn.c			npf
+file	net/npf/npf_conndb.c			npf
 file	net/npf/npf_state.c			npf
 file	net/npf/npf_state_tcp.c			npf
 file	net/npf/npf_nat.c			npf
--- a/sys/net/npf/npf.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.c,v 1.19 2014/03/16 05:20:30 dholland Exp $	*/
+/*	$NetBSD: npf.c,v 1.20 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf.c,v 1.19 2014/03/16 05:20:30 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf.c,v 1.20 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -52,6 +52,7 @@
 #include <sys/uio.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
 
 /*
  * Module and device structures.
@@ -100,7 +101,7 @@
 	npf_bpf_sysinit();
 	npf_worker_sysinit();
 	npf_tableset_sysinit();
-	npf_session_sysinit();
+	npf_conn_sysinit();
 	npf_nat_sysinit();
 	npf_alg_sysinit();
 	npf_ext_sysinit();
@@ -129,15 +130,15 @@
 #endif
 	npf_pfil_unregister(true);
 
-	/* Flush all sessions, destroy configuration (ruleset, etc). */
-	npf_session_tracking(false);
+	/* Flush all connections, destroy configuration (ruleset, etc). */
+	npf_conn_tracking(false);
 	npf_config_fini();
 
 	/* Finally, safe to destroy the subsystems. */
 	npf_ext_sysfini();
 	npf_alg_sysfini();
 	npf_nat_sysfini();
-	npf_session_sysfini();
+	npf_conn_sysfini();
 	npf_tableset_sysfini();
 	npf_bpf_sysfini();
 
@@ -226,10 +227,10 @@
 		error = npfctl_stats(data);
 		break;
 	case IOC_NPF_SESSIONS_SAVE:
-		error = npfctl_sessions_save(cmd, data);
+		error = npfctl_conn_save(cmd, data);
 		break;
 	case IOC_NPF_SESSIONS_LOAD:
-		error = npfctl_sessions_load(cmd, data);
+		error = npfctl_conn_load(cmd, data);
 		break;
 	case IOC_NPF_SWITCH:
 		error = npfctl_switch(data);
--- a/sys/net/npf/npf.h	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf.h	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf.h,v 1.42 2014/06/29 00:05:24 rmind Exp $	*/
+/*	$NetBSD: npf.h,v 1.43 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -45,7 +45,7 @@
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 
-#define	NPF_VERSION		14
+#define	NPF_VERSION		15
 
 /*
  * Public declarations and definitions.
@@ -326,7 +326,7 @@
 	/* Packets blocked. */
 	NPF_STAT_BLOCK_DEFAULT,
 	NPF_STAT_BLOCK_RULESET,
-	/* Session and NAT entries. */
+	/* Connection and NAT entries. */
 	NPF_STAT_SESSION_CREATE,
 	NPF_STAT_SESSION_DESTROY,
 	NPF_STAT_NAT_CREATE,
--- a/sys/net/npf/npf_alg.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_alg.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg.c,v 1.12 2014/02/17 02:38:46 rmind Exp $	*/
+/*	$NetBSD: npf_alg.c,v 1.13 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.12 2014/02/17 02:38:46 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.13 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -226,10 +226,10 @@
 	pserialize_read_exit(s);
 }
 
-npf_session_t *
-npf_alg_session(npf_cache_t *npc, nbuf_t *nbuf, int di)
+npf_conn_t *
+npf_alg_conn(npf_cache_t *npc, nbuf_t *nbuf, int di)
 {
-	npf_session_t *se = NULL;
+	npf_conn_t *con = NULL;
 	int s;
 
 	s = pserialize_read_enter();
@@ -238,9 +238,9 @@
 
 		if (!f->inspect)
 			continue;
-		if ((se = f->inspect(npc, nbuf, di)) != NULL)
+		if ((con = f->inspect(npc, nbuf, di)) != NULL)
 			break;
 	}
 	pserialize_read_exit(s);
-	return se;
+	return con;
 }
--- a/sys/net/npf/npf_alg_icmp.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.21 2014/06/08 12:12:56 spz Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.22 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.21 2014/06/08 12:12:56 spz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.22 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -49,6 +49,7 @@
 #include <net/pfil.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
 
 MODULE(MODULE_CLASS_MISC, npf_alg_icmp, "npf");
 
@@ -195,7 +196,7 @@
 }
 
 /*
- * npfa_icmp_session: ALG ICMP inspector.
+ * npfa_icmp_inspect: ALG ICMP inspector.
  *
  * => Returns true if "enpc" is filled.
  */
@@ -241,8 +242,8 @@
 	return true;
 }
 
-static npf_session_t *
-npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, int di)
+static npf_conn_t *
+npfa_icmp_conn(npf_cache_t *npc, nbuf_t *nbuf, int di)
 {
 	npf_cache_t enpc;
 
@@ -294,8 +295,8 @@
 		return false;
 	}
 
-	/* Lookup for a session using embedded packet. */
-	return npf_session_lookup(&enpc, nbuf, di, &forw);
+	/* Lookup a connection using the embedded packet. */
+	return npf_conn_lookup(&enpc, nbuf, di, &forw);
 }
 
 /*
@@ -414,7 +415,7 @@
 	static const npfa_funcs_t icmp = {
 		.match		= npfa_icmp_match,
 		.translate	= npfa_icmp_nat,
-		.inspect	= npfa_icmp_session,
+		.inspect	= npfa_icmp_conn,
 	};
 	alg_icmp = npf_alg_register("icmp", &icmp);
 	return alg_icmp ? 0 : ENOMEM;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_conn.c	Sat Jul 19 18:24:16 2014 +0000
@@ -0,0 +1,982 @@
+/*	$NetBSD: npf_conn.c,v 1.1 2014/07/19 18:24:16 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org>
+ * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF connection tracking for stateful filtering and translation.
+ *
+ * Overview
+ *
+ *	Connection direction is identified by the direction of its first
+ *	packet.  Packets can be incoming or outgoing with respect to an
+ *	interface.  To describe the packet in the context of connection
+ *	direction we will use the terms "forwards stream" and "backwards
+ *	stream".  All connections have two keys and thus two entries:
+ *
+ *		npf_conn_t::c_forw_entry for the forwards stream and
+ *		npf_conn_t::c_back_entry for the backwards stream.
+ *
+ *	The keys are formed from the 5-tuple (source/destination address,
+ *	source/destination port and the protocol).  Additional matching
+ *	is performed for the interface (a common behaviour is equivalent
+ *	to the 6-tuple lookup including the interface ID).  Note that the
+ *	key may be formed using translated values in a case of NAT.
+ *
+ *	Connections can serve two purposes: for the implicit passing or
+ *	to accommodate the dynamic NAT.  Connections for the former purpose
+ *	are created by the rules with "stateful" attribute and are used for
+ *	stateful filtering.  Such connections indicate that the packet of
+ *	the backwards stream should be passed without inspection of the
+ *	ruleset.  The other purpose is to associate a dynamic NAT mechanism
+ *	with a connection.  Such connections are created by the NAT policies
+ *	and they have a relationship with NAT translation structure via
+ *	npf_conn_t::c_nat.  A single connection can serve both purposes,
+ *	which is a common case.
+ *
+ * Connection life-cycle
+ *
+ *	Connections are established when a packet matches said rule or
+ *	NAT policy.  Both keys of the established connection are inserted
+ *	into the connection database.  A garbage collection thread
+ *	periodically scans all connections and depending on connection
+ *	properties (e.g. last activity time, protocol) removes connection
+ *	entries and expires the actual connections.
+ *
+ *	Each connection has a reference count.  The reference is acquired
+ *	on lookup and should be released by the caller.  It guarantees that
+ *	the connection will not be destroyed, although it may be expired.
+ *
+ * Synchronisation
+ *
+ *	Connection database is accessed in a lock-less manner by the main
+ *	routines: npf_conn_inspect() and npf_conn_establish().  Since they
+ *	are always called from a software interrupt, the database is
+ *	protected using passive serialisation.  The main place which can
+ *	destroy a connection is npf_conn_worker().  The database itself
+ *	can be replaced and destroyed in npf_conn_reload().
+ *
+ * ALG support
+ *
+ *	Application-level gateways (ALGs) can override generic connection
+ *	inspection (npf_alg_conn() call in npf_conn_inspect() function) by
+ *	performing their own lookup using a different key.  A recursive call
+ *	to npf_conn_inspect() is not allowed.  The ALGs ought to use the
+ *	npf_conn_lookup() function for this purpose.
+ *
+ * Lock order
+ *
+ *	conn_lock ->
+ *		[ npf_config_lock -> ]
+ *			npf_hashbucket_t::cd_lock ->
+ *				npf_conn_t::c_lock
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.1 2014/07/19 18:24:16 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/kmem.h>
+#include <sys/kthread.h>
+#include <sys/mutex.h>
+#include <net/pfil.h>
+#include <sys/pool.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+
+#define __NPF_CONN_PRIVATE
+#include "npf_conn.h"
+#include "npf_impl.h"
+
+/*
+ * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction.
+ */
+CTASSERT(PFIL_ALL == (0x001 | 0x002));
+#define	CONN_ACTIVE	0x004	/* visible on inspection */
+#define	CONN_PASS	0x008	/* perform implicit passing */
+#define	CONN_EXPIRE	0x010	/* explicitly expire */
+#define	CONN_REMOVED	0x020	/* "forw/back" entries removed */
+
+/*
+ * Connection tracking state: disabled (off), enabled (on) or flush request.
+ */
+enum { CONN_TRACKING_OFF, CONN_TRACKING_ON, CONN_TRACKING_FLUSH };
+static volatile int	conn_tracking	__cacheline_aligned;
+
+/* Connection tracking database, connection cache and the lock. */
+static npf_conndb_t *	conn_db		__read_mostly;
+static pool_cache_t	conn_cache	__read_mostly;
+static kmutex_t		conn_lock	__cacheline_aligned;
+static kcondvar_t	conn_cv		__cacheline_aligned;
+
+static void	npf_conn_worker(void);
+static void	npf_conn_destroy(npf_conn_t *);
+
+/*
+ * npf_conn_sys{init,fini}: initialise/destroy connection tracking.
+ *
+ * Connection database is initialised when connection tracking gets
+ * enabled via npf_conn_tracking() interface.
+ */
+
+void
+npf_conn_sysinit(void)
+{
+	conn_cache = pool_cache_init(sizeof(npf_conn_t), coherency_unit,
+	    0, 0, "npfconpl", NULL, IPL_NET, NULL, NULL, NULL);
+	mutex_init(&conn_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&conn_cv, "npfconcv");
+	conn_tracking = CONN_TRACKING_OFF;
+	conn_db = NULL;
+
+	npf_worker_register(npf_conn_worker);
+}
+
+void
+npf_conn_sysfini(void)
+{
+	/* Disable tracking, flush all connections. */
+	npf_conn_tracking(false);
+	npf_worker_unregister(npf_conn_worker);
+
+	KASSERT(conn_tracking == CONN_TRACKING_OFF);
+	KASSERT(conn_db == NULL);
+	pool_cache_destroy(conn_cache);
+	mutex_destroy(&conn_lock);
+	cv_destroy(&conn_cv);
+}
+
+/*
+ * npf_conn_reload: perform the reload by flushing the current connection
+ * database and replacing with the new one or just destroying.
+ *
+ * Key routine synchronising with all other readers and writers.
+ */
+static void
+npf_conn_reload(npf_conndb_t *ndb, int tracking)
+{
+	npf_conndb_t *odb;
+
+	/* Must synchronise with G/C thread and connection saving/restoring. */
+	mutex_enter(&conn_lock);
+	while (conn_tracking == CONN_TRACKING_FLUSH) {
+		cv_wait(&conn_cv, &conn_lock);
+	}
+
+	/*
+	 * Set the flush status.  It disables connection inspection as well
+	 * as creation.  There may be some operations in-flight, drain them.
+	 */
+	npf_config_enter();
+	conn_tracking = CONN_TRACKING_FLUSH;
+	npf_config_sync();
+	npf_config_exit();
+
+	/* Notify the worker to G/C all connections. */
+	npf_worker_signal();
+	while (conn_tracking == CONN_TRACKING_FLUSH) {
+		cv_wait(&conn_cv, &conn_lock);
+	}
+
+	/* Install the new database, make it visible. */
+	odb = atomic_swap_ptr(&conn_db, ndb);
+	membar_sync();
+	conn_tracking = tracking;
+
+	/* Done.  Destroy the old database, if any. */
+	mutex_exit(&conn_lock);
+	if (odb) {
+		npf_conndb_destroy(odb);
+	}
+}
+
+/*
+ * npf_conn_tracking: enable/disable connection tracking.
+ */
+void
+npf_conn_tracking(bool track)
+{
+	if (conn_tracking == CONN_TRACKING_OFF && track) {
+		/* Disabled -> Enable. */
+		npf_conndb_t *cd = npf_conndb_create();
+		npf_conn_reload(cd, CONN_TRACKING_ON);
+		return;
+	}
+	if (conn_tracking == CONN_TRACKING_ON && !track) {
+		/* Enabled -> Disable. */
+		npf_conn_reload(NULL, CONN_TRACKING_OFF);
+		pool_cache_invalidate(conn_cache);
+		return;
+	}
+}
+
+static bool
+npf_conn_trackable_p(const npf_cache_t *npc)
+{
+	/*
+	 * Check if connection tracking is on.  Also, if layer 3 and 4 are
+	 * not cached - protocol is not supported or packet is invalid.
+	 */
+	if (conn_tracking != CONN_TRACKING_ON) {
+		return false;
+	}
+	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
+		return false;
+	}
+	return true;
+}
+
+/*
+ * npf_conn_conkey: construct a lookup key; false if none can be formed.
+ */
+bool
+npf_conn_conkey(const npf_cache_t *npc, npf_connkey_t *key, const bool forw)
+{
+	const u_int alen = npc->npc_alen;
+	const struct tcphdr *th;
+	const struct udphdr *uh;
+	u_int isrc, idst;
+	uint16_t id[2];
+
+	switch (npc->npc_proto) {
+	case IPPROTO_TCP:
+		KASSERT(npf_iscached(npc, NPC_TCP));
+		th = npc->npc_l4.tcp;
+		id[NPF_SRC] = th->th_sport;
+		id[NPF_DST] = th->th_dport;
+		break;
+	case IPPROTO_UDP:
+		KASSERT(npf_iscached(npc, NPC_UDP));
+		uh = npc->npc_l4.udp;
+		id[NPF_SRC] = uh->uh_sport;
+		id[NPF_DST] = uh->uh_dport;
+		break;
+	case IPPROTO_ICMP:
+		if (npf_iscached(npc, NPC_ICMP_ID)) {
+			const struct icmp *ic = npc->npc_l4.icmp;
+			id[NPF_SRC] = ic->icmp_id;
+			id[NPF_DST] = ic->icmp_id;
+			break;
+		}
+		return false;
+	case IPPROTO_ICMPV6:
+		if (npf_iscached(npc, NPC_ICMP_ID)) {
+			const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
+			id[NPF_SRC] = ic6->icmp6_id;
+			id[NPF_DST] = ic6->icmp6_id;
+			break;
+		}
+		return false;
+	default:
+		/* Unsupported protocol. */
+		return false;
+	}
+
+	/*
+	 * Key words: [0] proto/alen, [1] IDs, [2..] addresses; swapped if !forw.
+	 */
+	if (__predict_true(forw)) {
+		isrc = NPF_SRC, idst = NPF_DST;
+	} else {
+		isrc = NPF_DST, idst = NPF_SRC;
+	}
+
+	key->ck_key[0] = ((uint32_t)npc->npc_proto << 16) | (alen & 0xffff);
+	key->ck_key[1] = ((uint32_t)id[isrc] << 16) | id[idst];
+
+	if (__predict_true(alen == sizeof(in_addr_t))) {
+		key->ck_key[2] = npc->npc_ips[isrc]->s6_addr32[0];
+		key->ck_key[3] = npc->npc_ips[idst]->s6_addr32[0];
+		/* IPv4-sized addresses: the key occupies four 32-bit words. */
+	} else {
+		const u_int nwords = alen >> 2;
+		memcpy(&key->ck_key[2], npc->npc_ips[isrc], alen);
+		memcpy(&key->ck_key[2 + nwords], npc->npc_ips[idst], alen);
+		/* The key occupies 2 + (nwords * 2) 32-bit words. */
+	}
+	return true;
+}
+
+static __always_inline void
+connkey_set_addr(npf_connkey_t *key, const npf_addr_t *naddr, const int di)
+{
+	const u_int alen = key->ck_key[0] & 0xffff;
+	uint32_t *addr = &key->ck_key[2 + ((alen >> 2) * di)];
+
+	KASSERT(alen > 0);
+	memcpy(addr, naddr, alen);
+}
+
+static __always_inline void
+connkey_set_id(npf_connkey_t *key, const uint16_t id, const int di)
+{
+	const uint32_t oid = key->ck_key[1];
+	const u_int shift = 16 * !di;
+	const uint32_t mask = 0xffff0000 >> shift;
+
+	key->ck_key[1] = ((uint32_t)id << shift) | (oid & mask);
+}
+
+/*
+ * npf_conn_lookup: lookup if there is an established connection.
+ *
+ * => If found, we will hold a reference for the caller.
+ */
+npf_conn_t *
+npf_conn_lookup(const npf_cache_t *npc, const nbuf_t *nbuf,
+    const int di, bool *forw)
+{
+	npf_conn_t *con;
+	npf_connkey_t key;
+	u_int flags, cifid;
+	bool ok, pforw;
+
+	/* Construct a key and lookup for a connection in the store. */
+	if (!npf_conn_conkey(npc, &key, true)) {
+		return NULL;
+	}
+	con = npf_conndb_lookup(conn_db, &key, forw);
+	if (con == NULL) {
+		return NULL;
+	}
+	KASSERT(npc->npc_proto == con->c_proto);
+
+	/* Check if connection is active and not expired. */
+	flags = con->c_flags;
+	ok = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE;
+
+	if (__predict_false(!ok)) {
+		atomic_dec_uint(&con->c_refcnt);
+		return NULL;
+	}
+
+	/*
+	 * Match the interface and the direction of the connection entry
+	 * and the packet.
+	 */
+	cifid = con->c_ifid;
+	if (__predict_false(cifid && cifid != nbuf->nb_ifid)) {
+		atomic_dec_uint(&con->c_refcnt);
+		return NULL;
+	}
+	pforw = (flags & PFIL_ALL) == di;
+	if (__predict_false(*forw != pforw)) {
+		atomic_dec_uint(&con->c_refcnt);
+		return NULL;
+	}
+
+	/* Update the last activity time. */
+	getnanouptime(&con->c_atime);
+	return con;
+}
+
+/*
+ * npf_conn_inspect: look up a connection and inspect the protocol data.
+ *
+ * => If found, we will hold a reference for the caller.
+ */
+npf_conn_t *
+npf_conn_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di, int *error)
+{
+	npf_conn_t *con;
+	bool forw, ok;
+
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+	if (!npf_conn_trackable_p(npc)) {
+		return NULL;
+	}
+
+	/* Query ALG which may lookup connection for us. */
+	if ((con = npf_alg_conn(npc, nbuf, di)) != NULL) {
+		/* Note: reference is held. */
+		return con;
+	}
+	if (nbuf_head_mbuf(nbuf) == NULL) {
+		*error = ENOMEM;
+		return NULL;
+	}
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
+	/* Main lookup of the connection. */
+	if ((con = npf_conn_lookup(npc, nbuf, di, &forw)) == NULL) {
+		return NULL;
+	}
+
+	/* Inspect the protocol data and handle state changes. */
+	mutex_enter(&con->c_lock);
+	ok = npf_state_inspect(npc, nbuf, &con->c_state, forw);
+	mutex_exit(&con->c_lock);
+
+	if (__predict_false(!ok)) {
+		/* Invalid: let the rules deal with it. */
+		npf_conn_release(con);
+		npf_stats_inc(NPF_STAT_INVALID_STATE);
+		con = NULL;
+	}
+	return con;
+}
+
+/*
+ * npf_conn_establish: create a new connection, insert into the global list.
+ *
+ * => Connection is created with the reference held for the caller.
+ * => Connection will be activated on the first reference release.
+ */
+npf_conn_t *
+npf_conn_establish(npf_cache_t *npc, nbuf_t *nbuf, int di, bool per_if)
+{
+	npf_conn_t *con;
+
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
+
+	if (!npf_conn_trackable_p(npc)) {
+		return NULL;
+	}
+
+	/* Allocate and initialise the new connection. */
+	con = pool_cache_get(conn_cache, PR_NOWAIT);
+	if (__predict_false(!con)) {
+		return NULL;
+	}
+	NPF_PRINTF(("NPF: create conn %p\n", con));
+	npf_stats_inc(NPF_STAT_SESSION_CREATE);
+
+	/* Reference count and flags (indicate direction). */
+	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
+	con->c_flags = (di & PFIL_ALL);
+	con->c_refcnt = 1;
+	con->c_rproc = NULL;
+	con->c_nat = NULL;
+
+	/* Initialize protocol state. */
+	if (!npf_state_init(npc, nbuf, &con->c_state)) {
+		goto err;
+	}
+
+	KASSERT(npf_iscached(npc, NPC_IP46));
+	npf_connkey_t *fw = &con->c_forw_entry;
+	npf_connkey_t *bk = &con->c_back_entry;
+
+	/*
+	 * Construct "forwards" and "backwards" keys.  Also, set the
+	 * interface ID for this connection (unless it is global).
+	 */
+	if (!npf_conn_conkey(npc, fw, true)) {
+		goto err;
+	}
+	if (!npf_conn_conkey(npc, bk, false)) {
+		goto err;
+	}
+	fw->ck_backptr = bk->ck_backptr = con;
+	con->c_ifid = per_if ? nbuf->nb_ifid : 0;
+	con->c_proto = npc->npc_proto;
+
+	/* Set last activity time for a new connection. */
+	getnanouptime(&con->c_atime);
+
+	/*
+	 * Insert both keys (entries representing directions) of the
+	 * connection.  At this point, it becomes visible.
+	 */
+	if (!npf_conndb_insert(conn_db, fw, con)) {
+		goto err;
+	}
+	if (!npf_conndb_insert(conn_db, bk, con)) {
+		/* We have hit the duplicate. */
+		npf_conndb_remove(conn_db, fw);
+		npf_stats_inc(NPF_STAT_RACE_SESSION);
+		goto err;
+	}
+
+	/* Finally, insert into the connection list. */
+	NPF_PRINTF(("NPF: establish conn %p\n", con));
+	npf_conndb_enqueue(conn_db, con);
+	return con;
+err:
+	npf_conn_destroy(con);
+	return NULL;
+}
+
+static void
+npf_conn_destroy(npf_conn_t *con)
+{
+	if (con->c_nat) {
+		/* Release any NAT structures. */
+		npf_nat_destroy(con->c_nat);
+	}
+	if (con->c_rproc) {
+		/* Release the rule procedure. */
+		npf_rproc_release(con->c_rproc);
+	}
+
+	/* Destroy the state. */
+	npf_state_destroy(&con->c_state);
+	mutex_destroy(&con->c_lock);
+
+	/* Free the structure, increase the counter. */
+	pool_cache_put(conn_cache, con);
+	npf_stats_inc(NPF_STAT_SESSION_DESTROY);
+	NPF_PRINTF(("NPF: conn %p destroyed\n", con));
+}
+
+/*
+ * npf_conn_setnat: associate NAT entry with the connection, re-inserting the
+ * "backwards" entry with translation values.  Returns 0, EINVAL or EISCONN.
+ */
+int
+npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con,
+    npf_nat_t *nt, u_int ntype)
+{
+	static const u_int nat_type_dimap[] = {
+		[NPF_NATOUT] = NPF_DST,
+		[NPF_NATIN] = NPF_SRC,
+	};
+	npf_connkey_t key, *bk;
+	npf_conn_t *ret __diagused;
+	npf_addr_t *taddr;
+	in_port_t tport;
+	u_int tidx;
+
+	KASSERT(con->c_refcnt > 0);
+
+	npf_nat_gettrans(nt, &taddr, &tport);
+	KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN);
+	tidx = nat_type_dimap[ntype];
+
+	/* Construct a "backwards" key. */
+	if (!npf_conn_conkey(npc, &key, false)) {
+		return EINVAL;
+	}
+
+	/* Acquire the lock and check for the races. */
+	mutex_enter(&con->c_lock);
+	if (__predict_false(con->c_flags & CONN_EXPIRE)) {
+		/* The connection got expired. */
+		mutex_exit(&con->c_lock);
+		return EINVAL;
+	}
+	if (__predict_false(con->c_nat != NULL)) {
+		/* Race with a duplicate packet. */
+		mutex_exit(&con->c_lock);
+		npf_stats_inc(NPF_STAT_RACE_NAT);
+		return EISCONN;
+	}
+
+	/* Remove the "backwards" entry ("ret" is only checked by KASSERT). */
+	ret = npf_conndb_remove(conn_db, &key);
+	KASSERT(ret == con);
+
+	/* Set the source/destination IDs to the translation values. */
+	bk = &con->c_back_entry;
+	connkey_set_addr(bk, taddr, tidx);
+	if (tport) {
+		connkey_set_id(bk, tport, tidx);
+	}
+
+	/* Finally, re-insert the "backwards" entry. */
+	if (!npf_conndb_insert(conn_db, bk, con)) {
+		/*
+		 * Race: we have hit the duplicate, remove the "forwards"
+		 * entry and expire our connection; it is no longer valid.
+		 */
+		(void)npf_conndb_remove(conn_db, &con->c_forw_entry);
+		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
+		mutex_exit(&con->c_lock);
+
+		npf_stats_inc(NPF_STAT_RACE_NAT);
+		return EISCONN;
+	}
+
+	/* Associate the NAT entry and release the lock. */
+	con->c_nat = nt;
+	mutex_exit(&con->c_lock);
+	return 0;
+}
+
+/*
+ * npf_conn_expire: explicitly mark connection as expired.
+ */
+void
+npf_conn_expire(npf_conn_t *con)
+{
+	/* KASSERT(con->c_refcnt > 0); XXX: npf_nat_freepolicy() */
+	atomic_or_uint(&con->c_flags, CONN_EXPIRE);
+}
+
+/*
+ * npf_conn_pass: return true if connection is "pass" one, otherwise false.
+ */
+bool
+npf_conn_pass(const npf_conn_t *con, npf_rproc_t **rp)
+{
+	KASSERT(con->c_refcnt > 0);
+	if (__predict_true(con->c_flags & CONN_PASS)) {
+		*rp = con->c_rproc;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * npf_conn_setpass: mark connection as a "pass" one and associate the
+ * rule procedure with it.
+ */
+void
+npf_conn_setpass(npf_conn_t *con, npf_rproc_t *rp)
+{
+	KASSERT((con->c_flags & CONN_ACTIVE) == 0);
+	KASSERT(con->c_refcnt > 0);
+	KASSERT(con->c_rproc == NULL);
+
+	/*
+	 * No need for atomic since the connection is not yet active.
+	 * If rproc is set, the caller transfers its reference to us,
+	 * which will be released on npf_conn_destroy().
+	 */
+	con->c_flags |= CONN_PASS;
+	con->c_rproc = rp;
+}
+
+/*
+ * npf_conn_release: release a reference, which might allow G/C thread
+ * to destroy this connection.  Activates the connection on first release.
+ */
+void
+npf_conn_release(npf_conn_t *con)
+{
+	if ((con->c_flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
+		/* Activate; atomic, since G/C may set flags concurrently. */
+		atomic_or_uint(&con->c_flags, CONN_ACTIVE);
+	}
+	KASSERT(con->c_refcnt > 0);
+	atomic_dec_uint(&con->c_refcnt);
+}
+
+/*
+ * npf_conn_retnat: return associated NAT data entry and indicate
+ * whether it is a "forwards" or "backwards" stream.
+ */
+npf_nat_t *
+npf_conn_retnat(npf_conn_t *con, const int di, bool *forw)
+{
+	KASSERT(con->c_refcnt > 0);
+	*forw = (con->c_flags & PFIL_ALL) == di;
+	return con->c_nat;
+}
+
+/*
+ * npf_conn_expired: criterion to check if connection is expired.
+ */
+static inline bool
+npf_conn_expired(const npf_conn_t *con, const struct timespec *tsnow)
+{
+	const int etime = npf_state_etime(&con->c_state, con->c_proto);
+	struct timespec tsdiff;
+
+	if (__predict_false(con->c_flags & CONN_EXPIRE)) {
+		/* Explicitly marked to be expired. */
+		return true;
+	}
+	timespecsub(tsnow, &con->c_atime, &tsdiff);
+	return tsdiff.tv_sec > etime;
+}
+
+/*
+ * npf_conn_worker: G/C pass over all connections, run from a worker thread.
+ */
+static void
+npf_conn_worker(void)
+{
+	npf_conn_t *con, *prev, *gclist = NULL;
+	npf_conndb_t *cd;
+	struct timespec tsnow;
+	bool flushall;
+
+	mutex_enter(&conn_lock);
+	if ((cd = conn_db) == NULL) {
+		goto done;
+	}
+	flushall = (conn_tracking != CONN_TRACKING_ON);
+	getnanouptime(&tsnow);
+
+	/*
+	 * Scan all connections and check them for expiration.
+	 */
+	prev = NULL;
+	con = npf_conndb_getlist(cd);
+	while (con) {
+		npf_conn_t *next = con->c_next;
+
+		/* Keep the connection unless expired or flushing everything. */
+		if (!npf_conn_expired(con, &tsnow) && !flushall) {
+			prev = con;
+			con = next;
+			continue;
+		}
+
+		/* Remove both entries, unless they were already removed. */
+		mutex_enter(&con->c_lock);
+		if ((con->c_flags & CONN_REMOVED) == 0) {
+			npf_conn_t *ret __diagused;
+
+			ret = npf_conndb_remove(cd, &con->c_forw_entry);
+			KASSERT(ret == con);
+			ret = npf_conndb_remove(cd, &con->c_back_entry);
+			KASSERT(ret == con);
+		}
+
+		/* Flag the removal and expiration. */
+		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
+		mutex_exit(&con->c_lock);
+
+		/* Move to the G/C list. */
+		npf_conndb_dequeue(cd, con, prev);
+		con->c_next = gclist;
+		gclist = con;
+
+		/* Next.. */
+		con = next;
+	}
+	npf_conndb_settail(cd, prev);
+done:
+	/* Ensure that it is safe to destroy the connections. */
+	if (gclist) {
+		npf_config_enter();
+		npf_config_sync();
+		npf_config_exit();
+	}
+
+	/*
+	 * Garbage collect all expired connections.
+	 * May need to wait for the references to drain.
+	 */
+	con = gclist;
+	while (con) {
+		npf_conn_t *next = con->c_next;
+
+		/*
+		 * Destroy only if there are no references.  Otherwise,
+		 * wait for a tiny moment and re-check the same connection.
+		 */
+		if (__predict_false(con->c_refcnt)) {
+			kpause("npfcongc", false, 1, NULL);
+			continue;
+		}
+		npf_conn_destroy(con);
+		con = next;
+	}
+
+	if (conn_tracking == CONN_TRACKING_FLUSH) {
+		/* Flush was requested - indicate we are done. */
+		conn_tracking = CONN_TRACKING_OFF;
+		cv_broadcast(&conn_cv);
+	}
+	mutex_exit(&conn_lock);
+}
+
+void
+npf_conn_load(npf_conndb_t *cd)
+{
+	KASSERT(cd != NULL);
+	npf_conn_reload(cd, CONN_TRACKING_ON);
+}
+
+/*
+ * npf_conn_save: construct a list of connections prepared for saving.
+ * Always returns zero.  Note: this is expected to be an expensive operation.
+ */
+int
+npf_conn_save(prop_array_t conlist, prop_array_t nplist)
+{
+	npf_conn_t *con, *prev;
+	int error = 0;
+
+	/*
+	 * Note: hold conn_lock to exclude the database destruction
+	 * and the G/C thread while the list is being walked.
+	 */
+	mutex_enter(&conn_lock);
+	if (!conn_db || conn_tracking != CONN_TRACKING_ON) {
+		mutex_exit(&conn_lock);
+		return 0;
+	}
+	prev = NULL;
+	con = npf_conndb_getlist(conn_db);
+	while (con) {
+		npf_conn_t *next = con->c_next;
+		prop_data_t d;
+
+		if ((con->c_flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE)
+			goto skip;
+
+		prop_dictionary_t cdict = prop_dictionary_create();
+		prop_dictionary_set_uint32(cdict, "flags", con->c_flags);
+		prop_dictionary_set_uint32(cdict, "proto", con->c_proto);
+		/* FIXME: interface-id */
+
+		d = prop_data_create_data(&con->c_state, sizeof(npf_state_t));
+		prop_dictionary_set_and_rel(cdict, "state", d);
+
+		const uint32_t *fkey = con->c_forw_entry.ck_key;
+		d = prop_data_create_data(fkey, NPF_CONN_MAXKEYLEN);
+		prop_dictionary_set_and_rel(cdict, "forw-key", d);
+
+		const uint32_t *bkey = con->c_back_entry.ck_key;
+		d = prop_data_create_data(bkey, NPF_CONN_MAXKEYLEN);
+		prop_dictionary_set_and_rel(cdict, "back-key", d);
+
+		CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
+		prop_dictionary_set_uint64(cdict, "id-ptr", (uintptr_t)con);
+
+		if (con->c_nat) {
+			npf_nat_save(cdict, nplist, con->c_nat);
+		}
+		prop_array_add(conlist, cdict);
+		prop_object_release(cdict);
+skip:
+		prev = con;
+		con = next;
+	}
+	npf_conndb_settail(conn_db, prev);
+	mutex_exit(&conn_lock);
+
+	return error;
+}
+
+/*
+ * npf_conn_restore: fully reconstruct a single connection from a dictionary
+ * and insert into the given database.
+ */
+int
+npf_conn_restore(npf_conndb_t *cd, prop_dictionary_t cdict)
+{
+	npf_conn_t *con;
+	npf_connkey_t *fw, *bk;
+	prop_object_t obj;
+	const void *d;
+
+	/* Allocate a connection and initialise it (clear first). */
+	con = pool_cache_get(conn_cache, PR_WAITOK);
+	memset(con, 0, sizeof(npf_conn_t));
+	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
+
+	prop_dictionary_get_uint32(cdict, "proto", &con->c_proto);
+	prop_dictionary_get_uint32(cdict, "flags", &con->c_flags);
+	con->c_flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
+	getnanouptime(&con->c_atime);
+
+	obj = prop_dictionary_get(cdict, "state");
+	if ((d = prop_data_data_nocopy(obj)) == NULL ||
+	    prop_data_size(obj) != sizeof(npf_state_t)) {
+		goto err;
+	}
+	memcpy(&con->c_state, d, sizeof(npf_state_t));
+
+	/* Reconstruct NAT association, if any, or return NULL. */
+	con->c_nat = npf_nat_restore(cdict, con);
+
+	/*
+	 * Fetch and copy the keys for each direction.
+	 */
+	obj = prop_dictionary_get(cdict, "forw-key");
+	if ((d = prop_data_data_nocopy(obj)) == NULL ||
+	    prop_data_size(obj) != NPF_CONN_MAXKEYLEN) {
+		goto err;
+	}
+	fw = &con->c_forw_entry;
+	memcpy(&fw->ck_key, d, NPF_CONN_MAXKEYLEN);
+
+	obj = prop_dictionary_get(cdict, "back-key");
+	if ((d = prop_data_data_nocopy(obj)) == NULL ||
+	    prop_data_size(obj) != NPF_CONN_MAXKEYLEN) {
+		goto err;
+	}
+	bk = &con->c_back_entry;
+	memcpy(&bk->ck_key, d, NPF_CONN_MAXKEYLEN);
+
+	fw->ck_backptr = bk->ck_backptr = con;
+
+	/* Insert the entries and the connection itself. */
+	if (!npf_conndb_insert(cd, fw, con)) {
+		goto err;
+	}
+	if (!npf_conndb_insert(cd, bk, con)) {
+		npf_conndb_remove(cd, fw);
+		goto err;
+	}
+	npf_conndb_enqueue(cd, con);
+	return 0;
+err:
+	npf_conn_destroy(con);
+	return EINVAL;
+}
+
+#if defined(DDB) || defined(_NPF_TESTING)
+
+void
+npf_conn_print(const npf_conn_t *con)
+{
+	const u_int alen = NPF_CONN_GETALEN(&con->c_forw_entry);
+	const uint32_t *fkey = con->c_forw_entry.ck_key;
+	const uint32_t *bkey = con->c_back_entry.ck_key;
+	const u_int proto = con->c_proto;
+	struct timespec tsnow, tsdiff;
+	const void *src, *dst;
+	int etime;
+
+	getnanouptime(&tsnow);
+	timespecsub(&tsnow, &con->c_atime, &tsdiff);
+	etime = npf_state_etime(&con->c_state, proto);
+
+	printf("%p:\n\tproto %d flags 0x%x tsdiff %d etime %d\n",
+	    con, proto, con->c_flags, (int)tsdiff.tv_sec, etime);
+
+	src = &fkey[2], dst = &fkey[2 + (alen >> 2)];
+	printf("\tforw %s:%d", npf_addr_dump(src, alen), ntohs(fkey[1] >> 16));
+	printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(fkey[1] & 0xffff));
+
+	src = &bkey[2], dst = &bkey[2 + (alen >> 2)];
+	printf("\tback %s:%d", npf_addr_dump(src, alen), ntohs(bkey[1] >> 16));
+	printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(bkey[1] & 0xffff));
+
+	npf_state_dump(&con->c_state);
+	if (con->c_nat) {
+		npf_nat_dump(con->c_nat);
+	}
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/npf_conndb.c	Sat Jul 19 18:24:16 2014 +0000
@@ -0,0 +1,268 @@
+/*	$NetBSD: npf_conndb.c,v 1.1 2014/07/19 18:24:16 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This material is based upon work partially supported by The
+ * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NPF connection storage.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.1 2014/07/19 18:24:16 rmind Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <sys/atomic.h>
+#include <sys/cprng.h>
+#include <sys/hash.h>
+#include <sys/kmem.h>
+
+#define __NPF_CONN_PRIVATE
+#include "npf_conn.h"
+#include "npf_impl.h"
+
+#define	CONNDB_HASH_BUCKETS	1024	/* XXX tune + make tunable */
+#define	CONNDB_HASH_MASK	(CONNDB_HASH_BUCKETS - 1)
+
+typedef struct {
+	rb_tree_t		hb_tree;	/* keys which hash to this bucket */
+	krwlock_t		hb_lock;	/* held around hb_tree access and hb_count updates */
+	u_int			hb_count;	/* number of keys currently in hb_tree */
+} npf_hashbucket_t;
+
+struct npf_conndb {
+	npf_conn_t *		cd_recent;	/* newly enqueued connections, pushed with CAS */
+	npf_conn_t *		cd_list;	/* head of the main singly-linked list */
+	npf_conn_t *		cd_tail;	/* list tail, as set by npf_conndb_settail() */
+	uint32_t		cd_seed;	/* seed for the bucket hash function */
+	npf_hashbucket_t	cd_hashtbl[];	/* CONNDB_HASH_BUCKETS buckets */
+};
+
+/*
+ * Connection hash table and RB-tree helper routines.
+ * Note: (node1 < node2) shall return negative.
+ */
+
+static signed int
+conndb_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
+{
+	const npf_connkey_t * const ck1 = n1;
+	const npf_connkey_t * const ck2 = n2;
+	const u_int keylen = MIN(NPF_CONN_KEYLEN(ck1), NPF_CONN_KEYLEN(ck2));
+	/* Order two keys by memcmp() over the shorter key's length; ctx is unused. */
+	KASSERT((keylen >> 2) <= NPF_CONN_NKEYWORDS);
+	return memcmp(ck1->ck_key, ck2->ck_key, keylen);
+}
+
+static signed int
+conndb_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
+{
+	const npf_connkey_t * const ck1 = n1;
+	const npf_connkey_t * const ck2 = key;	/* the lookup key is itself an npf_connkey_t */
+	return conndb_rbtree_cmp_nodes(ctx, ck1, ck2);	/* so node comparison is reused */
+}
+
+static const rb_tree_ops_t conndb_rbtree_ops = {	/* ops for every bucket tree */
+	.rbto_compare_nodes	= conndb_rbtree_cmp_nodes,
+	.rbto_compare_key	= conndb_rbtree_cmp_key,
+	.rbto_node_offset	= offsetof(npf_connkey_t, ck_rbnode),	/* node lives in the key */
+	.rbto_context		= NULL	/* the comparators above do not use ctx */
+};
+
+static npf_hashbucket_t *
+conndb_hash_bucket(npf_conndb_t *cd, const npf_connkey_t *key)
+{
+	const u_int keylen = NPF_CONN_KEYLEN(key);
+	uint32_t hash = murmurhash2(key->ck_key, keylen, cd->cd_seed);	/* seeded hash of the key */
+	return &cd->cd_hashtbl[hash & CONNDB_HASH_MASK];	/* mask == bucket count - 1 */
+}
+
+npf_conndb_t *
+npf_conndb_create(void)
+{
+	size_t len = offsetof(npf_conndb_t, cd_hashtbl[CONNDB_HASH_BUCKETS]);
+	npf_conndb_t *cd;
+	/* npf_conndb_create: allocate a zeroed database (header plus all buckets). */
+	cd = kmem_zalloc(len, KM_SLEEP);	/* result is used without a NULL check */
+	for (u_int i = 0; i < CONNDB_HASH_BUCKETS; i++) {
+		npf_hashbucket_t *hb = &cd->cd_hashtbl[i];
+
+		rb_tree_init(&hb->hb_tree, &conndb_rbtree_ops);
+		rw_init(&hb->hb_lock);
+		hb->hb_count = 0;
+	}
+	cd->cd_seed = cprng_fast32();	/* per-database seed used by conndb_hash_bucket() */
+	return cd;
+}
+
+void
+npf_conndb_destroy(npf_conndb_t *cd)
+{
+	size_t len = offsetof(npf_conndb_t, cd_hashtbl[CONNDB_HASH_BUCKETS]);
+	/* npf_conndb_destroy: free the database; every bucket must already be empty. */
+	for (u_int i = 0; i < CONNDB_HASH_BUCKETS; i++) {
+		npf_hashbucket_t *hb = &cd->cd_hashtbl[i];
+
+		KASSERT(hb->hb_count == 0);
+		KASSERT(!rb_tree_iterate(&hb->hb_tree, NULL, RB_DIR_LEFT));
+		rw_destroy(&hb->hb_lock);
+	}
+	kmem_free(cd, len);	/* same length expression as in npf_conndb_create() */
+}
+
+/* npf_conndb_lookup: find a connection given the key.  On a match, set
+ * *forw (true if the forwards key matched) and acquire a reference.
+ */
+npf_conn_t *
+npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *key, bool *forw)
+{
+	npf_connkey_t *foundkey;
+	npf_hashbucket_t *hb;
+	npf_conn_t *con;
+
+	/* Pick the bucket; an empty one is rejected before taking the lock. */
+	hb = conndb_hash_bucket(cd, key);
+	if (hb->hb_count == 0) {
+		return NULL;
+	}
+
+	/* Lookup the tree given the key and get the actual connection. */
+	rw_enter(&hb->hb_lock, RW_READER);
+	foundkey = rb_tree_find_node(&hb->hb_tree, key);
+	if (foundkey == NULL) {
+		rw_exit(&hb->hb_lock);
+		return NULL;	/* *forw is left untouched on a miss */
+	}
+	con = foundkey->ck_backptr;
+	*forw = (foundkey == &con->c_forw_entry);
+
+	/* Acquire the reference while the bucket lock is held, then return. */
+	atomic_inc_uint(&con->c_refcnt);
+	rw_exit(&hb->hb_lock);
+	return con;
+}
+
+/* npf_conndb_insert: insert the key representing the connection.  Returns
+ * true only if the tree returned this very key node; 'con' is unused here.
+ */
+bool
+npf_conndb_insert(npf_conndb_t *cd, npf_connkey_t *key, npf_conn_t *con)
+{
+	npf_hashbucket_t *hb = conndb_hash_bucket(cd, key);
+	bool ok;
+	/* NOTE(review): presumably an equal key already in the tree yields false - confirm. */
+	rw_enter(&hb->hb_lock, RW_WRITER);
+	ok = rb_tree_insert_node(&hb->hb_tree, key) == key;
+	hb->hb_count += (u_int)ok;	/* count only successful insertions */
+	rw_exit(&hb->hb_lock);
+	return ok;
+}
+
+/*
+ * npf_conndb_remove: find and delete the key and return the connection
+ * it represents (its ck_backptr), or NULL if no such key is in the tree.
+ */
+npf_conn_t *
+npf_conndb_remove(npf_conndb_t *cd, const npf_connkey_t *key)
+{
+	npf_hashbucket_t *hb = conndb_hash_bucket(cd, key);
+	npf_connkey_t *foundkey;
+	npf_conn_t *con;
+
+	rw_enter(&hb->hb_lock, RW_WRITER);
+	if ((foundkey = rb_tree_find_node(&hb->hb_tree, key)) != NULL) {
+		rb_tree_remove_node(&hb->hb_tree, foundkey);	/* the node stored in the tree */
+		con = foundkey->ck_backptr;
+		hb->hb_count--;
+	} else {
+		con = NULL;
+	}
+	rw_exit(&hb->hb_lock);
+	return con;
+}
+
+/*
+ * npf_conndb_enqueue: atomically insert the connection at the head of
+ * the singly-linked list of "recent" connections (cd_recent).
+ */
+void
+npf_conndb_enqueue(npf_conndb_t *cd, npf_conn_t *con)
+{
+	npf_conn_t *head;
+
+	do {
+		head = cd->cd_recent;
+		con->c_next = head;	/* link first, then try to publish con as the head */
+	} while (atomic_cas_ptr(&cd->cd_recent, head, con) != head);	/* retry if the CAS did not see head */
+}
+
+/*
+ * npf_conndb_dequeue: remove the connection from a singly-linked list
+ * given the previous element; no concurrent writers are allowed here.
+ */
+void
+npf_conndb_dequeue(npf_conndb_t *cd, npf_conn_t *con, npf_conn_t *prev)
+{
+	if (prev == NULL) {	/* no predecessor: con must be the head of cd_list */
+		KASSERT(cd->cd_list == con);
+		cd->cd_list = con->c_next;
+	} else {
+		prev->c_next = con->c_next;	/* unlink; cd_tail is not adjusted here */
+	}
+}
+
+/*
+ * npf_conndb_getlist: atomically take the "recent" connections and add
+ * them to the singly-linked list of the connections; returns its head.
+ */
+npf_conn_t *
+npf_conndb_getlist(npf_conndb_t *cd)
+{
+	npf_conn_t *con, *prev;
+
+	con = atomic_swap_ptr(&cd->cd_recent, NULL);	/* detach the whole "recent" chain */
+	if ((prev = cd->cd_tail) == NULL) {
+		KASSERT(cd->cd_list == NULL);
+		cd->cd_list = con;	/* no tail recorded: the chain becomes the list */
+	} else {
+		prev->c_next = con;	/* append the chain after the recorded tail */
+	}
+	return cd->cd_list;	/* cd_tail itself is not updated here */
+}
+
+/*
+ * npf_conndb_settail: assign a new tail of the singly-linked list.
+ */
+void
+npf_conndb_settail(npf_conndb_t *cd, npf_conn_t *con)
+{
+	KASSERT(con || cd->cd_list == NULL);	/* a NULL tail requires an empty list */
+	cd->cd_tail = con;	/* npf_conndb_getlist() appends after this element */
+}
--- a/sys/net/npf/npf_ctl.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_ctl.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ctl.c,v 1.33 2014/02/06 02:51:28 rmind Exp $	*/
+/*	$NetBSD: npf_ctl.c,v 1.34 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.33 2014/02/06 02:51:28 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.34 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -47,6 +47,7 @@
 #include <prop/proplib.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
 
 #if defined(DEBUG) || defined(DIAGNOSTIC)
 #define	NPF_ERR_DEBUG(e) \
@@ -162,6 +163,9 @@
 			error = EINVAL;
 			break;
 		}
+		if (type == NPF_TABLE_HASH) {
+			size = 1024; /* XXX */
+		}
 
 		/* Create and insert the table. */
 		t = npf_table_create(name, tid, type, blob, size);
@@ -545,8 +549,8 @@
 	 */
 	npf_config_reload(npf_dict, rlset, tblset, nset, rpset, flush);
 
-	/* Turn on/off session tracking accordingly. */
-	npf_session_tracking(!flush);
+	/* Turn on/off connection tracking accordingly. */
+	npf_conn_tracking(!flush);
 
 	/* Done.  Since data is consumed now, we shall not destroy it. */
 	tblset = NULL;
@@ -705,83 +709,83 @@
 }
 
 /*
- * npfctl_sessions_save: construct a list of sessions and export for saving.
+ * npfctl_conn_save: construct a list of connections and export.
  */
 int
-npfctl_sessions_save(u_long cmd, void *data)
+npfctl_conn_save(u_long cmd, void *data)
 {
 	struct plistref *pref = data;
-	prop_dictionary_t sesdict;
-	prop_array_t selist, nplist;
+	prop_array_t conlist, nplist;
+	prop_dictionary_t dict;
 	int error;
 
 	/* Create a dictionary and two lists. */
-	sesdict = prop_dictionary_create();
-	selist = prop_array_create();
+	dict = prop_dictionary_create();
+	conlist = prop_array_create();
 	nplist = prop_array_create();
 
-	/* Save the sessions. */
-	error = npf_session_save(selist, nplist);
+	/* Save the connections. */
+	error = npf_conn_save(conlist, nplist);
 	if (error) {
 		goto fail;
 	}
 
-	/* Set the session list, NAT policy list and export the dictionary. */
-	prop_dictionary_set(sesdict, "session-list", selist);
-	prop_dictionary_set(sesdict, "nat-policy-list", nplist);
-	error = prop_dictionary_copyout_ioctl(pref, cmd, sesdict);
+	/* Set the connection list, NAT policy list and export. */
+	prop_dictionary_set(dict, "session-list", conlist);
+	prop_dictionary_set(dict, "nat-policy-list", nplist);
+	error = prop_dictionary_copyout_ioctl(pref, cmd, dict);
 fail:
-	prop_object_release(sesdict);
+	prop_object_release(dict);
 	return error;
 }
 
 /*
- * npfctl_sessions_load: import a list of sessions, reconstruct them and load.
+ * npfctl_conn_load: import a list of connections and load them.
  */
 int
-npfctl_sessions_load(u_long cmd, void *data)
+npfctl_conn_load(u_long cmd, void *data)
 {
 	const struct plistref *pref = data;
-	npf_sehash_t *sehasht = NULL;
-	prop_dictionary_t sesdict, sedict;
+	npf_conndb_t *conndb = NULL;
+	prop_dictionary_t dict, condict;
 	prop_object_iterator_t it;
-	prop_array_t selist;
+	prop_array_t conlist;
 	int error;
 
-	/* Retrieve the dictionary containing session and NAT policy lists. */
-	error = prop_dictionary_copyin_ioctl(pref, cmd, &sesdict);
+	/* Get the dictionary containing connections and NAT policies. */
+	error = prop_dictionary_copyin_ioctl(pref, cmd, &dict);
 	if (error)
 		return error;
 
 	/*
-	 * Note: session objects contain the references to the NAT policy
-	 * entries.  Therefore, no need to directly access it.
+	 * Note: connection objects contain the references to the NAT
+	 * policy entries.  Therefore, no need to directly access it.
 	 */
-	selist = prop_dictionary_get(sesdict, "session-list");
-	if (prop_object_type(selist) != PROP_TYPE_ARRAY) {
-		prop_object_release(selist);
+	conlist = prop_dictionary_get(dict, "session-list");
+	if (prop_object_type(conlist) != PROP_TYPE_ARRAY) {
+		prop_object_release(conlist);
 		return EINVAL;
 	}
 
-	/* Create a session hash table. */
-	sehasht = sess_htable_create();
+	/* Create a connection database. */
+	conndb = npf_conndb_create();
 
 	/*
-	 * Iterate through and construct each session.  Note: acquire the
-	 * config lock as we access NAT policies during the restore.
+	 * Iterate through and construct each connection.  Note: acquire
+	 * the config lock as we access NAT policies during the restore.
 	 */
 	error = 0;
-	it = prop_array_iterator(selist);
+	it = prop_array_iterator(conlist);
 
 	npf_config_enter();
-	while ((sedict = prop_object_iterator_next(it)) != NULL) {
-		/* Session - dictionary. */
-		if (prop_object_type(sedict) != PROP_TYPE_DICTIONARY) {
+	while ((condict = prop_object_iterator_next(it)) != NULL) {
+		/* Connection - dictionary. */
+		if (prop_object_type(condict) != PROP_TYPE_DICTIONARY) {
 			error = EINVAL;
 			break;
 		}
-		/* Construct and insert real session structure. */
-		error = npf_session_restore(sehasht, sedict);
+		/* Construct and insert real connection structure. */
+		error = npf_conn_restore(conndb, condict);
 		if (error) {
 			break;
 		}
@@ -789,14 +793,14 @@
 	npf_config_exit();
 
 	prop_object_iterator_release(it);
-	prop_object_release(selist);
+	prop_object_release(conlist);
 
 	if (!error) {
 		/* Finally, load the new table. */
-		npf_session_load(sehasht);
+		npf_conn_load(conndb);
 	} else {
-		/* Destroy session table. */
-		sess_htable_destroy(sehasht);
+		/* Destroy the connection database. */
+		npf_conndb_destroy(conndb);
 	}
 	return error;
 }
--- a/sys/net/npf/npf_handler.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_handler.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_handler.c,v 1.30 2014/05/19 18:45:51 jakllsch Exp $	*/
+/*	$NetBSD: npf_handler.c,v 1.31 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.30 2014/05/19 18:45:51 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.31 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -54,6 +54,7 @@
 #include <netinet6/ip6_var.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
 
 static bool		pfil_registered = false;
 static pfil_head_t *	npf_ph_if = NULL;
@@ -141,7 +142,7 @@
 {
 	nbuf_t nbuf;
 	npf_cache_t npc;
-	npf_session_t *se;
+	npf_conn_t *con;
 	npf_rule_t *rl;
 	npf_rproc_t *rp;
 	int error, retfl;
@@ -166,7 +167,7 @@
 		 */
 		error = npf_reassembly(&npc, &nbuf, mp);
 		if (error) {
-			se = NULL;
+			con = NULL;
 			goto out;
 		}
 		if (*mp == NULL) {
@@ -175,11 +176,11 @@
 		}
 	}
 
-	/* Inspect the list of sessions (if found, acquires a reference). */
-	se = npf_session_inspect(&npc, &nbuf, di, &error);
+	/* Inspect the list of connections (if found, acquires a reference). */
+	con = npf_conn_inspect(&npc, &nbuf, di, &error);
 
-	/* If "passing" session found - skip the ruleset inspection. */
-	if (se && npf_session_pass(se, &rp)) {
+	/* If "passing" connection found - skip the ruleset inspection. */
+	if (con && npf_conn_pass(con, &rp)) {
 		npf_stats_inc(NPF_STAT_PASS_SESSION);
 		KASSERT(error == 0);
 		goto pass;
@@ -209,7 +210,7 @@
 
 	/*
 	 * Get the rule procedure (acquires a reference) for association
-	 * with a session (if any) and execution.
+	 * with a connection (if any) and execution.
 	 */
 	KASSERT(rp == NULL);
 	rp = npf_rule_getrproc(rl);
@@ -225,19 +226,19 @@
 	npf_stats_inc(NPF_STAT_PASS_RULESET);
 
 	/*
-	 * Establish a "pass" session, if required.  Just proceed,
-	 * if session creation fails (e.g. due to unsupported protocol).
+	 * Establish a "pass" connection, if required.  Just proceed if
+	 * connection creation fails (e.g. due to unsupported protocol).
 	 */
-	if ((retfl & NPF_RULE_STATEFUL) != 0 && !se) {
-		se = npf_session_establish(&npc, &nbuf, di,
+	if ((retfl & NPF_RULE_STATEFUL) != 0 && !con) {
+		con = npf_conn_establish(&npc, &nbuf, di,
 		    (retfl & NPF_RULE_MULTIENDS) == 0);
-		if (se) {
+		if (con) {
 			/*
 			 * Note: the reference on the rule procedure is
-			 * transfered to the session.  It will be released
-			 * on session destruction.
+			 * transferred to the connection.  It will be
+			 * released on connection destruction.
 			 */
-			npf_session_setpass(se, rp);
+			npf_conn_setpass(con, rp);
 		}
 	}
 pass:
@@ -246,15 +247,15 @@
 	/*
 	 * Perform NAT.
 	 */
-	error = npf_do_nat(&npc, se, &nbuf, di);
+	error = npf_do_nat(&npc, con, &nbuf, di);
 block:
 	/*
 	 * Execute the rule procedure, if any is associated.
 	 * It may reverse the decision from pass to block.
 	 */
 	if (rp && !npf_rproc_run(&npc, &nbuf, rp, &decision)) {
-		if (se) {
-			npf_session_release(se);
+		if (con) {
+			npf_conn_release(con);
 		}
 		npf_rproc_release(rp);
 		*mp = NULL;
@@ -262,11 +263,11 @@
 	}
 out:
 	/*
-	 * Release the reference on a session.  Release the reference on a
-	 * rule procedure only if there was no association.
+	 * Release the reference on a connection.  Release the reference
+	 * on a rule procedure only if there was no association.
 	 */
-	if (se) {
-		npf_session_release(se);
+	if (con) {
+		npf_conn_release(con);
 	} else if (rp) {
 		npf_rproc_release(rp);
 	}
--- a/sys/net/npf/npf_if.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_if.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_if.c,v 1.2 2013/11/11 15:28:37 martin Exp $	*/
+/*	$NetBSD: npf_if.c,v 1.3 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2013 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_if.c,v 1.2 2013/11/11 15:28:37 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_if.c,v 1.3 2014/07/19 18:24:16 rmind Exp $");
 
 #ifdef _KERNEL_OPT
 #include "pf.h"
@@ -69,7 +69,7 @@
 static u_int		npf_ifmap_cnt			__read_mostly;
 
 /*
- * NOTE: IDs start from 1.  Zero is reseved for "no interface" and
+ * NOTE: IDs start from 1.  Zero is reserved for "no interface" and
  * (unsigned)-1 for "inactive interface".  Therefore, an interface
  * can have either INACTIVE_ID or non-zero ID.
  */
--- a/sys/net/npf/npf_impl.h	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_impl.h	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.53 2014/06/25 00:20:06 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.54 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -49,10 +49,7 @@
 
 #include <sys/types.h>
 #include <sys/queue.h>
-#include <sys/hash.h>
-#include <sys/rbtree.h>
 #include <sys/ptree.h>
-#include <sys/rwlock.h>
 
 #include <net/bpf.h>
 #include <net/bpfjit.h>
@@ -74,7 +71,7 @@
 struct npf_rule;
 struct npf_rprocset;
 struct npf_nat;
-struct npf_session;
+struct npf_conn;
 
 typedef struct npf_ruleset	npf_ruleset_t;
 typedef struct npf_rule		npf_rule_t;
@@ -82,13 +79,13 @@
 typedef struct npf_rprocset	npf_rprocset_t;
 typedef struct npf_alg		npf_alg_t;
 typedef struct npf_natpolicy	npf_natpolicy_t;
-typedef struct npf_session	npf_session_t;
+typedef struct npf_conn		npf_conn_t;
 
-struct npf_sehash;
+struct npf_conndb;
 struct npf_table;
 struct npf_tableset;
 
-typedef struct npf_sehash	npf_sehash_t;
+typedef struct npf_conndb	npf_conndb_t;
 typedef struct npf_table	npf_table_t;
 typedef struct npf_tableset	npf_tableset_t;
 
@@ -123,7 +120,6 @@
 } npf_tcpstate_t;
 
 typedef struct {
-	kmutex_t	nst_lock;
 	u_int		nst_state;
 	npf_tcpstate_t	nst_tcpst[2];
 } npf_state_t;
@@ -135,7 +131,7 @@
 typedef struct {
 	bool		(*match)(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
 	bool		(*translate)(npf_cache_t *, nbuf_t *, npf_nat_t *, bool);
-	npf_session_t * (*inspect)(npf_cache_t *, nbuf_t *, int);
+	npf_conn_t *	(*inspect)(npf_cache_t *, nbuf_t *, int);
 } npfa_funcs_t;
 
 /*
@@ -172,8 +168,8 @@
 int		npfctl_switch(void *);
 int		npfctl_reload(u_long, void *);
 int		npfctl_getconf(u_long, void *);
-int		npfctl_sessions_save(u_long, void *);
-int		npfctl_sessions_load(u_long, void *);
+int		npfctl_conn_save(u_long, void *);
+int		npfctl_conn_load(u_long, void *);
 int		npfctl_rule(u_long, void *);
 int		npfctl_table(void *);
 
@@ -300,29 +296,6 @@
 void		npf_rproc_release(npf_rproc_t *);
 bool		npf_rproc_run(npf_cache_t *, nbuf_t *, npf_rproc_t *, int *);
 
-/* Session handling interface. */
-void		npf_session_sysinit(void);
-void		npf_session_sysfini(void);
-void		npf_session_tracking(bool);
-
-npf_sehash_t *	sess_htable_create(void);
-void		sess_htable_destroy(npf_sehash_t *);
-
-npf_session_t *	npf_session_lookup(const npf_cache_t *, const nbuf_t *,
-		    const int, bool *);
-npf_session_t *	npf_session_inspect(npf_cache_t *, nbuf_t *, const int, int *);
-npf_session_t *	npf_session_establish(npf_cache_t *, nbuf_t *, int, bool);
-void		npf_session_release(npf_session_t *);
-void		npf_session_expire(npf_session_t *);
-bool		npf_session_pass(const npf_session_t *, npf_rproc_t **);
-void		npf_session_setpass(npf_session_t *, npf_rproc_t *);
-int		npf_session_setnat(npf_session_t *, npf_nat_t *, u_int);
-npf_nat_t *	npf_session_retnat(npf_session_t *, const int, bool *);
-
-void		npf_session_load(npf_sehash_t *);
-int		npf_session_save(prop_array_t, prop_array_t);
-int		npf_session_restore(npf_sehash_t *, prop_dictionary_t);
-
 /* State handling. */
 bool		npf_state_init(npf_cache_t *, nbuf_t *, npf_state_t *);
 bool		npf_state_inspect(npf_cache_t *, nbuf_t *, npf_state_t *,
@@ -342,14 +315,14 @@
 bool		npf_nat_sharepm(npf_natpolicy_t *, npf_natpolicy_t *);
 void		npf_nat_freealg(npf_natpolicy_t *, npf_alg_t *);
 
-int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
+int		npf_do_nat(npf_cache_t *, npf_conn_t *, nbuf_t *, const int);
 void		npf_nat_destroy(npf_nat_t *);
 void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
 
 int		npf_nat_save(prop_dictionary_t, prop_array_t, npf_nat_t *);
-npf_nat_t *	npf_nat_restore(prop_dictionary_t, npf_session_t *);
+npf_nat_t *	npf_nat_restore(prop_dictionary_t, npf_conn_t *);
 
 /* ALG interface. */
 void		npf_alg_sysinit(void);
@@ -359,11 +332,10 @@
 npf_alg_t *	npf_alg_construct(const char *);
 bool		npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *, int);
 void		npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, bool);
-npf_session_t *	npf_alg_session(npf_cache_t *, nbuf_t *, int);
+npf_conn_t *	npf_alg_conn(npf_cache_t *, nbuf_t *, int);
 
 /* Debugging routines. */
-void		npf_addr_dump(const npf_addr_t *);
-void		npf_sessions_dump(void);
+const char *	npf_addr_dump(const npf_addr_t *, int);
 void		npf_state_dump(const npf_state_t *);
 void		npf_nat_dump(const npf_nat_t *);
 void		npf_state_setsampler(void (*)(npf_state_t *, bool));
--- a/sys/net/npf/npf_inet.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_inet.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.31 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.31 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -735,12 +735,15 @@
 
 #if defined(DDB) || defined(_NPF_TESTING)
 
-void
-npf_addr_dump(const npf_addr_t *addr)
+const char *
+npf_addr_dump(const npf_addr_t *addr, int alen)
 {
-	printf("IP[%x:%x:%x:%x]\n",
-	    addr->s6_addr32[0], addr->s6_addr32[1],
-	    addr->s6_addr32[2], addr->s6_addr32[3]);
+	if (alen == sizeof(struct in_addr)) {
+		struct in_addr ip;
+		memcpy(&ip, addr, alen);
+		return inet_ntoa(ip);
+	}
+	return "[IPv6]"; // XXX
 }
 
 #endif
--- a/sys/net/npf/npf_nat.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_nat.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.28 2014/05/30 23:26:06 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.29 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org>
@@ -59,19 +59,19 @@
  *	the IP addresses, therefore multiple NAT policies with the same IP
  *	will share the same port map.
  *
- * Sessions, translation entries and their life-cycle
+ * Connections, translation entries and their life-cycle
  *
- *	NAT module relies on session management module.  Each translated
- *	session has an associated translation entry (npf_nat_t), which
+ *	NAT module relies on connection tracking module.  Each translated
+ *	connection has an associated translation entry (npf_nat_t), which
  *	contains information used for backwards stream translation, i.e.
  *	original IP address with port and translation port, allocated from
  *	the port map.  Each NAT entry is associated with the policy, which
  *	contains translation IP address.  Allocated port is returned to the
- *	port map and NAT entry is destroyed when session expires.
+ *	port map and NAT entry is destroyed when connection expires.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.28 2014/05/30 23:26:06 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.29 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -89,6 +89,7 @@
 #include <netinet/in.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
 
 /*
  * NPF portmap structure.
@@ -133,7 +134,7 @@
 #define	NPF_NP_CMP_SIZE		(sizeof(npf_natpolicy_t) - NPF_NP_CMP_START)
 
 /*
- * NAT translation entry for a session.
+ * NAT translation entry for a connection.
  */
 struct npf_nat {
 	/* Associated NAT policy. */
@@ -152,7 +153,7 @@
 	uintptr_t		nt_alg_arg;
 
 	LIST_ENTRY(npf_nat)	nt_entry;
-	npf_session_t *		nt_session;
+	npf_conn_t *		nt_conn;
 };
 
 static pool_cache_t		nat_cache	__read_mostly;
@@ -258,7 +259,7 @@
 npf_nat_freepolicy(npf_natpolicy_t *np)
 {
 	npf_portmap_t *pm = np->n_portmap;
-	npf_session_t *se;
+	npf_conn_t *con;
 	npf_nat_t *nt;
 
 	/*
@@ -268,9 +269,9 @@
 	while (np->n_refcnt) {
 		mutex_enter(&np->n_lock);
 		LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
-			se = nt->nt_session;
-			KASSERT(se != NULL);
-			npf_session_expire(se);
+			con = nt->nt_conn;
+			KASSERT(con != NULL);
+			npf_conn_expire(con);
 		}
 		mutex_exit(&np->n_lock);
 
@@ -485,7 +486,7 @@
  * npf_nat_create: create a new NAT translation entry.
  */
 static npf_nat_t *
-npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_session_t *se)
+npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_conn_t *con)
 {
 	const int proto = npc->npc_proto;
 	npf_nat_t *nt;
@@ -493,14 +494,14 @@
 	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_LAYER4));
 
-	/* Construct a new NAT entry and associate it with the session. */
+	/* Construct a new NAT entry and associate it with the connection. */
 	nt = pool_cache_get(nat_cache, PR_NOWAIT);
 	if (nt == NULL){
 		return NULL;
 	}
 	npf_stats_inc(NPF_STAT_NAT_CREATE);
 	nt->nt_natpolicy = np;
-	nt->nt_session = se;
+	nt->nt_conn = con;
 	nt->nt_alg = NULL;
 
 	/* Save the original address which may be rewritten. */
@@ -587,7 +588,7 @@
 /*
  * npf_nat_algo: perform the translation given the algorithm.
  */
-static inline int 
+static inline int
 npf_nat_algo(npf_cache_t *npc, const npf_natpolicy_t *np, bool forw)
 {
 	const u_int which = npf_nat_which(np->n_type, forw);
@@ -608,17 +609,17 @@
 
 /*
  * npf_do_nat:
- *	- Inspect packet for a NAT policy, unless a session with a NAT
+ *	- Inspect packet for a NAT policy, unless a connection with a NAT
  *	  association already exists.  In such case, determine whether it
  *	  is a "forwards" or "backwards" stream.
  *	- Perform translation: rewrite source or destination fields,
  *	  depending on translation type and direction.
- *	- Associate a NAT policy with a session (may establish a new).
+ *	- Associate a NAT policy with a connection (may establish a new).
  */
 int
-npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int di)
+npf_do_nat(npf_cache_t *npc, npf_conn_t *con, nbuf_t *nbuf, const int di)
 {
-	npf_session_t *nse = NULL;
+	npf_conn_t *ncon = NULL;
 	npf_natpolicy_t *np;
 	npf_nat_t *nt;
 	int error;
@@ -631,17 +632,17 @@
 	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 
 	/*
-	 * Return the NAT entry associated with the session, if any.
+	 * Return the NAT entry associated with the connection, if any.
 	 * Determines whether the stream is "forwards" or "backwards".
-	 * Note: no need to lock, since reference on session is held.
+	 * Note: no need to lock, since reference on connection is held.
 	 */
-	if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
+	if (con && (nt = npf_conn_retnat(con, di, &forw)) != NULL) {
 		np = nt->nt_natpolicy;
 		goto translate;
 	}
 
 	/*
-	 * Inspect the packet for a NAT policy, if there is no session.
+	 * Inspect the packet for a NAT policy, if there is no connection.
 	 * Note: acquires a reference if found.
 	 */
 	np = npf_nat_inspect(npc, nbuf, di);
@@ -662,33 +663,33 @@
 	}
 
 	/*
-	 * If there is no local session (no "stateful" rule - unusual, but
-	 * possible configuration), establish one before translation.  Note
-	 * that it is not a "pass" session, therefore passing of "backwards"
-	 * stream depends on other, stateless filtering rules.
+	 * If there is no local connection (no "stateful" rule - unusual,
+	 * but possible configuration), establish one before translation.
+	 * Note that it is not a "pass" connection, therefore passing of
+	 * "backwards" stream depends on other, stateless filtering rules.
 	 */
-	if (se == NULL) {
-		nse = npf_session_establish(npc, nbuf, di, true);
-		if (nse == NULL) {
+	if (con == NULL) {
+		ncon = npf_conn_establish(npc, nbuf, di, true);
+		if (ncon == NULL) {
 			atomic_dec_uint(&np->n_refcnt);
 			return ENOMEM;
 		}
-		se = nse;
+		con = ncon;
 	}
 
 	/*
-	 * Create a new NAT entry and associate with the session.
+	 * Create a new NAT entry and associate with the connection.
 	 * We will consume the reference on success (release on error).
 	 */
-	nt = npf_nat_create(npc, np, se);
+	nt = npf_nat_create(npc, np, con);
 	if (nt == NULL) {
 		atomic_dec_uint(&np->n_refcnt);
 		error = ENOMEM;
 		goto out;
 	}
 
-	/* Associate the NAT translation entry with the session. */
-	error = npf_session_setnat(se, nt, np->n_type);
+	/* Associate the NAT translation entry with the connection. */
+	error = npf_conn_setnat(npc, con, nt, np->n_type);
 	if (error) {
 		/* Will release the reference. */
 		npf_nat_destroy(nt);
@@ -709,12 +710,12 @@
 	/* Perform the translation. */
 	error = npf_nat_translate(npc, nbuf, nt, forw);
 out:
-	if (__predict_false(nse)) {
+	if (__predict_false(ncon)) {
 		if (error) {
 			/* It created for NAT - just expire. */
-			npf_session_expire(nse);
+			npf_conn_expire(ncon);
 		}
-		npf_session_release(nse);
+		npf_conn_release(ncon);
 	}
 	return error;
 }
@@ -752,7 +753,7 @@
 }
 
 /*
- * npf_nat_destroy: destroy NAT structure (performed on session expiration).
+ * npf_nat_destroy: destroy NAT structure (performed on connection expiration).
  */
 void
 npf_nat_destroy(npf_nat_t *nt)
@@ -777,7 +778,7 @@
  * npf_nat_save: construct NAT entry and reference to the NAT policy.
  */
 int
-npf_nat_save(prop_dictionary_t sedict, prop_array_t natlist, npf_nat_t *nt)
+npf_nat_save(prop_dictionary_t condict, prop_array_t natlist, npf_nat_t *nt)
 {
 	npf_natpolicy_t *np = nt->nt_natpolicy;
 	prop_object_iterator_t it;
@@ -787,7 +788,7 @@
 
 	/* Set NAT entry data. */
 	nd = prop_data_create_data(nt, sizeof(npf_nat_t));
-	prop_dictionary_set(sedict, "nat-data", nd);
+	prop_dictionary_set(condict, "nat-data", nd);
 	prop_object_release(nd);
 
 	/* Find or create a NAT policy. */
@@ -811,7 +812,7 @@
 		prop_array_add(natlist, npdict);
 		prop_object_release(npdict);
 	}
-	prop_dictionary_set(sedict, "nat-policy", npdict);
+	prop_dictionary_set(condict, "nat-policy", npdict);
 	prop_object_release(npdict);
 	return 0;
 }
@@ -822,7 +823,7 @@
  * => Caller should lock the active NAT ruleset.
  */
 npf_nat_t *
-npf_nat_restore(prop_dictionary_t sedict, npf_session_t *se)
+npf_nat_restore(prop_dictionary_t condict, npf_conn_t *con)
 {
 	const npf_natpolicy_t *onp;
 	const npf_nat_t *ntraw;
@@ -832,7 +833,7 @@
 	npf_nat_t *nt;
 
 	/* Get raw NAT entry. */
-	obj = prop_dictionary_get(sedict, "nat-data");
+	obj = prop_dictionary_get(condict, "nat-data");
 	ntraw = prop_data_data_nocopy(obj);
 	if (ntraw == NULL || prop_data_size(obj) != sizeof(npf_nat_t)) {
 		return NULL;
@@ -840,7 +841,7 @@
 
 	/* Find a stored NAT policy information. */
 	obj = prop_dictionary_get(
-	    prop_dictionary_get(sedict, "nat-policy"), "nat-policy-data");
+	    prop_dictionary_get(condict, "nat-policy"), "nat-policy-data");
 	onp = prop_data_data_nocopy(obj);
 	if (onp == NULL || prop_data_size(obj) != sizeof(npf_natpolicy_t)) {
 		return NULL;
@@ -869,7 +870,7 @@
 	memcpy(nt, ntraw, sizeof(npf_nat_t));
 	LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
 	nt->nt_natpolicy = np;
-	nt->nt_session = se;
+	nt->nt_conn = con;
 	nt->nt_alg = NULL;
 	return nt;
 }
--- a/sys/net/npf/npf_session.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_session.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_session.c,v 1.32 2014/05/14 20:35:27 rmind Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.33 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -92,7 +92,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.32 2014/05/14 20:35:27 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.33 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -114,6 +114,31 @@
 #include <sys/systm.h>
 
 #include "npf_impl.h"
+#include "npf_conn.h"
+
+#define	npf_session_t		npf_conn_t
+#define	npf_session		npf_conn
+#define	npf_sehash_t		npf_conndb_t
+
+#define	npf_session_sysinit	npf_conn_sysinit
+#define	npf_session_sysfini	npf_conn_sysfini
+#define	npf_session_tracking	npf_conn_tracking
+#define	npf_session_lookup	npf_conn_lookup
+#define	npf_session_inspect	npf_conn_inspect
+#define	npf_session_release	npf_conn_release
+#define	npf_session_establish	npf_conn_establish
+#define	npf_session_setnat	npf_conn_setnat
+#define	npf_session_expire	npf_conn_expire
+#define	npf_session_pass	npf_conn_pass
+#define	npf_session_setpass	npf_conn_setpass
+#define	npf_session_release	npf_conn_release
+#define	npf_session_retnat	npf_conn_retnat
+#define	npf_session_load	npf_conn_load
+#define	npf_session_save	npf_conn_save
+#define	npf_session_restore	npf_conn_restore
+#define	sess_htable_create	npf_conndb_create
+#define	sess_htable_destroy	npf_conndb_destroy
+#define	npf_alg_session		npf_alg_conn
 
 /*
  * Session structures: entry for embedding and the main structure.
@@ -140,7 +165,7 @@
 	uint16_t		se_dst_id;
 } npf_sentry_t;
 
-struct npf_session {
+struct npf_conn {
 	/* Session "forwards" and "backwards" entries. */
 	npf_sentry_t		s_forw_entry;
 	npf_sentry_t		s_back_entry;
@@ -154,6 +179,7 @@
 	} s_common_id;
 	/* Flags and the protocol state. */
 	u_int			s_flags;
+	kmutex_t		s_lock;
 	npf_state_t		s_state;
 	/* Association of rule procedure data. */
 	npf_rproc_t *		s_rproc;
@@ -168,7 +194,7 @@
 
 LIST_HEAD(npf_sesslist, npf_session);
 
-struct npf_sehash {
+struct npf_conndb {
 	rb_tree_t		sh_tree;
 	struct npf_sesslist	sh_list;
 	krwlock_t		sh_lock;
@@ -589,12 +615,15 @@
 	}
 
 	/* Inspect the protocol data and handle state changes. */
+	mutex_enter(&se->s_lock);
 	if (!npf_state_inspect(npc, nbuf, &se->s_state, forw)) {
 		/* Invalid: let the rules deal with it. */
+		mutex_exit(&se->s_lock);
 		npf_session_release(se);
 		npf_stats_inc(NPF_STAT_INVALID_STATE);
-		se = NULL;
+		return NULL;
 	}
+	mutex_exit(&se->s_lock);
 	return se;
 }
 
@@ -628,6 +657,7 @@
 	npf_stats_inc(NPF_STAT_SESSION_CREATE);
 
 	/* Reference count and flags (indicate direction). */
+	mutex_init(&se->s_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 	se->s_refcnt = 1;
 	se->s_flags = (di & PFIL_ALL);
 	se->s_rproc = NULL;
@@ -714,6 +744,7 @@
 
 	/* Destroy the state. */
 	npf_state_destroy(&se->s_state);
+	mutex_destroy(&se->s_lock);
 
 	/* Free the structure, increase the counter. */
 	pool_cache_put(sess_cache, se);
@@ -726,7 +757,8 @@
  * and re-insert session entry accordingly.
  */
 int
-npf_session_setnat(npf_session_t *se, npf_nat_t *nt, u_int ntype)
+npf_session_setnat(const npf_cache_t *npc,
+    npf_session_t *se, npf_nat_t *nt, u_int ntype)
 {
 	npf_sehash_t *sh;
 	npf_sentry_t *sen;
@@ -1080,7 +1112,6 @@
 	npf_sehash_t *fsh, *bsh;
 	npf_sentry_t *fw, *bk;
 	prop_object_t obj;
-	npf_state_t *nst;
 	const void *d;
 	int error = 0;
 
@@ -1097,12 +1128,10 @@
 	 */
 	se = pool_cache_get(sess_cache, PR_WAITOK);
 	memcpy(se, d, sizeof(npf_session_t));
+	mutex_init(&se->s_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 	se->s_refcnt = 0;
 	se->s_rproc = NULL;
 
-	nst = &se->s_state;
-	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
-
 	/*
 	 * Reconstruct NAT association, if any, or return NULL.
 	 * Warning: must not leave stale entry.
@@ -1144,6 +1173,8 @@
 
 #if defined(DDB) || defined(_NPF_TESTING)
 
+void npf_sessions_dump(void);
+
 void
 npf_sessions_dump(void)
 {
--- a/sys/net/npf/npf_state.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_state.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state.c,v 1.15 2013/11/04 22:17:21 rmind Exp $	*/
+/*	$NetBSD: npf_state.c,v 1.16 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -30,11 +30,11 @@
  */
 
 /*
- * NPF state engine to track sessions.
+ * NPF state engine to track connections.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.15 2013/11/04 22:17:21 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.16 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -44,34 +44,34 @@
 #include "npf_impl.h"
 
 /*
- * Generic session states and timeout table.
+ * Generic connection states and timeout table.
  *
  * Note: used for connection-less protocols.
  */
 
-#define	NPF_ANY_SESSION_CLOSED		0
-#define	NPF_ANY_SESSION_NEW		1
-#define	NPF_ANY_SESSION_ESTABLISHED	2
-#define	NPF_ANY_SESSION_NSTATES		3
+#define	NPF_ANY_CONN_CLOSED		0
+#define	NPF_ANY_CONN_NEW		1
+#define	NPF_ANY_CONN_ESTABLISHED	2
+#define	NPF_ANY_CONN_NSTATES		3
 
-static const uint8_t npf_generic_fsm[NPF_ANY_SESSION_NSTATES][2] = {
-	[NPF_ANY_SESSION_CLOSED] = {
-		[NPF_FLOW_FORW]		= NPF_ANY_SESSION_NEW,
+static const uint8_t npf_generic_fsm[NPF_ANY_CONN_NSTATES][2] = {
+	[NPF_ANY_CONN_CLOSED] = {
+		[NPF_FLOW_FORW]		= NPF_ANY_CONN_NEW,
 	},
-	[NPF_ANY_SESSION_NEW] = {
-		[NPF_FLOW_FORW]		= NPF_ANY_SESSION_NEW,
-		[NPF_FLOW_BACK]		= NPF_ANY_SESSION_ESTABLISHED,
+	[NPF_ANY_CONN_NEW] = {
+		[NPF_FLOW_FORW]		= NPF_ANY_CONN_NEW,
+		[NPF_FLOW_BACK]		= NPF_ANY_CONN_ESTABLISHED,
 	},
-	[NPF_ANY_SESSION_ESTABLISHED] = {
-		[NPF_FLOW_FORW]		= NPF_ANY_SESSION_ESTABLISHED,
-		[NPF_FLOW_BACK]		= NPF_ANY_SESSION_ESTABLISHED,
+	[NPF_ANY_CONN_ESTABLISHED] = {
+		[NPF_FLOW_FORW]		= NPF_ANY_CONN_ESTABLISHED,
+		[NPF_FLOW_BACK]		= NPF_ANY_CONN_ESTABLISHED,
 	},
 };
 
 static u_int npf_generic_timeout[] __read_mostly = {
-	[NPF_ANY_SESSION_CLOSED]	= 0,
-	[NPF_ANY_SESSION_NEW]		= 30,
-	[NPF_ANY_SESSION_ESTABLISHED]	= 60,
+	[NPF_ANY_CONN_CLOSED]		= 0,
+	[NPF_ANY_CONN_NEW]		= 30,
+	[NPF_ANY_CONN_ESTABLISHED]	= 60,
 };
 
 /*
@@ -101,7 +101,6 @@
 	KASSERT(npf_iscached(npc, NPC_LAYER4));
 
 	memset(nst, 0, sizeof(npf_state_t));
-	mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 
 	switch (proto) {
 	case IPPROTO_TCP:
@@ -125,7 +124,6 @@
 npf_state_destroy(npf_state_t *nst)
 {
 	nst->nst_state = 0;
-	mutex_destroy(&nst->nst_lock);
 }
 
 /*
@@ -142,7 +140,6 @@
 	const int di = forw ? NPF_FLOW_FORW : NPF_FLOW_BACK;
 	bool ret;
 
-	mutex_enter(&nst->nst_lock);
 	switch (proto) {
 	case IPPROTO_TCP:
 		/* Pass to TCP state tracking engine. */
@@ -158,13 +155,12 @@
 		ret = false;
 	}
 	NPF_STATE_SAMPLE(nst, ret);
-	mutex_exit(&nst->nst_lock);
 
 	return ret;
 }
 
 /*
- * npf_state_etime: return session expiration time according to the state.
+ * npf_state_etime: return connection expiration time according to the state.
  */
 int
 npf_state_etime(const npf_state_t *nst, const int proto)
--- a/sys/net/npf/npf_state_tcp.c	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/net/npf/npf_state_tcp.c	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_state_tcp.c,v 1.13 2013/11/04 22:17:21 rmind Exp $	*/
+/*	$NetBSD: npf_state_tcp.c,v 1.14 2014/07/19 18:24:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_state_tcp.c,v 1.13 2013/11/04 22:17:21 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_state_tcp.c,v 1.14 2014/07/19 18:24:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -463,7 +463,6 @@
 	const u_int tcpfl = th->th_flags, state = nst->nst_state;
 	u_int nstate;
 
-	KASSERT(nst->nst_state == 0 || mutex_owned(&nst->nst_lock));
 	KASSERT(nst->nst_state < NPF_TCP_NSTATES);
 
 	/* Look for a transition to a new state. */
--- a/sys/rump/net/lib/libnpf/Makefile	Sat Jul 19 18:18:31 2014 +0000
+++ b/sys/rump/net/lib/libnpf/Makefile	Sat Jul 19 18:24:16 2014 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.13 2014/04/04 13:57:33 njoly Exp $
+#	$NetBSD: Makefile,v 1.14 2014/07/19 18:24:16 rmind Exp $
 #
 # Public Domain.
 #
@@ -11,9 +11,11 @@
 
 SRCS=	npf.c npf_alg.c npf_conf.c npf_ctl.c npf_handler.c
 SRCS+=	npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
-SRCS+=	npf_ruleset.c npf_rproc.c npf_sendpkt.c npf_session.c
+SRCS+=	npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c 
 SRCS+=	npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+=	npf_tableset_ptree.c npf_worker.c if_npflog.c
+SRCS+=	npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
+
+SRCS+=	if_npflog.c
 
 SRCS+=	npf_alg_icmp.c