Pull up following revision(s) (requested by rmind in ticket #1319): netbsd-7
authorsnj <snj@NetBSD.org>
Sun, 18 Dec 2016 07:40:50 +0000
branchnetbsd-7
changeset 255052 76aff522e4c6
parent 255051 fe0791ca9de9
child 255053 8bcf13933b38
Pull up following revision(s) (requested by rmind in ticket #1319): sys/modules/npf/Makefile: revision 1.19 sys/net/npf/files.npf: revision 1.18 sys/net/npf/lpm.c: revision 1.1 sys/net/npf/lpm.h: revision 1.1 sys/net/npf/npf_impl.h: revision 1.62 sys/net/npf/npf_tableset.c: revision 1.24 sys/net/npf/npf_tableset_ptree.c: file removal sys/rump/net/lib/libnpf/Makefile: revision 1.18 This patches ditches the ptree(3) library, because it is broken (you can get missing entries!). Instead, as a temporary solution, we switch to a simple linear scan of the hash tables for the longest-prefix-match (lpm.c lpm.h) algorithm. In fact, with few unique prefixes in the set, on modern hardware this simple algorithm is pretty fast anyway! -- ditch ptree and use lpm -- remove ptree add lpm
sys/modules/npf/Makefile
sys/net/npf/files.npf
sys/net/npf/lpm.c
sys/net/npf/lpm.h
sys/net/npf/npf_impl.h
sys/net/npf/npf_tableset.c
sys/net/npf/npf_tableset_ptree.c
sys/rump/net/lib/libnpf/Makefile
--- a/sys/modules/npf/Makefile	Sun Dec 18 07:12:06 2016 +0000
+++ b/sys/modules/npf/Makefile	Sun Dec 18 07:40:50 2016 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.17 2014/07/19 18:24:17 rmind Exp $
+# $NetBSD: Makefile,v 1.17.2.1 2016/12/18 07:40:50 snj Exp $
 #
 # Public Domain.
 #
@@ -13,7 +13,7 @@
 SRCS+=		npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
 SRCS+=		npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c
 SRCS+=		npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+=		npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
+SRCS+=		lpm.c npf_sendpkt.c npf_worker.c
 
 CPPFLAGS+=	-DINET6
 
--- a/sys/net/npf/files.npf	Sun Dec 18 07:12:06 2016 +0000
+++ b/sys/net/npf/files.npf	Sun Dec 18 07:40:50 2016 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: files.npf,v 1.17.2.1 2016/12/18 07:40:50 snj Exp $
 #
 # Public Domain.
 #
@@ -19,7 +19,6 @@
 file	net/npf/npf_ruleset.c			npf
 file	net/npf/npf_rproc.c			npf
 file	net/npf/npf_tableset.c			npf
-file	net/npf/npf_tableset_ptree.c		npf
 file	net/npf/npf_if.c			npf
 file	net/npf/npf_inet.c			npf
 file	net/npf/npf_conn.c			npf
@@ -31,6 +30,9 @@
 file	net/npf/npf_sendpkt.c			npf
 file	net/npf/npf_worker.c			npf
 
+# LPM
+file	net/npf/lpm.c				npf
+
 # Built-in extensions.
 file	net/npf/npf_ext_log.c			npf
 file	net/npf/npf_ext_normalize.c		npf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/lpm.c	Sun Dec 18 07:40:50 2016 +0000
@@ -0,0 +1,401 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * TODO: Simple linear scan for now (works just well with a few prefixes).
+ * TBD on a better algorithm.
+ */
+
+#if defined(_KERNEL)
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.1.2.2 2016/12/18 07:40:50 snj Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#define kmem_alloc(a, b) malloc(a)
+#define kmem_free(a, b) free(a)
+#define kmem_zalloc(a, b) calloc(a, 1)
+#endif
+
+#include "lpm.h"
+
+#define	LPM_MAX_PREFIX		(128)
+#define	LPM_MAX_WORDS		(LPM_MAX_PREFIX >> 5)
+#define	LPM_TO_WORDS(x)		((x) >> 2)
+#define	LPM_HASH_STEP		(8)
+
+#ifdef DEBUG
+#define	ASSERT	assert
+#else
+#define	ASSERT
+#endif
+
+typedef struct lpm_ent {
+	struct lpm_ent *next;
+	void *		val;
+	unsigned	len;
+	uint8_t		key[];
+} lpm_ent_t;
+
+typedef struct {
+	uint32_t	hashsize;
+	uint32_t	nitems;
+	lpm_ent_t **bucket;
+} lpm_hmap_t;
+
+struct lpm {
+	uint32_t	bitmask[LPM_MAX_WORDS];
+	void *		defval;
+	lpm_hmap_t	prefix[LPM_MAX_PREFIX + 1];
+};
+
+lpm_t *
+lpm_create(void)
+{
+	return kmem_zalloc(sizeof(lpm_t), KM_SLEEP);
+}
+
+void
+lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg)
+{
+	for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) {
+		lpm_hmap_t *hmap = &lpm->prefix[n];
+
+		if (!hmap->hashsize) {
+			KASSERT(!hmap->bucket);
+			continue;
+		}
+		for (unsigned i = 0; i < hmap->hashsize; i++) {
+			lpm_ent_t *entry = hmap->bucket[i];
+
+			while (entry) {
+				lpm_ent_t *next = entry->next;
+
+				if (dtor) {
+					dtor(arg, entry->key,
+					    entry->len, entry->val);
+				}
+				kmem_free(entry, 
+				    offsetof(lpm_ent_t, key[entry->len]));
+				entry = next;
+			}
+		}
+		kmem_free(hmap->bucket, hmap->hashsize);
+		hmap->bucket = NULL;
+		hmap->hashsize = 0;
+		hmap->nitems = 0;
+	}
+	memset(lpm->bitmask, 0, sizeof(lpm->bitmask));
+	lpm->defval = NULL;
+}
+
+void
+lpm_destroy(lpm_t *lpm)
+{
+	lpm_clear(lpm, NULL, NULL);
+	kmem_free(lpm, sizeof(*lpm));
+}
+
+/*
+ * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant).
+ */
+static uint32_t
+fnv1a_hash(const void *buf, size_t len)
+{
+	uint32_t hash = 2166136261UL;
+	const uint8_t *p = buf;
+
+	while (len--) {
+		hash ^= *p++;
+		hash *= 16777619U;
+	}
+	return hash;
+}
+
+static bool
+hashmap_rehash(lpm_hmap_t *hmap, uint32_t size)
+{
+	lpm_ent_t **bucket;
+	uint32_t hashsize;
+
+	for (hashsize = 1; hashsize < size; hashsize <<= 1) {
+		continue;
+	}
+	bucket = kmem_zalloc(hashsize * sizeof(*bucket), KM_SLEEP);
+	if (bucket == NULL)
+		return false;
+	for (unsigned n = 0; n < hmap->hashsize; n++) {
+		lpm_ent_t *list = hmap->bucket[n];
+
+		while (list) {
+			lpm_ent_t *entry = list;
+			uint32_t hash = fnv1a_hash(entry->key, entry->len);
+			const size_t i = hash & (hashsize - 1);
+
+			list = entry->next;
+			entry->next = bucket[i];
+			bucket[i] = entry;
+		}
+	}
+	if (hmap->bucket)
+		kmem_free(hmap->bucket, hmap->hashsize);
+	hmap->bucket = bucket;
+	hmap->hashsize = hashsize;
+	return true;
+}
+
+static lpm_ent_t *
+hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t target = hmap->nitems + LPM_HASH_STEP;
+	const size_t entlen = offsetof(lpm_ent_t, key[len]);
+	uint32_t hash, i;
+	lpm_ent_t *entry;
+
+	if (hmap->hashsize < target && !hashmap_rehash(hmap, target)) {
+		return NULL;
+	}
+
+	hash = fnv1a_hash(key, len);
+	i = hash & (hmap->hashsize - 1);
+	entry = hmap->bucket[i];
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			return entry;
+		}
+		entry = entry->next;
+	}
+
+	if ((entry = kmem_alloc(entlen, KM_SLEEP)) == NULL)
+		return NULL;
+
+	memcpy(entry->key, key, len);
+	entry->next = hmap->bucket[i];
+	entry->len = len;
+
+	hmap->bucket[i] = entry;
+	hmap->nitems++;
+	return entry;
+}
+
+static lpm_ent_t *
+hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t hash = fnv1a_hash(key, len);
+	const uint32_t i = hash & (hmap->hashsize - 1);
+	lpm_ent_t *entry = hmap->bucket[i];
+
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			return entry;
+		}
+		entry = entry->next;
+	}
+	return NULL;
+}
+
+static int
+hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t hash = fnv1a_hash(key, len);
+	const uint32_t i = hash & (hmap->hashsize - 1);
+	lpm_ent_t *prev = NULL, *entry = hmap->bucket[i];
+
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			if (prev) {
+				prev->next = entry->next;
+			} else {
+				hmap->bucket[i] = entry->next;
+			}
+			free(entry, M_TEMP);
+			return 0;
+		}
+		prev = entry;
+		entry = entry->next;
+	}
+	return -1;
+}
+
+/*
+ * compute_prefix: given the address and prefix length, compute and
+ * return the address prefix.
+ */
+static inline void
+compute_prefix(const unsigned nwords, const uint32_t *addr,
+    unsigned preflen, uint32_t *prefix)
+{
+	uint32_t addr2[4];
+
+	if ((uintptr_t)addr & 3) {
+		/* Unaligned address: just copy for now. */
+		memcpy(addr2, addr, nwords * 4);
+		addr = addr2;
+	}
+	for (unsigned i = 0; i < nwords; i++) {
+		if (preflen == 0) {
+			prefix[i] = 0;
+			continue;
+		}
+		if (preflen < 32) {
+			uint32_t mask = htonl(0xffffffff << (32 - preflen));
+			prefix[i] = addr[i] & mask;
+			preflen = 0;
+		} else {
+			prefix[i] = addr[i];
+			preflen -= 32;
+		}
+	}
+}
+
+/*
+ * lpm_insert: insert the CIDR into the LPM table.
+ *
+ * => Returns zero on success and -1 on failure.
+ */
+int
+lpm_insert(lpm_t *lpm, const void *addr,
+    size_t len, unsigned preflen, void *val)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	uint32_t prefix[LPM_MAX_WORDS];
+	lpm_ent_t *entry;
+
+	if (preflen == 0) {
+		/* Default is a special case. */
+		lpm->defval = val;
+		return 0;
+	}
+	compute_prefix(nwords, addr, preflen, prefix);
+	entry = hashmap_insert(&lpm->prefix[preflen], prefix, len);
+	if (entry) {
+		const unsigned n = --preflen >> 5;
+		lpm->bitmask[n] |= 0x80000000U >> (preflen & 31);
+		entry->val = val;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * lpm_remove: remove the specified prefix.
+ */
+int
+lpm_remove(lpm_t *lpm, const void *addr, size_t len, unsigned preflen)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	uint32_t prefix[LPM_MAX_WORDS];
+
+	if (preflen == 0) {
+		lpm->defval = NULL;
+		return 0;
+	}
+	compute_prefix(nwords, addr, preflen, prefix);
+	return hashmap_remove(&lpm->prefix[preflen], prefix, len);
+}
+
+/*
+ * lpm_lookup: find the longest matching prefix given the IP address.
+ *
+ * => Returns the associated value on success or NULL on failure.
+ */
+void *
+lpm_lookup(lpm_t *lpm, const void *addr, size_t len)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	unsigned i, n = nwords;
+	uint32_t prefix[LPM_MAX_WORDS];
+
+	while (n--) {
+		uint32_t bitmask = lpm->bitmask[n];
+
+		while ((i = ffs(bitmask)) != 0) {
+			const unsigned preflen = (32 * n) + (32 - --i);
+			lpm_hmap_t *hmap = &lpm->prefix[preflen];
+			lpm_ent_t *entry;
+
+			compute_prefix(nwords, addr, preflen, prefix);
+			entry = hashmap_lookup(hmap, prefix, len);
+			if (entry) {
+				return entry->val;
+			}
+			bitmask &= ~(1U << i);
+		}
+	}
+	return lpm->defval;
+}
+
+#if !defined(_KERNEL)
+/*
+ * lpm_strtobin: convert CIDR string to the binary IP address and mask.
+ *
+ * => The address will be in the network byte order.
+ * => Returns 0 on success or -1 on failure.
+ */
+int
+lpm_strtobin(const char *cidr, void *addr, size_t *len, unsigned *preflen)
+{
+	char *p, buf[INET6_ADDRSTRLEN];
+
+	strncpy(buf, cidr, sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	if ((p = strchr(buf, '/')) != NULL) {
+		const ptrdiff_t off = p - buf;
+		*preflen = atoi(&buf[off + 1]);
+		buf[off] = '\0';
+	} else {
+		*preflen = LPM_MAX_PREFIX;
+	}
+
+	if (inet_pton(AF_INET6, buf, addr) == 1) {
+		*len = 16;
+		return 0;
+	}
+	if (inet_pton(AF_INET, buf, addr) == 1) {
+		if (*preflen == LPM_MAX_PREFIX) {
+			*preflen = 32;
+		}
+		*len = 4;
+		return 0;
+	}
+	return -1;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/lpm.h	Sun Dec 18 07:40:50 2016 +0000
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LPM_H_
+#define _LPM_H_
+
+__BEGIN_DECLS
+
+typedef struct lpm lpm_t;
+typedef void (*lpm_dtor_t)(void *, const void *, size_t, void *);
+
+lpm_t *		lpm_create(void);
+void		lpm_destroy(lpm_t *);
+void		lpm_clear(lpm_t *, lpm_dtor_t, void *);
+
+int		lpm_insert(lpm_t *, const void *, size_t, unsigned, void *);
+int		lpm_remove(lpm_t *, const void *, size_t, unsigned);
+void *		lpm_lookup(lpm_t *, const void *, size_t);
+int		lpm_strtobin(const char *, void *, size_t *, unsigned *);
+
+__END_DECLS
+
+#endif
--- a/sys/net/npf/npf_impl.h	Sun Dec 18 07:12:06 2016 +0000
+++ b/sys/net/npf/npf_impl.h	Sun Dec 18 07:40:50 2016 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.58.2.3 2015/02/04 07:13:04 snj Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.58.2.4 2016/12/18 07:40:50 snj Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -49,7 +49,6 @@
 
 #include <sys/types.h>
 #include <sys/queue.h>
-#include <sys/ptree.h>
 
 #include <net/bpf.h>
 #include <net/bpfjit.h>
@@ -228,8 +227,6 @@
 void		npf_tableset_sysinit(void);
 void		npf_tableset_sysfini(void);
 
-extern const pt_tree_ops_t npf_table_ptree_ops;
-
 npf_tableset_t *npf_tableset_create(u_int);
 void		npf_tableset_destroy(npf_tableset_t *);
 int		npf_tableset_insert(npf_tableset_t *, npf_table_t *);
--- a/sys/net/npf/npf_tableset.c	Sun Dec 18 07:12:06 2016 +0000
+++ b/sys/net/npf/npf_tableset.c	Sun Dec 18 07:40:50 2016 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $	*/
+/*	$NetBSD: npf_tableset.c,v 1.22.2.1 2016/12/18 07:40:50 snj Exp $	*/
 
 /*-
- * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.22.2.1 2016/12/18 07:40:50 snj Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -58,13 +58,12 @@
 #include <sys/types.h>
 
 #include "npf_impl.h"
+#include "lpm.h"
 
 typedef struct npf_tblent {
-	union {
-		LIST_ENTRY(npf_tblent) te_hashent;
-		pt_node_t	te_node;
-	} /* C11 */;
-	int			te_alen;
+	LIST_ENTRY(npf_tblent)	te_listent;
+	uint16_t		te_preflen;
+	uint16_t		te_alen;
 	npf_addr_t		te_addr;
 } npf_tblent_t;
 
@@ -81,7 +80,8 @@
 			u_long		t_hashmask;
 		};
 		struct {
-			pt_tree_t	t_tree[2];
+			lpm_t *		t_lpm;
+			LIST_HEAD(, npf_tblent) t_list;
 		};
 		struct {
 			void *		t_blob;
@@ -294,7 +294,7 @@
 	 * Lookup the hash table and check for duplicates.
 	 * Note: mask is ignored for the hash storage.
 	 */
-	LIST_FOREACH(ent, htbl, te_hashent) {
+	LIST_FOREACH(ent, htbl, te_listent) {
 		if (ent->te_alen != alen) {
 			continue;
 		}
@@ -307,27 +307,28 @@
 }
 
 static void
-table_hash_destroy(npf_table_t *t)
+table_hash_flush(npf_table_t *t)
 {
 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
 		npf_tblent_t *ent;
 
 		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
-			LIST_REMOVE(ent, te_hashent);
+			LIST_REMOVE(ent, te_listent);
 			pool_cache_put(tblent_cache, ent);
 		}
 	}
 }
 
 static void
-table_tree_destroy(pt_tree_t *tree)
+table_tree_flush(npf_table_t *t)
 {
 	npf_tblent_t *ent;
 
-	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
-		ptree_remove_node(tree, ent);
+	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
+		LIST_REMOVE(ent, te_listent);
 		pool_cache_put(tblent_cache, ent);
 	}
+	lpm_clear(t->t_lpm, NULL, NULL);
 }
 
 /*
@@ -339,35 +340,27 @@
 {
 	npf_table_t *t;
 
-	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
+	t = kmem_zalloc(sizeof(*t), KM_SLEEP);
 	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
 
 	switch (type) {
 	case NPF_TABLE_TREE:
-		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
-		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
-		    offsetof(npf_tblent_t, te_node),
-		    offsetof(npf_tblent_t, te_addr));
-		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
-		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
-		    offsetof(npf_tblent_t, te_node),
-		    offsetof(npf_tblent_t, te_addr));
+		if ((t->t_lpm = lpm_create()) == NULL)
+			goto out;
+		LIST_INIT(&t->t_list);
 		break;
 	case NPF_TABLE_HASH:
 		t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
-		if (t->t_hashl == NULL) {
-			kmem_free(t, sizeof(npf_table_t));
-			return NULL;
-		}
+		if (t->t_hashl == NULL)
+			goto out;
 		break;
 	case NPF_TABLE_CDB:
 		t->t_blob = blob;
 		t->t_bsize = size;
 		t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
 		if (t->t_cdb == NULL) {
-			kmem_free(t, sizeof(npf_table_t));
 			free(blob, M_TEMP);
-			return NULL;
+			goto out;
 		}
 		t->t_nitems = cdbr_entries(t->t_cdb);
 		break;
@@ -379,6 +372,10 @@
 	t->t_id = tid;
 
 	return t;
+out:
+	kmem_free(t, sizeof(*t));
+	return NULL;
+	
 }
 
 /*
@@ -391,12 +388,12 @@
 
 	switch (t->t_type) {
 	case NPF_TABLE_HASH:
-		table_hash_destroy(t);
+		table_hash_flush(t);
 		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
 		break;
 	case NPF_TABLE_TREE:
-		table_tree_destroy(&t->t_tree[0]);
-		table_tree_destroy(&t->t_tree[1]);
+		table_tree_flush(t);
+		lpm_destroy(t->t_lpm);
 		break;
 	case NPF_TABLE_CDB:
 		cdbr_close(t->t_cdb);
@@ -406,7 +403,7 @@
 		KASSERT(false);
 	}
 	rw_destroy(&t->t_lock);
-	kmem_free(t, sizeof(npf_table_t));
+	kmem_free(t, sizeof(*t));
 }
 
 /*
@@ -494,7 +491,7 @@
 			break;
 		}
 		if (!table_hash_lookup(t, addr, alen, &htbl)) {
-			LIST_INSERT_HEAD(htbl, ent, te_hashent);
+			LIST_INSERT_HEAD(htbl, ent, te_listent);
 			t->t_nitems++;
 		} else {
 			error = EEXIST;
@@ -502,16 +499,12 @@
 		break;
 	}
 	case NPF_TABLE_TREE: {
-		pt_tree_t *tree = &t->t_tree[aidx];
-		bool ok;
-
-		/*
-		 * If no mask specified, use maximum mask.
-		 */
-		ok = (mask != NPF_NO_NETMASK) ?
-		    ptree_insert_mask_node(tree, ent, mask) :
-		    ptree_insert_node(tree, ent);
-		if (ok) {
+		const unsigned preflen =
+		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
+		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
+		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
+			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
+			ent->te_preflen = preflen;
 			t->t_nitems++;
 			error = 0;
 		} else {
@@ -556,17 +549,17 @@
 
 		ent = table_hash_lookup(t, addr, alen, &htbl);
 		if (__predict_true(ent != NULL)) {
-			LIST_REMOVE(ent, te_hashent);
+			LIST_REMOVE(ent, te_listent);
 			t->t_nitems--;
 		}
 		break;
 	}
 	case NPF_TABLE_TREE: {
-		pt_tree_t *tree = &t->t_tree[aidx];
-
-		ent = ptree_find_node(tree, addr);
+		ent = lpm_lookup(t->t_lpm, addr, alen);
 		if (__predict_true(ent != NULL)) {
-			ptree_remove_node(tree, ent);
+			LIST_REMOVE(ent, te_listent);
+			lpm_remove(t->t_lpm, &ent->te_addr,
+			    ent->te_alen, ent->te_preflen);
 			t->t_nitems--;
 		}
 		break;
@@ -611,7 +604,7 @@
 		break;
 	case NPF_TABLE_TREE:
 		rw_enter(&t->t_lock, RW_READER);
-		found = ptree_find_node(&t->t_tree[aidx], addr) != NULL;
+		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
 		rw_exit(&t->t_lock);
 		break;
 	case NPF_TABLE_CDB:
@@ -655,7 +648,7 @@
 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
 		npf_tblent_t *ent;
 
-		LIST_FOREACH(ent, &t->t_hashl[n], te_hashent) {
+		LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
 			error = table_ent_copyout(&ent->te_addr,
 			    ent->te_alen, 0, ubuf, len, &off);
 			if (error)
@@ -666,20 +659,15 @@
 }
 
 static int
-table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
-    size_t len, size_t *off)
+table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
 {
-	npf_tblent_t *ent = NULL;
+	npf_tblent_t *ent;
+	size_t off = 0;
 	int error = 0;
 
-	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
-		pt_bitlen_t blen;
-
-		if (!ptree_mask_node_p(tree, ent, &blen)) {
-			blen = maxmask;
-		}
-		error = table_ent_copyout(&ent->te_addr, ent->te_alen,
-		    blen, ubuf, len, off);
+	LIST_FOREACH(ent, &t->t_list, te_listent) {
+		error = table_ent_copyout(&ent->te_addr,
+		    ent->te_alen, 0, ubuf, len, &off);
 		if (error)
 			break;
 	}
@@ -710,7 +698,6 @@
 int
 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
 {
-	size_t off = 0;
 	int error = 0;
 
 	rw_enter(&t->t_lock, RW_READER);
@@ -719,10 +706,7 @@
 		error = table_hash_list(t, ubuf, len);
 		break;
 	case NPF_TABLE_TREE:
-		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
-		if (error)
-			break;
-		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
+		error = table_tree_list(t, ubuf, len);
 		break;
 	case NPF_TABLE_CDB:
 		error = table_cdb_list(t, ubuf, len);
@@ -746,12 +730,11 @@
 	rw_enter(&t->t_lock, RW_WRITER);
 	switch (t->t_type) {
 	case NPF_TABLE_HASH:
-		table_hash_destroy(t);
+		table_hash_flush(t);
 		t->t_nitems = 0;
 		break;
 	case NPF_TABLE_TREE:
-		table_tree_destroy(&t->t_tree[0]);
-		table_tree_destroy(&t->t_tree[1]);
+		table_tree_flush(t);
 		t->t_nitems = 0;
 		break;
 	case NPF_TABLE_CDB:
--- a/sys/net/npf/npf_tableset_ptree.c	Sun Dec 18 07:12:06 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-/*	$NetBSD: npf_tableset_ptree.c,v 1.1 2012/07/15 00:23:01 rmind Exp $	*/
-
-/*-
- * Copyright (c) 2012 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Matt Thomas <matt@3am-software.com>.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Patricia/RADIX tree comparators for NPF tables.
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_tableset_ptree.c,v 1.1 2012/07/15 00:23:01 rmind Exp $");
-
-#include <sys/param.h>
-#include <sys/types.h>
-
-#include <sys/bitops.h>
-#include <sys/ptree.h>
-
-#include "npf_impl.h"
-
-#define	DIV32		5
-
-static pt_slot_t
-npf_ptree_testkey(const void *vkey, pt_bitoff_t bitoff,
-    pt_bitlen_t bitlen, void *ctx)
-{
-	const uint32_t *addr = (const uint32_t *)vkey;
-
-	KASSERT(bitoff < 32 * (const uintptr_t)ctx);
-	KASSERT(bitlen == 1);
-
-	const uint32_t bits = ntohl(addr[bitoff >> DIV32]);
-	const uint32_t mask = 0x80000000U >> (bitoff & 31);
-	return (bits & mask) ? PT_SLOT_RIGHT : PT_SLOT_LEFT;
-}
-
-static bool
-npf_ptree_matchkey(const void *vleft, const void *vright,
-    pt_bitoff_t bitoff, pt_bitlen_t bitlen, void *ctx)
-{
-	const uint32_t *left = (const uint32_t *)vleft;
-	const uint32_t *right = (const uint32_t *)vright;
-	const u_int nwords = (const uintptr_t)ctx;
-	size_t i = bitoff >> DIV32;
-
-	/* Constrain bitlen to a reasonable value. */
-	if (nwords * 32 < bitoff + bitlen || nwords * 32 < bitlen) {
-		bitlen = nwords * 32 - bitoff;
-	}
-
-	/* Find the first word from the offset. */
-	left += i;
-	right += i;
-	bitoff -= i * 32;
-
-	for (; i < nwords; i++, left++, right++, bitoff = 0) {
-		const uint32_t bits = ntohl(*left ^ *right);
-		const signed int xbitlen = 32 - (bitoff + bitlen);
-		uint32_t mask = UINT32_MAX >> bitoff;
-
-		/*
-		 * We have the mask up to the lowest bit.  Overlap with the
-		 * mask up to required lowest bit, if extacting the middle.
-		 */
-		KASSERT((size_t)bitoff < 32);
-		if (xbitlen > 0) {
-			mask &= UINT32_MAX << xbitlen;
-		}
-
-		/* Compare the masked part of the word. */
-		if (bits & mask) {
-			return false;
-		}
-
-		/* Done if extracting the middle or exactly up to the LSB. */
-		if (xbitlen >= 0) {
-			break;
-		}
-		bitlen = -xbitlen;
-	}
-
-	return true;
-}
-
-static bool
-npf_ptree_matchnode(const void *vleft, const void *vright,
-    pt_bitoff_t maxbitoff, pt_bitoff_t *bitoffp, pt_bitoff_t *slotp, void *ctx)
-{
-	static const uint32_t zeroes[4] = { 0, 0, 0, 0 };
-	const uint32_t *left = (const uint32_t *)vleft;
-	const uint32_t *right = vright ? (const uint32_t *)vright : zeroes;
-	pt_bitoff_t bitoff = *bitoffp;
-	const u_int nwords = (const uintptr_t)ctx;
-	size_t i = bitoff >> DIV32;
-
-	/* Constrain maxbitoff & bitlen to reasonable value. */
-	if (maxbitoff > nwords * 32) {
-		maxbitoff = nwords * 32;
-	}
-	pt_bitoff_t bitlen = maxbitoff - bitoff;
-
-	/* Find the first word from the offset. */
-	*slotp = PT_SLOT_LEFT;
-	left += i;
-	right += i;
-	bitoff -= i * 32;
-
-	for (; i < nwords; i++, left++, right++, bitoff = 0) {
-		const signed int xbitlen = 32 - (bitoff + bitlen);
-		uint32_t bits = ntohl(*left ^ *right);
-		uint32_t mask = UINT32_MAX >> bitoff;
-
-		KASSERT((size_t)bitoff < 32);
-		if (xbitlen > 0) {
-			mask &= UINT32_MAX << xbitlen;
-		}
-
-		/* Compare the masked part of the word. */
-		bits &= mask;
-		if (bits) {
-			/*
-			 * Did not match.  Find the bit where the difference
-			 * occured.  Also, determine the slot.
-			 */
-			bitoff = 32 * i + (32 - fls32(bits));
-
-			KASSERT(bitoff < nwords * 32);
-			KASSERT(bitoff >= *bitoffp);
-			KASSERT(bitoff <= maxbitoff);
-
-			*bitoffp = bitoff;
-			if ((ntohl(*left) >> (31 - bitoff)) & 1) {
-				*slotp = PT_SLOT_RIGHT;
-			}
-
-			KASSERT(npf_ptree_testkey(vleft, bitoff, 1, ctx)
-			    == *slotp);
-			return false;
-		}
-		if (xbitlen >= 0) {
-			i++;
-			break;
-		}
-		bitlen = -xbitlen;
-	}
-
-	bitoff = 32 * i;
-	*bitoffp = bitoff < maxbitoff ? bitoff : maxbitoff;
-	return true;
-}
-
-const pt_tree_ops_t npf_table_ptree_ops = {
-	.ptto_matchnode	= npf_ptree_matchnode,
-	.ptto_matchkey	= npf_ptree_matchkey,
-	.ptto_testnode	= npf_ptree_testkey,
-	.ptto_testkey	= npf_ptree_testkey,
-};
--- a/sys/rump/net/lib/libnpf/Makefile	Sun Dec 18 07:12:06 2016 +0000
+++ b/sys/rump/net/lib/libnpf/Makefile	Sun Dec 18 07:40:50 2016 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.14 2014/07/19 18:24:16 rmind Exp $
+#	$NetBSD: Makefile,v 1.14.2.1 2016/12/18 07:40:50 snj Exp $
 #
 # Public Domain.
 #
@@ -13,7 +13,7 @@
 SRCS+=	npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
 SRCS+=	npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c 
 SRCS+=	npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+=	npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
+SRCS+=	lpm.c npf_sendpkt.c npf_worker.c
 
 SRCS+=	if_npflog.c