- npf_do_nat: fix a race condition and simplify the logic. trunk
authorrmind <rmind@NetBSD.org>
Wed, 04 Dec 2013 01:38:49 +0000
branchtrunk
changeset 222926 dc7b345b2c95
parent 222925 0a6e02618209
child 222927 b5dbe27b3018
- npf_do_nat: fix a race condition and simplify the logic. - npf_session_setnat: clear the NAT association on failure.
sys/net/npf/npf_impl.h
sys/net/npf/npf_nat.c
sys/net/npf/npf_ruleset.c
sys/net/npf/npf_session.c
--- a/sys/net/npf/npf_impl.h	Wed Dec 04 01:03:15 2013 +0000
+++ b/sys/net/npf/npf_impl.h	Wed Dec 04 01:38:49 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.43 2013/11/23 19:32:20 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.44 2013/12/04 01:38:49 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -267,7 +267,7 @@
 uint64_t	npf_rule_getid(const npf_rule_t *);
 npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
 void		npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *);
-npf_rproc_t *	npf_rule_getrproc(npf_rule_t *);
+npf_rproc_t *	npf_rule_getrproc(const npf_rule_t *);
 
 void		npf_ext_sysinit(void);
 void		npf_ext_sysfini(void);
@@ -329,7 +329,7 @@
 int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
 int		npf_nat_translate(npf_cache_t *, nbuf_t *, npf_nat_t *,
 		    const bool, const int);
-void		npf_nat_expire(npf_nat_t *);
+void		npf_nat_destroy(npf_nat_t *);
 void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
--- a/sys/net/npf/npf_nat.c	Wed Dec 04 01:03:15 2013 +0000
+++ b/sys/net/npf/npf_nat.c	Wed Dec 04 01:38:49 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.21 2013/10/29 16:39:10 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.22 2013/12/04 01:38:49 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.21 2013/10/29 16:39:10 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.22 2013/12/04 01:38:49 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -246,13 +246,14 @@
 	npf_session_t *se;
 	npf_nat_t *nt;
 
-	/* De-associate all entries from the policy. */
+	/*
+	 * Disassociate all entries from the policy.  At this point,
+	 * new entries can no longer be created for this policy.
+	 */
 	mutex_enter(&np->n_lock);
 	LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
-		se = nt->nt_session; /* XXXSMP */
-		if (se == NULL) {
-			continue;
-		}
+		se = nt->nt_session;
+		KASSERT(se != NULL);
 		npf_session_expire(se);
 	}
 	while (!LIST_EMPTY(&np->n_nat_list)) {
@@ -265,6 +266,7 @@
 	while (np->n_refcnt) {
 		kpause("npfgcnat", false, 1, NULL);
 	}
+	KASSERT(LIST_EMPTY(&np->n_nat_list));
 
 	/* Destroy the port map, on last reference. */
 	if (pm && --pm->p_refcnt == 0) {
@@ -449,7 +451,7 @@
  * npf_nat_create: create a new NAT translation entry.
  */
 static npf_nat_t *
-npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
+npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_session_t *se)
 {
 	const int proto = npc->npc_proto;
 	npf_nat_t *nt;
@@ -457,14 +459,14 @@
 	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_LAYER4));
 
-	/* New NAT association. */
+	/* Construct a new NAT entry and associate it with the session. */
 	nt = pool_cache_get(nat_cache, PR_NOWAIT);
 	if (nt == NULL){
 		return NULL;
 	}
 	npf_stats_inc(NPF_STAT_NAT_CREATE);
 	nt->nt_natpolicy = np;
-	nt->nt_session = NULL;
+	nt->nt_session = se;
 	nt->nt_alg = NULL;
 
 	/* Save the original address which may be rewritten. */
@@ -604,7 +606,7 @@
 	npf_natpolicy_t *np;
 	npf_nat_t *nt;
 	int error;
-	bool forw, new;
+	bool forw;
 
 	/* All relevant IPv4 data should be already cached. */
 	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
@@ -619,7 +621,6 @@
 	 */
 	if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
 		np = nt->nt_natpolicy;
-		new = false;
 		goto translate;
 	}
 
@@ -635,22 +636,6 @@
 	forw = true;
 
 	/*
-	 * Create a new NAT entry (not yet associated with any session).
-	 * We will consume the reference on success (release on error).
-	 */
-	nt = npf_nat_create(npc, np);
-	if (nt == NULL) {
-		atomic_dec_uint(&np->n_refcnt);
-		return ENOMEM;
-	}
-	new = true;
-
-	/* Determine whether any ALG matches. */
-	if (npf_alg_match(npc, nbuf, nt, di)) {
-		KASSERT(nt->nt_alg != NULL);
-	}
-
-	/*
 	 * If there is no local session (no "stateful" rule - unusual, but
 	 * possible configuration), establish one before translation.  Note
 	 * that it is not a "pass" session, therefore passing of "backwards"
@@ -659,37 +644,46 @@
 	if (se == NULL) {
 		nse = npf_session_establish(npc, nbuf, di);
 		if (nse == NULL) {
-			error = ENOMEM;
-			goto out;
+			atomic_dec_uint(&np->n_refcnt);
+			return ENOMEM;
 		}
 		se = nse;
 	}
-translate:
-	/* Perform the translation. */
-	error = npf_nat_translate(npc, nbuf, nt, forw, di);
-	if (error) {
+
+	/*
+	 * Create a new NAT entry and associate with the session.
+	 * We will consume the reference on success (release on error).
+	 */
+	nt = npf_nat_create(npc, np, se);
+	if (nt == NULL) {
+		atomic_dec_uint(&np->n_refcnt);
+		error = ENOMEM;
 		goto out;
 	}
 
-	if (__predict_false(new)) {
-		/*
-		 * Associate NAT translation entry with the session.
-		 * Note: packet now has a translated address in the cache.
-		 */
-		nt->nt_session = se;
-		error = npf_session_setnat(se, nt, np->n_type);
+	/* Associate the NAT translation entry with the session. */
+	error = npf_session_setnat(se, nt, np->n_type);
+	if (error) {
+		/* Will release the reference. */
+		npf_nat_destroy(nt);
+		goto out;
+	}
+
+	/* Determine whether any ALG matches. */
+	if (npf_alg_match(npc, nbuf, nt, di)) {
+		KASSERT(nt->nt_alg != NULL);
+	}
+
+translate:
+	/* Perform the translation. */
+	error = npf_nat_translate(npc, nbuf, nt, forw, di);
 out:
-		if (error) {
-			/* If session was for NAT only - expire it. */
-			if (nse) {
-				npf_session_expire(nse);
-			}
-			/* Will free the structure and return the port. */
-			npf_nat_expire(nt);
-		}
-		if (nse) {
-			npf_session_release(nse);
-		}
+	if (error && nse) {
+		/* It created for NAT - just expire. */
+		npf_session_expire(nse);
+	}
+	if (nse) {
+		npf_session_release(nse);
 	}
 	return error;
 }
@@ -712,7 +706,6 @@
 void
 npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
 {
-
 	*addr = &nt->nt_oaddr;
 	*port = nt->nt_oport;
 }
@@ -723,16 +716,15 @@
 void
 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
 {
-
 	nt->nt_alg = alg;
 	nt->nt_alg_arg = arg;
 }
 
 /*
- * npf_nat_expire: free NAT-related data structures on session expiration.
+ * npf_nat_destroy: destroy NAT structure (performed on session expiration).
  */
 void
-npf_nat_expire(npf_nat_t *nt)
+npf_nat_destroy(npf_nat_t *nt)
 {
 	npf_natpolicy_t *np = nt->nt_natpolicy;
 
@@ -741,16 +733,15 @@
 		npf_nat_putport(np, nt->nt_tport);
 	}
 
-	/* Remove NAT entry from the list, notify any waiters if last entry. */
 	mutex_enter(&np->n_lock);
 	LIST_REMOVE(nt, nt_entry);
 	if (LIST_EMPTY(&np->n_nat_list)) {
+		/* Notify any waiters if empty. */
 		cv_broadcast(&np->n_cv);
 	}
 	atomic_dec_uint(&np->n_refcnt);
 	mutex_exit(&np->n_lock);
 
-	/* Free structure, increase the counter. */
 	pool_cache_put(nat_cache, nt);
 	npf_stats_inc(NPF_STAT_NAT_DESTROY);
 }
--- a/sys/net/npf/npf_ruleset.c	Wed Dec 04 01:03:15 2013 +0000
+++ b/sys/net/npf/npf_ruleset.c	Wed Dec 04 01:38:49 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ruleset.c,v 1.29 2013/11/23 19:32:20 rmind Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.30 2013/12/04 01:38:49 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.29 2013/11/23 19:32:20 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.30 2013/12/04 01:38:49 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -469,10 +469,8 @@
 }
 
 /*
- * npf_ruleset_natreload: minimum reload of NAT policies by maching
+ * npf_ruleset_natreload: minimum reload of NAT policies by matching
  * two (active and new) NAT rulesets.
- *
- * => Active ruleset should be exclusively locked.
  */
 void
 npf_ruleset_natreload(npf_ruleset_t *nrlset, npf_ruleset_t *arlset)
@@ -480,6 +478,8 @@
 	npf_natpolicy_t *np, *anp;
 	npf_rule_t *rl, *arl;
 
+	KASSERT(npf_config_locked_p());
+
 	/* Scan a new NAT ruleset against NAT policies in old ruleset. */
 	LIST_FOREACH(rl, &nrlset->rs_all, r_aentry) {
 		np = rl->r_natp;
@@ -629,7 +629,7 @@
 }
 
 npf_rproc_t *
-npf_rule_getrproc(npf_rule_t *rl)
+npf_rule_getrproc(const npf_rule_t *rl)
 {
 	npf_rproc_t *rp = rl->r_rproc;
 
--- a/sys/net/npf/npf_session.c	Wed Dec 04 01:03:15 2013 +0000
+++ b/sys/net/npf/npf_session.c	Wed Dec 04 01:38:49 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_session.c,v 1.28 2013/11/22 01:24:21 rmind Exp $	*/
+/*	$NetBSD: npf_session.c,v 1.29 2013/12/04 01:38:49 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -92,7 +92,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.28 2013/11/22 01:24:21 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.29 2013/12/04 01:38:49 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -701,7 +701,7 @@
 {
 	if (se->s_nat) {
 		/* Release any NAT related structures. */
-		npf_nat_expire(se->s_nat);
+		npf_nat_destroy(se->s_nat);
 	}
 	if (se->s_rproc) {
 		/* Release rule procedure. */
@@ -788,6 +788,7 @@
 		/* Race: mark a removed entry and explicitly expire. */
 		atomic_or_uint(&se->s_flags, SE_REMBACK | SE_EXPIRE);
 		npf_stats_inc(NPF_STAT_RACE_NAT);
+		se->s_nat = NULL;
 	}
 	rw_exit(&sh->sh_lock);
 	return ok ? 0 : EISCONN;
@@ -845,7 +846,7 @@
 npf_session_release(npf_session_t *se)
 {
 	KASSERT(se->s_refcnt > 0);
-	if ((se->s_flags & SE_ACTIVE) == 0) {
+	if ((se->s_flags & (SE_ACTIVE | SE_EXPIRE)) == 0) {
 		/* Activate: after this point, session is globally visible. */
 		se->s_flags |= SE_ACTIVE;
 	}