NPF: fix the recent breakage of the traceroute ALG. Also, simplify and trunk
authorrmind <rmind@NetBSD.org>
Wed, 19 Feb 2014 03:51:31 +0000
branchtrunk
changeset 224655 7aa48b8a7d3a
parent 224654 f9811f5131c2
child 224656 762c5580ba83
NPF: fix the recent breakage of the traceroute ALG. Also, simplify and refactor a little bit.
sys/net/npf/npf_alg_icmp.c
sys/net/npf/npf_impl.h
sys/net/npf/npf_inet.c
sys/net/npf/npf_nat.c
--- a/sys/net/npf/npf_alg_icmp.c	Wed Feb 19 01:43:16 2014 +0000
+++ b/sys/net/npf/npf_alg_icmp.c	Wed Feb 19 03:51:31 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_alg_icmp.c,v 1.19 2014/02/16 22:10:40 rmind Exp $	*/
+/*	$NetBSD: npf_alg_icmp.c,v 1.20 2014/02/19 03:51:31 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.19 2014/02/16 22:10:40 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.20 2014/02/19 03:51:31 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -305,6 +305,7 @@
 static bool
 npfa_icmp_nat(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw)
 {
+	const u_int which = NPF_SRC;
 	npf_cache_t enpc;
 
 	if (forw || !npf_iscached(npc, NPC_ICMP))
@@ -315,6 +316,9 @@
 	KASSERT(npf_iscached(&enpc, NPC_IP46));
 	KASSERT(npf_iscached(&enpc, NPC_LAYER4));
 
+	/*
+	 * ICMP: fetch the current checksum we are going to fixup.
+	 */
 	struct icmp *ic = npc->npc_l4.icmp;
 	uint16_t cksum = ic->icmp_cksum;
 
@@ -322,12 +326,9 @@
 	    offsetof(struct icmp6_hdr, icmp6_cksum));
 
 	/*
-	 * Retrieve the original address and port, then calculate ICMP
-	 * checksum for these changes in the embedded packet.  While data
-	 * is not rewritten in the cache, save IP and TCP/UDP checksums.
-	 *
-	 * XXX: Assumes NPF_NATOUT (source address/port).  Currently,
-	 * npfa_icmp_match() matches only for the PFIL_OUT traffic.
+	 * Fetch the IP and port in the _embedded_ packet.  Also, fetch
+	 * the IPv4 and TCP/UDP checksums before they are rewritten.
+	 * Calculate the part of the ICMP checksum fixup.
 	 */
 	const int proto = enpc.npc_proto;
 	uint16_t ipcksum = 0, l4cksum = 0;
@@ -340,7 +341,7 @@
 		const struct ip *eip = enpc.npc_ip.v4;
 		ipcksum = eip->ip_sum;
 	}
-	cksum = npf_addr_cksum(cksum, enpc.npc_alen, enpc.npc_ips[NPF_SRC], addr);
+	cksum = npf_addr_cksum(cksum, enpc.npc_alen, enpc.npc_ips[which], addr);
 
 	switch (proto) {
 	case IPPROTO_TCP: {
@@ -363,17 +364,23 @@
 	}
 
 	/*
-	 * Rewrite the source IP address and port of the embedded IP header,
-	 * which represents the original packet.  This updates the checksums
-	 * in the embedded packet.
+	 * Translate the embedded packet.  The following changes will
+	 * be performed by npf_napt_rwr():
+	 *
+	 *	1) Rewrite the IP address and, if not ICMP, port.
+	 *	2) Rewrite the TCP/UDP checksum (if not ICMP).
+	 *	3) Rewrite the IPv4 checksum for (1) and (2).
+	 *
+	 * XXX: Assumes NPF_NATOUT (source address/port).  Currently,
+	 * npfa_icmp_match() matches only for the PFIL_OUT traffic.
 	 */
-	if (npf_nat_translate(&enpc, nbuf, nt, forw)) {
+	if (npf_napt_rwr(&enpc, which, addr, port)) {
 		return false;
 	}
 
 	/*
-	 * Finish calculation of the ICMP checksum: include the checksum
-	 * change in the embedded packet.
+	 * Finally, finish the ICMP checksum fixup: include the checksum
+	 * changes in the embedded packet.
 	 */
 	if (npf_iscached(&enpc, NPC_IP4)) {
 		const struct ip *eip = enpc.npc_ip.v4;
--- a/sys/net/npf/npf_impl.h	Wed Feb 19 01:43:16 2014 +0000
+++ b/sys/net/npf/npf_impl.h	Wed Feb 19 03:51:31 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.48 2014/02/16 22:10:40 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.49 2014/02/19 03:51:31 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -200,6 +200,8 @@
 bool		npf_rwrport(const npf_cache_t *, u_int, const in_port_t);
 bool		npf_rwrcksum(const npf_cache_t *, u_int,
 		    const npf_addr_t *, const in_port_t);
+int		npf_napt_rwr(const npf_cache_t *, u_int, const npf_addr_t *,
+		    const in_addr_t);
 int		npf_npt66_rwr(const npf_cache_t *, u_int, const npf_addr_t *,
 		    npf_netmask_t, uint16_t);
 
@@ -341,7 +343,6 @@
 void		npf_nat_freealg(npf_natpolicy_t *, npf_alg_t *);
 
 int		npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *, const int);
-int		npf_nat_translate(npf_cache_t *, nbuf_t *, npf_nat_t *, bool);
 void		npf_nat_destroy(npf_nat_t *);
 void		npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
 void		npf_nat_gettrans(npf_nat_t *, npf_addr_t **, in_port_t *);
--- a/sys/net/npf/npf_inet.c	Wed Feb 19 01:43:16 2014 +0000
+++ b/sys/net/npf/npf_inet.c	Wed Feb 19 03:51:31 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_inet.c,v 1.29 2014/02/13 03:34:40 rmind Exp $	*/
+/*	$NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.29 2014/02/13 03:34:40 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.30 2014/02/19 03:51:31 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -578,7 +578,7 @@
 	}
 
 	/* Nothing else to do for ICMP. */
-	if (proto == IPPROTO_ICMP) {
+	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
 		return true;
 	}
 	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
@@ -617,6 +617,50 @@
 }
 
 /*
+ * npf_napt_rwr: perform address and/or port translation.
+ */
+int
+npf_napt_rwr(const npf_cache_t *npc, u_int which,
+    const npf_addr_t *addr, const in_addr_t port)
+{
+	const unsigned proto = npc->npc_proto;
+
+	/*
+	 * Rewrite IP and/or TCP/UDP checksums first, since we need the
+	 * current (old) address/port for the calculations.  Then perform
+	 * the address translation i.e. rewrite source or destination.
+	 */
+	if (!npf_rwrcksum(npc, which, addr, port)) {
+		return EINVAL;
+	}
+	if (!npf_rwrip(npc, which, addr)) {
+		return EINVAL;
+	}
+	if (port == 0) {
+		/* Done. */
+		return 0;
+	}
+
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		/* Rewrite source/destination port. */
+		if (!npf_rwrport(npc, which, port)) {
+			return EINVAL;
+		}
+		break;
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		KASSERT(npf_iscached(npc, NPC_ICMP));
+		/* Nothing. */
+		break;
+	default:
+		return ENOTSUP;
+	}
+	return 0;
+}
+
+/*
  * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296.
  */
 
--- a/sys/net/npf/npf_nat.c	Wed Feb 19 01:43:16 2014 +0000
+++ b/sys/net/npf/npf_nat.c	Wed Feb 19 03:51:31 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_nat.c,v 1.25 2014/02/13 03:34:40 rmind Exp $	*/
+/*	$NetBSD: npf_nat.c,v 1.26 2014/02/19 03:51:31 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2014 Mindaugas Rasiukevicius <rmind at netbsd org>
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.25 2014/02/13 03:34:40 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.26 2014/02/19 03:51:31 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -549,62 +549,18 @@
 }
 
 /*
- * npf_nat_rwr: perform address and/or port translation.
- */
-static int
-npf_nat_rwr(npf_cache_t *npc, const npf_natpolicy_t *np,
-    const npf_addr_t *addr, const in_addr_t port, bool forw)
-{
-	const unsigned proto = npc->npc_proto;
-	const u_int which = npf_nat_which(np->n_type, forw);
-
-	/*
-	 * Rewrite IP and/or TCP/UDP checksums first, since we need the
-	 * current (old) address/port for the calculations.  Then perform
-	 * the address translation i.e. rewrite source or destination.
-	 */
-	if (!npf_rwrcksum(npc, which, addr, port)) {
-		return EINVAL;
-	}
-	if (!npf_rwrip(npc, which, addr)) {
-		return EINVAL;
-	}
-	if ((np->n_flags & NPF_NAT_PORTS) == 0) {
-		/* Done. */
-		return 0;
-	}
-
-	switch (proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-		/* Rewrite source/destination port. */
-		if (!npf_rwrport(npc, which, port)) {
-			return EINVAL;
-		}
-		break;
-	case IPPROTO_ICMP:
-		KASSERT(npf_iscached(npc, NPC_ICMP));
-		/* Nothing. */
-		break;
-	default:
-		return ENOTSUP;
-	}
-	return 0;
-}
-
-/*
  * npf_nat_translate: perform translation given the state data.
  */
-int
+static inline int
 npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, bool forw)
 {
 	const npf_natpolicy_t *np = nt->nt_natpolicy;
+	const u_int which = npf_nat_which(np->n_type, forw);
 	const npf_addr_t *addr;
 	in_port_t port;
 
 	KASSERT(npf_iscached(npc, NPC_IP46));
 	KASSERT(npf_iscached(npc, NPC_LAYER4));
-	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 
 	if (forw) {
 		/* "Forwards" stream: use translation address/port. */
@@ -617,14 +573,16 @@
 	}
 	KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0);
 
-	/* Execute ALG hook first. */
+	/* Execute ALG translation first. */
 	if ((npc->npc_info & NPC_ALG_EXEC) == 0) {
 		npc->npc_info |= NPC_ALG_EXEC;
 		npf_alg_exec(npc, nbuf, nt, forw);
+		npf_recache(npc, nbuf);
 	}
+	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
 
 	/* Finally, perform the translation. */
-	return npf_nat_rwr(npc, np, addr, port, forw);
+	return npf_napt_rwr(npc, which, addr, port);
 }
 
 /*
@@ -633,17 +591,16 @@
 static inline int 
 npf_nat_algo(npf_cache_t *npc, const npf_natpolicy_t *np, bool forw)
 {
-	u_int which;
+	const u_int which = npf_nat_which(np->n_type, forw);
 	int error;
 
 	switch (np->n_algo) {
 	case NPF_ALGO_NPT66:
-		which = npf_nat_which(np->n_type, forw);
 		error = npf_npt66_rwr(npc, which, &np->n_taddr,
 		    np->n_tmask, np->n_npt66_adj);
 		break;
 	default:
-		error = npf_nat_rwr(npc, np, &np->n_taddr, np->n_tport, forw);
+		error = npf_napt_rwr(npc, which, &np->n_taddr, np->n_tport);
 		break;
 	}