Merge the socket locking patch: trunk
author ad <ad@NetBSD.org>
Thu, 24 Apr 2008 11:38:36 +0000
branch trunk
changeset 169089 2d15cef7502c
parent 169088 a501ad86d4b6
child 169090 c681dbdc976e
Merge the socket locking patch:

- Socket layer becomes MP safe.
- Unix protocols become MP safe.
- Allows protocol processing interrupts to safely block on locks.
- Fixes a number of race conditions.

With much feedback from matt@ and plunky@.
sys/compat/common/uipc_syscalls_43.c
sys/compat/linux/common/linux_socket.c
sys/compat/svr4/svr4_net.c
sys/dev/bluetooth/bthidev.c
sys/dev/bluetooth/btsco.c
sys/dev/kttcp.c
sys/kern/kern_softint.c
sys/kern/subr_pool.c
sys/kern/subr_tftproot.c
sys/kern/sys_socket.c
sys/kern/uipc_domain.c
sys/kern/uipc_proto.c
sys/kern/uipc_socket.c
sys/kern/uipc_socket2.c
sys/kern/uipc_syscalls.c
sys/kern/uipc_usrreq.c
sys/miscfs/fifofs/fifo_vnops.c
sys/miscfs/portal/portal_vnops.c
sys/net/if.c
sys/net/if_etherip.c
sys/net/if_gre.c
sys/net/if_ppp.c
sys/net/if_pppoe.c
sys/net/if_sl.c
sys/net/if_strip.c
sys/net/raw_cb.c
sys/net/raw_usrreq.c
sys/net/rtsock.c
sys/netatalk/aarp.c
sys/netatalk/at_proto.c
sys/netatalk/ddp_input.c
sys/netatalk/ddp_usrreq.c
sys/netbt/bluetooth.h
sys/netbt/bt_proto.c
sys/netbt/hci.h
sys/netbt/hci_event.c
sys/netbt/hci_link.c
sys/netbt/hci_socket.c
sys/netbt/hci_unit.c
sys/netbt/l2cap_misc.c
sys/netbt/l2cap_socket.c
sys/netbt/rfcomm_dlc.c
sys/netbt/rfcomm_session.c
sys/netbt/rfcomm_socket.c
sys/netbt/sco_socket.c
sys/netinet/if_arp.c
sys/netinet/igmp.c
sys/netinet/in_pcb.c
sys/netinet/in_proto.c
sys/netinet/ip_encap.c
sys/netinet/ip_encap.h
sys/netinet/ip_flow.c
sys/netinet/ip_input.c
sys/netinet/raw_ip.c
sys/netinet/tcp_input.c
sys/netinet/tcp_subr.c
sys/netinet/tcp_timer.c
sys/netinet/tcp_usrreq.c
sys/netinet/tcp_var.h
sys/netinet/udp_usrreq.c
sys/netinet6/ah.h
sys/netinet6/ah_input.c
sys/netinet6/esp.h
sys/netinet6/esp_input.c
sys/netinet6/frag6.c
sys/netinet6/in6_gif.c
sys/netinet6/in6_gif.h
sys/netinet6/in6_ifattach.c
sys/netinet6/in6_pcb.c
sys/netinet6/in6_proto.c
sys/netinet6/ip6_flow.c
sys/netinet6/ip6_input.c
sys/netinet6/ip6_mroute.c
sys/netinet6/ip6_var.h
sys/netinet6/ip6protosw.h
sys/netinet6/mld6.c
sys/netinet6/nd6.c
sys/netinet6/nd6_nbr.c
sys/netinet6/raw_ip6.c
sys/netinet6/udp6_usrreq.c
sys/netinet6/udp6_var.h
sys/netipsec/key.c
sys/netipsec/keysock.c
sys/netipsec/xform_ipip.c
sys/netiso/clnp_timer.c
sys/netiso/cltp_usrreq.c
sys/netiso/iso_pcb.c
sys/netiso/iso_proto.c
sys/netiso/tp_timer.c
sys/netiso/tp_usrreq.c
sys/netkey/key.c
sys/netkey/keysock.c
sys/netnatm/natm.c
sys/netnatm/natm_proto.c
sys/netsmb/smb_trantcp.c
sys/nfs/krpc_subr.c
sys/nfs/nfs_boot.c
sys/nfs/nfs_bootdhcp.c
sys/nfs/nfs_socket.c
sys/nfs/nfs_syscalls.c
sys/sys/protosw.h
sys/sys/socketvar.h
sys/sys/un.h
sys/sys/unpcb.h
--- a/sys/compat/common/uipc_syscalls_43.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/compat/common/uipc_syscalls_43.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_syscalls_43.c,v 1.41 2008/01/15 09:25:26 martin Exp $	*/
+/*	$NetBSD: uipc_syscalls_43.c,v 1.42 2008/04/24 11:38:36 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_syscalls_43.c,v 1.41 2008/01/15 09:25:26 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_syscalls_43.c,v 1.42 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -492,6 +492,8 @@
 	struct ifnet *ifp = ifunit(ifr->ifr_name);
 	struct sockaddr *sa;
 
+	KASSERT(solocked(so));
+
 	if (ifp == NULL)
 		return ENXIO;
 
--- a/sys/compat/linux/common/linux_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/compat/linux/common/linux_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_socket.c,v 1.90 2008/04/23 13:13:25 ad Exp $	*/
+/*	$NetBSD: linux_socket.c,v 1.91 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.90 2008/04/23 13:13:25 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.91 2008/04/24 11:38:36 ad Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_inet.h"
@@ -1212,17 +1212,17 @@
 
 	if (error == EISCONN) {
 		struct socket *so;
-		int s, state, prflags, nbio;
+		int state, prflags, nbio;
 
 		/* getsock() will use the descriptor for us */
 	    	if (fd_getsock(SCARG(uap, s), &so) != 0)
 		    	return EISCONN;
 
-		s = splsoftnet();
+		solock(so);
 		state = so->so_state;
 		nbio = so->so_nbio;
 		prflags = so->so_proto->pr_flags;
-		splx(s);
+		sounlock(so);
 		fd_putfile(SCARG(uap, s));
 		/*
 		 * We should only let this call succeed once per
--- a/sys/compat/svr4/svr4_net.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/compat/svr4/svr4_net.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: svr4_net.c,v 1.51 2008/03/21 21:54:59 ad Exp $	*/
+/*	$NetBSD: svr4_net.c,v 1.52 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 1994, 2008 The NetBSD Foundation, Inc.
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: svr4_net.c,v 1.51 2008/03/21 21:54:59 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: svr4_net.c,v 1.52 2008/04/24 11:38:36 ad Exp $");
 
 #define COMPAT_SVR4 1
 
@@ -190,7 +190,7 @@
 	if ((error = fd_allocfile(&fp, &fd)) != 0)
 		return error;
 
-	if ((error = socreate(family, &so, type, protocol, l)) != 0) {
+	if ((error = socreate(family, &so, type, protocol, l, NULL)) != 0) {
 		DPRINTF(("socreate error %d\n", error));
 		fd_abort(curproc, fp, fd);
 		return error;
--- a/sys/dev/bluetooth/bthidev.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/dev/bluetooth/bthidev.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: bthidev.c,v 1.14 2008/03/28 21:17:37 plunky Exp $	*/
+/*	$NetBSD: bthidev.c,v 1.15 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bthidev.c,v 1.14 2008/03/28 21:17:37 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bthidev.c,v 1.15 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -97,7 +97,6 @@
 #define BTHID_WAIT_CTL		1
 #define BTHID_WAIT_INT		2
 #define BTHID_OPEN		3
-#define BTHID_DETACHING		4
 
 #define	BTHID_RETRY_INTERVAL	5	/* seconds between connection attempts */
 
@@ -180,7 +179,7 @@
 	struct hid_item h;
 	const void *desc;
 	int locs[BTHIDBUSCF_NLOCS];
-	int maxid, rep, s, dlen;
+	int maxid, rep, dlen;
 
 	/*
 	 * Init softc
@@ -302,13 +301,13 @@
 	/*
 	 * start bluetooth connections
 	 */
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	if ((sc->sc_flags & BTHID_RECONNECT) == 0)
 		bthidev_listen(sc);
 
 	if (sc->sc_flags & BTHID_CONNECTING)
 		bthidev_connect(sc);
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 static int
@@ -316,9 +315,8 @@
 {
 	struct bthidev_softc *sc = device_private(self);
 	struct bthidev *hidev;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	sc->sc_flags = 0;	/* disable reconnecting */
 
 	/* release interrupt listen */
@@ -347,15 +345,10 @@
 		sc->sc_ctl = NULL;
 	}
 
-	/* remove callout */
-	sc->sc_state = BTHID_DETACHING;
-	callout_stop(&sc->sc_reconnect);
-	if (callout_invoking(&sc->sc_reconnect))
-		tsleep(sc, PWAIT, "bthidetach", 0);
-
+	callout_halt(&sc->sc_reconnect, bt_lock);
 	callout_destroy(&sc->sc_reconnect);
 
-	splx(s);
+	mutex_exit(bt_lock);
 
 	/* detach children */
 	while ((hidev = LIST_FIRST(&sc->sc_list)) != NULL) {
@@ -396,9 +389,8 @@
 bthidev_timeout(void *arg)
 {
 	struct bthidev_softc *sc = arg;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	callout_ack(&sc->sc_reconnect);
 
 	switch (sc->sc_state) {
@@ -430,14 +422,10 @@
 	case BTHID_OPEN:
 		break;
 
-	case BTHID_DETACHING:
-		wakeup(sc);
-		break;
-
 	default:
 		break;
 	}
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 /*
@@ -865,7 +853,7 @@
 {
 	struct bthidev_softc *sc = device_private(hidev->sc_parent);
 	struct mbuf *m;
-	int s, err;
+	int err;
 
 	if (sc == NULL || sc->sc_state != BTHID_OPEN)
 		return ENOTCONN;
@@ -896,9 +884,9 @@
 	memcpy(mtod(m, uint8_t *) + 2, report, rlen);
 	m->m_pkthdr.len = m->m_len = rlen + 2;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	err = l2cap_send(sc->sc_int, m);
-	splx(s);
+	mutex_exit(bt_lock);
 
 	return err;
 }
--- a/sys/dev/bluetooth/btsco.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/dev/bluetooth/btsco.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: btsco.c,v 1.19 2008/03/28 21:17:37 plunky Exp $	*/
+/*	$NetBSD: btsco.c,v 1.20 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: btsco.c,v 1.19 2008/03/28 21:17:37 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: btsco.c,v 1.20 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/audioio.h>
@@ -91,6 +91,7 @@
 
 	device_t		 sc_audio;	/* MI audio device */
 	void			*sc_intr;	/* interrupt cookie */
+	kcondvar_t		 sc_connect;	/* connect wait */
 
 	/* Bluetooth */
 	bdaddr_t		 sc_laddr;	/* local address */
@@ -286,6 +287,7 @@
 	sc->sc_vgm = 200;
 	sc->sc_state = BTSCO_CLOSED;
 	sc->sc_name = device_xname(self);
+	cv_init(&sc->sc_connect, "connect");
 
 	/*
 	 * copy in our configuration info
@@ -339,11 +341,10 @@
 btsco_detach(device_t self, int flags)
 {
 	struct btsco_softc *sc = device_private(self);
-	int s;
 
 	DPRINTF("sc=%p\n", sc);
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	if (sc->sc_sco != NULL) {
 		DPRINTF("sc_sco=%p\n", sc->sc_sco);
 		sco_disconnect(sc->sc_sco, 0);
@@ -356,7 +357,7 @@
 		sco_detach(&sc->sc_sco_l);
 		sc->sc_sco_l = NULL;
 	}
-	splx(s);
+	mutex_exit(bt_lock);
 
 	if (sc->sc_audio != NULL) {
 		DPRINTF("sc_audio=%p\n", sc->sc_audio);
@@ -381,6 +382,8 @@
 			return EAGAIN;
 	}
 
+	cv_destroy(&sc->sc_connect);
+
 	return 0;
 }
 
@@ -417,7 +420,7 @@
 		sco_detach(&sc->sc_sco_l);
 
 	sc->sc_state = BTSCO_OPEN;
-	wakeup(sc);
+	cv_broadcast(&sc->sc_connect);
 }
 
 static void
@@ -438,7 +441,7 @@
 		break;
 
 	case BTSCO_WAIT_CONNECT:	/* connect failed */
-		wakeup(sc);
+		cv_broadcast(&sc->sc_connect);
 		break;
 
 	case BTSCO_OPEN:		/* link lost */
@@ -557,7 +560,7 @@
 {
 	struct sockaddr_bt sa;
 	struct btsco_softc *sc = hdl;
-	int err, s, timo;
+	int err, timo;
 
 	DPRINTF("%s flags 0x%x\n", sc->sc_name, flags);
 	/* flags FREAD & FWRITE? */
@@ -565,7 +568,7 @@
 	if (sc->sc_sco != NULL || sc->sc_sco_l != NULL)
 		return EIO;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 
 	memset(&sa, 0, sizeof(sa));
 	sa.bt_len = sizeof(sa);
@@ -613,7 +616,7 @@
 
 	sc->sc_state = BTSCO_WAIT_CONNECT;
 	while (err == 0 && sc->sc_state == BTSCO_WAIT_CONNECT)
-		err = tsleep(sc, PWAIT | PCATCH, "btsco", timo);
+		err = cv_timedwait_sig(&sc->sc_connect, bt_lock, timo);
 
 	switch (sc->sc_state) {
 	case BTSCO_CLOSED:		/* disconnected */
@@ -639,7 +642,7 @@
 	}
 
 done:
-	splx(s);
+	mutex_exit(bt_lock);
 
 	DPRINTF("done err=%d, sc_state=%d, sc_mtu=%d\n",
 			err, sc->sc_state, sc->sc_mtu);
@@ -650,11 +653,10 @@
 btsco_close(void *hdl)
 {
 	struct btsco_softc *sc = hdl;
-	int s;
 
 	DPRINTF("%s\n", sc->sc_name);
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	if (sc->sc_sco != NULL) {
 		sco_disconnect(sc->sc_sco, 0);
 		sco_detach(&sc->sc_sco);
@@ -663,7 +665,7 @@
 	if (sc->sc_sco_l != NULL) {
 		sco_detach(&sc->sc_sco_l);
 	}
-	splx(s);
+	mutex_exit(bt_lock);
 
 	if (sc->sc_rx_mbuf != NULL) {
 		m_freem(sc->sc_rx_mbuf);
--- a/sys/dev/kttcp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/dev/kttcp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: kttcp.c,v 1.27 2008/03/27 19:06:51 ad Exp $	*/
+/*	$NetBSD: kttcp.c,v 1.28 2008/04/24 11:38:36 ad Exp $	*/
 
 /*
  * Copyright (c) 2002 Wasabi Systems, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.27 2008/03/27 19:06:51 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.28 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -190,7 +190,7 @@
 {
 	struct mbuf **mp, *m, *top;
 	long space, len, mlen;
-	int error, s, dontroute, atomic;
+	int error, dontroute, atomic;
 	long long resid;
 
 	atomic = sosendallatonce(so);
@@ -211,19 +211,17 @@
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	l->l_ru.ru_msgsnd++;
-#define	snderr(errno)	{ error = errno; splx(s); goto release; }
-
+#define	snderr(errno)	{ error = errno; goto release; }
+	solock(so);
  restart:
 	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
 		goto out;
 	do {
-		s = splsoftnet();
 		if (so->so_state & SS_CANTSENDMORE)
 			snderr(EPIPE);
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
@@ -247,14 +245,13 @@
 			    "kttcp_soreceive sbwait 1");
 			sbunlock(&so->so_snd);
 			error = sbwait(&so->so_snd);
-			splx(s);
 			if (error)
 				goto out;
 			goto restart;
 		}
-		splx(s);
 		mp = &top;
 		do {
+			sounlock(so);
 			do {
 				if (top == 0) {
 					m = m_gethdr(M_WAIT, MT_DATA);
@@ -305,12 +302,10 @@
 					break;
 				}
 			} while (space > 0 && atomic);
-
-			s = splsoftnet();
+			solock(so);
 
 			if (so->so_state & SS_CANTSENDMORE)
 				snderr(EPIPE);
-
 			if (dontroute)
 				so->so_options |= SO_DONTROUTE;
 			if (resid > 0)
@@ -322,8 +317,6 @@
 				so->so_options &= ~SO_DONTROUTE;
 			if (resid > 0)
 				so->so_state &= ~SS_MORETOCOME;
-			splx(s);
-
 			top = 0;
 			mp = &top;
 			if (error)
@@ -334,6 +327,7 @@
  release:
 	sbunlock(&so->so_snd);
  out:
+ 	sounlock(so);
 	if (top)
 		m_freem(top);
 	*done = slen - resid;
@@ -348,7 +342,7 @@
     unsigned long long *done, struct lwp *l, int *flagsp)
 {
 	struct mbuf *m, **mp;
-	int flags, len, error, s, offset, moff, type;
+	int flags, len, error, offset, moff, type;
 	long long orig_resid, resid;
 	const struct protosw *pr;
 	struct mbuf *nextrecord;
@@ -363,8 +357,10 @@
  		flags = 0;
 	if (flags & MSG_OOB) {
 		m = m_get(M_WAIT, MT_DATA);
+		solock(so);
 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
 		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, NULL);
+		sounlock(so);
 		if (error)
 			goto bad;
 		do {
@@ -378,14 +374,12 @@
 	}
 	if (mp)
 		*mp = NULL;
+	solock(so);
 	if (so->so_state & SS_ISCONFIRMING && resid)
 		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, NULL);
-
  restart:
 	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
 		return (error);
-	s = splsoftnet();
-
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more
@@ -439,9 +433,10 @@
 		}
 		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		splx(s);
-		if (error)
+		if (error) {
+			sounlock(so);
 			return (error);
+		}
 		goto restart;
 	}
  dontblock:
@@ -581,8 +576,11 @@
 			if (flags & MSG_PEEK)
 				moff += len;
 			else {
-				if (mp)
+				if (mp) {
+					sounlock(so);
 					*mp = m_copym(m, 0, len, M_WAIT);
+					solock(so);
+				}
 				m->m_data += len;
 				m->m_len -= len;
 				so->so_rcv.sb_cc -= len;
@@ -635,7 +633,7 @@
 			error = sbwait(&so->so_rcv);
 			if (error) {
 				sbunlock(&so->so_rcv);
-				splx(s);
+				sounlock(so);
 				return (0);
 			}
 			if ((m = so->so_rcv.sb_mb) != NULL)
@@ -671,7 +669,6 @@
 	if (orig_resid == resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
 		sbunlock(&so->so_rcv);
-		splx(s);
 		goto restart;
 	}
 
@@ -679,7 +676,7 @@
 		*flagsp |= flags;
  release:
 	sbunlock(&so->so_rcv);
-	splx(s);
+	sounlock(so);
 	*done = slen - resid;
 #if 0
 	printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
--- a/sys/kern/kern_softint.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/kern_softint.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_softint.c,v 1.15 2008/04/12 18:22:03 ad Exp $	*/
+/*	$NetBSD: kern_softint.c,v 1.16 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc.
@@ -183,7 +183,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.15 2008/04/12 18:22:03 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.16 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/malloc.h>
@@ -324,8 +324,8 @@
 		 * XXX Needs to go away.
 		 */
 #define DONETISR(n, f)							\
-    softint_netisrs[(n)] = 						\
-        softint_establish(SOFTINT_NET, (void (*)(void *))(f), NULL)
+    softint_netisrs[(n)] = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE,\
+        (void (*)(void *))(f), NULL)
 #include <net/netisr_dispatch.h>
 	}
 }
--- a/sys/kern/subr_pool.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/subr_pool.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_pool.c,v 1.156 2008/03/27 18:30:15 ad Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.157 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1999, 2000, 2002, 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.156 2008/03/27 18:30:15 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.157 2008/04/24 11:38:36 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_pool.h"
@@ -814,14 +814,7 @@
 	pp->pr_entered_file = NULL;
 	pp->pr_entered_line = 0;
 
-	/*
-	 * XXXAD hack to prevent IP input processing from blocking.
-	 */
-	if (ipl == IPL_SOFTNET) {
-		mutex_init(&pp->pr_lock, MUTEX_DEFAULT, IPL_VM);
-	} else {
-		mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl);
-	}
+	mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl);
 	cv_init(&pp->pr_cv, wchan);
 	pp->pr_ipl = ipl;
 
@@ -1629,9 +1622,8 @@
 	}
 
 	/*
-	 * XXXSMP Because mutexes at IPL_SOFTXXX are still spinlocks,
-	 * and we are called from the pagedaemon without kernel_lock.
-	 * Does not apply to IPL_SOFTBIO.
+	 * XXXSMP Because we do not want to cause non-MPSAFE code
+	 * to block.
 	 */
 	if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK ||
 	    pp->pr_ipl == IPL_SOFTSERIAL) {
@@ -2074,15 +2066,7 @@
 	if (palloc == NULL && ipl == IPL_NONE)
 		palloc = &pool_allocator_nointr;
 	pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl);
-
-	/*
-	 * XXXAD hack to prevent IP input processing from blocking.
-	 */
-	if (ipl == IPL_SOFTNET) {
-		mutex_init(&pc->pc_lock, MUTEX_DEFAULT, IPL_VM);
-	} else {
-		mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl);
-	}
+	mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl);
 
 	if (ctor == NULL) {
 		ctor = (int (*)(void *, void *, int))nullop;
--- a/sys/kern/subr_tftproot.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/subr_tftproot.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_tftproot.c,v 1.4 2008/04/04 20:13:18 cegger Exp $ */
+/*	$NetBSD: subr_tftproot.c,v 1.5 2008/04/24 11:38:36 ad Exp $ */
 
 /*-
  * Copyright (c) 2007 Emmanuel Dreyfus, all rights reserved.
@@ -39,7 +39,7 @@
 #include "opt_md.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_tftproot.c,v 1.4 2008/04/04 20:13:18 cegger Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_tftproot.c,v 1.5 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -215,7 +215,7 @@
 	char *cp;
 	/* struct device *dv; */
 	
-	if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, l)) != 0) {
+	if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, l, NULL)) != 0) {
 		DPRINTF(("%s():%d socreate returned %d\n", 
 		    __func__, __LINE__, error));
 		goto out;
--- a/sys/kern/sys_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/sys_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,37 @@
-/*	$NetBSD: sys_socket.c,v 1.55 2008/03/21 21:55:00 ad Exp $	*/
+/*	$NetBSD: sys_socket.c,v 1.56 2008/04/24 11:38:36 ad Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1982, 1986, 1990, 1993
@@ -32,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_socket.c,v 1.55 2008/03/21 21:55:00 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_socket.c,v 1.56 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -64,10 +97,8 @@
 	struct socket *so = fp->f_data;
 	int error;
 
-	KERNEL_LOCK(1, NULL);
 	error = (*so->so_receive)(so, (struct mbuf **)0,
 	    uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
-	KERNEL_UNLOCK_ONE(NULL);
 
 	return error;
 }
@@ -80,10 +111,8 @@
 	struct socket *so = fp->f_data;
 	int error;
 
-	KERNEL_LOCK(1, NULL);
 	error = (*so->so_send)(so, (struct mbuf *)0,
 		uio, (struct mbuf *)0, (struct mbuf *)0, 0, curlwp);
-	KERNEL_UNLOCK_ONE(NULL);
 
 	return error;
 }
@@ -99,11 +128,10 @@
 		return 0;
 	}
 
-	KERNEL_LOCK(1, NULL);
-
 	switch (cmd) {
 
 	case FIOASYNC:
+		solock(so);
 		if (*(int *)data) {
 			so->so_state |= SS_ASYNC;
 			so->so_rcv.sb_flags |= SB_ASYNC;
@@ -113,6 +141,7 @@
 			so->so_rcv.sb_flags &= ~SB_ASYNC;
 			so->so_snd.sb_flags &= ~SB_ASYNC;
 		}
+		sounlock(so);
 		break;
 
 	case FIONREAD:
@@ -130,11 +159,13 @@
 		 * to understand the following test. We detect overflow
 		 * and return zero.
 		 */
+		solock(so);
 		if ((so->so_snd.sb_hiwat < so->so_snd.sb_cc)
 		    || (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
 			*(int *)data = 0;
 		else
 			*(int *)data = sbspace(&so->so_snd);
+		sounlock(so);
 		break;
 
 	case SIOCSPGRP:
@@ -159,18 +190,22 @@
 		 * interface and routing ioctls should have a
 		 * different entry since a socket's unnecessary
 		 */
+		KERNEL_LOCK(1, NULL);
 		if (IOCGROUP(cmd) == 'i')
 			error = ifioctl(so, cmd, data, curlwp);
 		else if (IOCGROUP(cmd) == 'r')
 			error = rtioctl(cmd, data, curlwp);
-		else
+		else {
+			solock(so);
 			error = (*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
 			    (struct mbuf *)cmd, (struct mbuf *)data, NULL,
 			     curlwp);
+			sounlock(so);
+		}
+		KERNEL_UNLOCK_ONE(NULL);
 		break;
 	}
 
-	KERNEL_UNLOCK_ONE(NULL);
 
 	return error;
 }
@@ -201,11 +236,11 @@
 	memset((void *)ub, 0, sizeof(*ub));
 	ub->st_mode = S_IFSOCK;
 
-	KERNEL_LOCK(1, NULL);
+	solock(so);
 	error = (*so->so_proto->pr_usrreq)(so, PRU_SENSE,
 	    (struct mbuf *)ub, (struct mbuf *)0, (struct mbuf *)0,
 	    curlwp);
-	KERNEL_UNLOCK_ONE(NULL);
+	sounlock(so);
 
 	return error;
 }
@@ -216,11 +251,9 @@
 {
 	int error = 0;
 
-	KERNEL_LOCK(1, NULL);
 	if (fp->f_data)
 		error = soclose(fp->f_data);
 	fp->f_data = 0;
-	KERNEL_UNLOCK_ONE(NULL);
 
 	return error;
 }
--- a/sys/kern/uipc_domain.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_domain.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_domain.c,v 1.75 2008/03/21 21:55:00 ad Exp $	*/
+/*	$NetBSD: uipc_domain.c,v 1.76 2008/04/24 11:38:36 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_domain.c,v 1.75 2008/03/21 21:55:00 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_domain.c,v 1.76 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/socket.h>
@@ -93,8 +93,8 @@
 	if (rt_domain)
 		domain_attach(rt_domain);
 
-	callout_init(&pffasttimo_ch, 0);
-	callout_init(&pfslowtimo_ch, 0);
+	callout_init(&pffasttimo_ch, CALLOUT_MPSAFE);
+	callout_init(&pfslowtimo_ch, CALLOUT_MPSAFE);
 
 	callout_reset(&pffasttimo_ch, 1, pffasttimo, NULL);
 	callout_reset(&pfslowtimo_ch, 1, pfslowtimo, NULL);
@@ -563,7 +563,7 @@
 			if (pr->pr_slowtimo)
 				(*pr->pr_slowtimo)();
 	}
-	callout_reset(&pfslowtimo_ch, hz / 2, pfslowtimo, NULL);
+	callout_schedule(&pfslowtimo_ch, hz / 2);
 }
 
 void
@@ -579,5 +579,5 @@
 			if (pr->pr_fasttimo)
 				(*pr->pr_fasttimo)();
 	}
-	callout_reset(&pffasttimo_ch, hz / 5, pffasttimo, NULL);
+	callout_schedule(&pffasttimo_ch, hz / 5);
 }
--- a/sys/kern/uipc_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_proto.c,v 1.20 2007/02/18 23:16:59 matt Exp $	*/
+/*	$NetBSD: uipc_proto.c,v 1.21 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_proto.c,v 1.20 2007/02/18 23:16:59 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_proto.c,v 1.21 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/socket.h>
@@ -74,6 +74,7 @@
 
 struct domain unixdomain = {
 	.dom_family = AF_LOCAL,
+	.dom_init = uipc_init,
 	.dom_name = "unix",
 	.dom_externalize = unp_externalize,
 	.dom_dispose = unp_dispose,
--- a/sys/kern/uipc_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_socket.c,v 1.159 2008/04/14 15:42:20 ad Exp $	*/
+/*	$NetBSD: uipc_socket.c,v 1.160 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2007, 2008 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.159 2008/04/14 15:42:20 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.160 2008/04/24 11:38:36 ad Exp $");
 
 #include "opt_sock_counters.h"
 #include "opt_sosend_loan.h"
@@ -91,7 +91,6 @@
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/resourcevar.h>
-#include <sys/pool.h>
 #include <sys/event.h>
 #include <sys/poll.h>
 #include <sys/kauth.h>
@@ -100,9 +99,6 @@
 
 #include <uvm/uvm.h>
 
-POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL,
-    IPL_SOFTNET);
-
 MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options");
 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
 
@@ -110,6 +106,7 @@
 
 extern int	somaxconn;			/* patchable (XXX sysctl) */
 int		somaxconn = SOMAXCONN;
+kmutex_t	*softnet_lock;
 
 #ifdef SOSEND_COUNTERS
 #include <sys/device.h>
@@ -277,6 +274,9 @@
 {
 	size_t rv;
 
+	if (__predict_true(so_pendfree == NULL))
+		return 0;
+
 	mutex_enter(&so_pendfree_lock);
 	rv = sodopendfreel();
 	mutex_exit(&so_pendfree_lock);
@@ -365,7 +365,6 @@
 	len = eva - sva;
 	npgs = len >> PAGE_SHIFT;
 
-	/* XXX KDASSERT */
 	KASSERT(npgs <= M_EXT_MAXPAGES);
 
 	lva = sokvaalloc(len, so);
@@ -441,6 +440,7 @@
 {
 
 	mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM);
+	softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 	cv_init(&socurkva_cv, "sokva");
 
 	/* Set the initial adjusted socket buffer size. */
@@ -460,12 +460,14 @@
  */
 /*ARGSUSED*/
 int
-socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l)
+socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l,
+	 struct socket *lockso)
 {
 	const struct protosw	*prp;
 	struct socket	*so;
 	uid_t		uid;
-	int		error, s;
+	int		error;
+	kmutex_t	*lock;
 
 	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
 	    KAUTH_REQ_NETWORK_SOCKET_OPEN, KAUTH_ARG(dom), KAUTH_ARG(type),
@@ -490,11 +492,8 @@
 		return EPROTONOSUPPORT;
 	if (prp->pr_type != type)
 		return EPROTOTYPE;
-	s = splsoftnet();
-	so = pool_get(&socket_pool, PR_WAITOK);
-	memset(so, 0, sizeof(*so));
-	TAILQ_INIT(&so->so_q0);
-	TAILQ_INIT(&so->so_q);
+
+	so = soget(true);
 	so->so_type = type;
 	so->so_proto = prp;
 	so->so_send = sosend;
@@ -504,19 +503,26 @@
 	so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner;
 	so->so_mowner = &prp->pr_domain->dom_mowner;
 #endif
-	selinit(&so->so_rcv.sb_sel);
-	selinit(&so->so_snd.sb_sel);
 	uid = kauth_cred_geteuid(l->l_cred);
 	so->so_uidinfo = uid_find(uid);
+	if (lockso != NULL) {
+		/* Caller wants us to share a lock. */
+		lock = lockso->so_lock;
+		so->so_lock = lock;
+		mutex_obj_hold(lock);
+		mutex_enter(lock);
+	} else {
+		/* Lock assigned and taken during PRU_ATTACH. */
+	}
 	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
 	    (struct mbuf *)(long)proto, NULL, l);
+	KASSERT(solocked(so));
 	if (error != 0) {
 		so->so_state |= SS_NOFDREF;
 		sofree(so);
-		splx(s);
 		return error;
 	}
-	splx(s);
+	sounlock(so);
 	*aso = so;
 	return 0;
 }
@@ -537,7 +543,7 @@
 	fp->f_flag = FREAD|FWRITE;
 	fp->f_type = DTYPE_SOCKET;
 	fp->f_ops = &socketops;
-	error = socreate(domain, &so, type, protocol, l);
+	error = socreate(domain, &so, type, protocol, l, NULL);
 	if (error != 0) {
 		fd_abort(curproc, fp, fd);
 	} else {
@@ -553,27 +559,27 @@
 int
 sobind(struct socket *so, struct mbuf *nam, struct lwp *l)
 {
-	int	s, error;
+	int	error;
 
-	s = splsoftnet();
+	solock(so);
 	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, l);
-	splx(s);
+	sounlock(so);
 	return error;
 }
 
 int
 solisten(struct socket *so, int backlog, struct lwp *l)
 {
-	int	s, error;
+	int	error;
 
-	s = splsoftnet();
+	solock(so);
 	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | 
 	    SS_ISDISCONNECTING)) != 0)
 		return (EOPNOTSUPP);
 	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL,
 	    NULL, NULL, l);
 	if (error != 0) {
-		splx(s);
+		sounlock(so);
 		return error;
 	}
 	if (TAILQ_EMPTY(&so->so_q))
@@ -581,7 +587,7 @@
 	if (backlog < 0)
 		backlog = 0;
 	so->so_qlimit = min(backlog, somaxconn);
-	splx(s);
+	sounlock(so);
 	return 0;
 }
 
@@ -589,16 +595,22 @@
 sofree(struct socket *so)
 {
 
-	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+	KASSERT(solocked(so));
+
+	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
+		sounlock(so);
 		return;
+	}
 	if (so->so_head) {
 		/*
 		 * We must not decommission a socket that's on the accept(2)
 		 * queue.  If we do, then accept(2) may hang after select(2)
 		 * indicated that the listening socket was ready.
 		 */
-		if (!soqremque(so, 0))
+		if (!soqremque(so, 0)) {
+			sounlock(so);
 			return;
+		}
 	}
 	if (so->so_rcv.sb_hiwat)
 		(void)chgsbsize(so->so_uidinfo, &so->so_rcv.sb_hiwat, 0,
@@ -607,10 +619,12 @@
 		(void)chgsbsize(so->so_uidinfo, &so->so_snd.sb_hiwat, 0,
 		    RLIM_INFINITY);
 	sbrelease(&so->so_snd, so);
+	KASSERT(!cv_has_waiters(&so->so_cv));
+	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
+	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
 	sorflush(so);
-	seldestroy(&so->so_rcv.sb_sel);
-	seldestroy(&so->so_snd.sb_sel);
-	pool_put(&socket_pool, so);
+	sounlock(so);
+	soput(so);
 }
 
 /*
@@ -622,19 +636,30 @@
 soclose(struct socket *so)
 {
 	struct socket	*so2;
-	int		s, error;
+	int		error;
+	int		error2;
 
 	error = 0;
-	s = splsoftnet();		/* conservative */
+	solock(so);
 	if (so->so_options & SO_ACCEPTCONN) {
-		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
-			(void) soqremque(so2, 0);
-			(void) soabort(so2);
-		}
-		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
-			(void) soqremque(so2, 1);
-			(void) soabort(so2);
-		}
+		/*
+		 * Drain both accept queues, partial connections first.
+		 * Loop until neither queue has an entry left.
+		 */
+		for (;;) {
+			if ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
+				KASSERT(solocked2(so, so2));
+				(void) soqremque(so2, 0);
+			} else if ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
+				KASSERT(solocked2(so, so2));
+				(void) soqremque(so2, 1);
+			} else {
+				break;
+			}
+			/* soabort drops the lock. */
+			(void) soabort(so2);
+			solock(so);
+		}
 	}
 	if (so->so_pcb == 0)
 		goto discard;
@@ -648,9 +673,7 @@
 			if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio)
 				goto drop;
 			while (so->so_state & SS_ISCONNECTED) {
-				error = tsleep((void *)&so->so_timeo,
-					       PSOCK | PCATCH, netcls,
-					       so->so_linger * hz);
+				error = sowait(so, so->so_linger * hz);
 				if (error)
 					break;
 			}
@@ -658,7 +681,7 @@
 	}
  drop:
 	if (so->so_pcb) {
-		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+		error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
 		    NULL, NULL, NULL, NULL);
 		if (error == 0)
 			error = error2;
@@ -668,23 +691,26 @@
 		panic("soclose: NOFDREF");
 	so->so_state |= SS_NOFDREF;
 	sofree(so);
-	splx(s);
 	return (error);
 }
 
 /*
- * Must be called at splsoftnet...
+ * Must be called with the socket locked.  Will return with it unlocked.
  */
 int
 soabort(struct socket *so)
 {
 	int error;
+	
+	KASSERT(solocked(so));
+	KASSERT(so->so_head == NULL);
 
-	KASSERT(so->so_head == NULL);
 	error = (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL,
 	    NULL, NULL, NULL);
 	if (error) {
 		sofree(so);
+	} else {
+		sounlock(so);
 	}
 	return error;
 }
@@ -692,10 +718,11 @@
 int
 soaccept(struct socket *so, struct mbuf *nam)
 {
-	int	s, error;
+	int	error;
+
+	KASSERT(solocked(so));
 
 	error = 0;
-	s = splsoftnet();
 	if ((so->so_state & SS_NOFDREF) == 0)
 		panic("soaccept: !NOFDREF");
 	so->so_state &= ~SS_NOFDREF;
@@ -706,18 +733,18 @@
 	else
 		error = ECONNABORTED;
 
-	splx(s);
 	return (error);
 }
 
 int
 soconnect(struct socket *so, struct mbuf *nam, struct lwp *l)
 {
-	int		s, error;
+	int		error;
+
+	KASSERT(solocked(so));
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
-	s = splsoftnet();
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.
@@ -731,40 +758,36 @@
 	else
 		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
 		    NULL, nam, NULL, l);
-	splx(s);
 	return (error);
 }
 
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
-	int	s, error;
+	int	error;
 
-	s = splsoftnet();
+	KASSERT(solocked2(so1, so2));
+
 	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
 	    NULL, (struct mbuf *)so2, NULL, NULL);
-	splx(s);
 	return (error);
 }
 
 int
 sodisconnect(struct socket *so)
 {
-	int	s, error;
+	int	error;
 
-	s = splsoftnet();
+	KASSERT(solocked(so));
+
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		error = ENOTCONN;
-		goto bad;
-	}
-	if (so->so_state & SS_ISDISCONNECTING) {
+	} else if (so->so_state & SS_ISDISCONNECTING) {
 		error = EALREADY;
-		goto bad;
+	} else {
+		error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
+		    NULL, NULL, NULL, NULL);
 	}
-	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
-	    NULL, NULL, NULL, NULL);
- bad:
-	splx(s);
 	sodopendfree();
 	return (error);
 }
@@ -798,8 +821,15 @@
 
 	p = l->l_proc;
 	sodopendfree();
+	clen = 0;
 
-	clen = 0;
+	/*
+	 * solock() provides atomicity of access.  splsoftnet() prevents
+	 * protocol processing soft interrupts from interrupting us and
+	 * blocking (expensive).
+	 */
+	s = splsoftnet();
+	solock(so);
 	atomic = sosendallatonce(so) || top;
 	if (uio)
 		resid = uio->uio_resid;
@@ -823,47 +853,51 @@
 		l->l_ru.ru_msgsnd++;
 	if (control)
 		clen = control->m_len;
-#define	snderr(errno)	{ error = errno; splx(s); goto release; }
-
  restart:
 	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
 		goto out;
 	do {
-		s = splsoftnet();
-		if (so->so_state & SS_CANTSENDMORE)
-			snderr(EPIPE);
+		if (so->so_state & SS_CANTSENDMORE) {
+			error = EPIPE;
+			goto release;
+		}
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
-				    !(resid == 0 && clen != 0))
-					snderr(ENOTCONN);
-			} else if (addr == 0)
-				snderr(EDESTADDRREQ);
+				    !(resid == 0 && clen != 0)) {
+					error = ENOTCONN;
+					goto release;
+				}
+			} else if (addr == 0) {
+				error = EDESTADDRREQ;
+				goto release;
+			}
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
-		    clen > so->so_snd.sb_hiwat)
-			snderr(EMSGSIZE);
+		    clen > so->so_snd.sb_hiwat) {
+			error = EMSGSIZE;
+			goto release;
+		}
 		if (space < resid + clen &&
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
-			if (so->so_nbio)
-				snderr(EWOULDBLOCK);
+			if (so->so_nbio) {
+				error = EWOULDBLOCK;
+				goto release;
+			}
 			sbunlock(&so->so_snd);
 			error = sbwait(&so->so_snd);
-			splx(s);
 			if (error)
 				goto out;
 			goto restart;
 		}
-		splx(s);
 		mp = &top;
 		space -= clen;
 		do {
@@ -875,6 +909,8 @@
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else do {
+				sounlock(so);
+				splx(s);
 				if (top == NULL) {
 					m = m_gethdr(M_WAIT, MT_DATA);
 					mlen = MHLEN;
@@ -925,6 +961,8 @@
 				m->m_len = len;
 				*mp = m;
 				top->m_pkthdr.len += len;
+				s = splsoftnet();
+				solock(so);
 				if (error != 0)
 					goto release;
 				mp = &m->m_next;
@@ -935,24 +973,21 @@
 				}
 			} while (space > 0 && atomic);
 
-			s = splsoftnet();
-
-			if (so->so_state & SS_CANTSENDMORE)
-				snderr(EPIPE);
-
+			if (so->so_state & SS_CANTSENDMORE) {
+				error = EPIPE;
+				goto release;
+			}
 			if (dontroute)
 				so->so_options |= SO_DONTROUTE;
 			if (resid > 0)
 				so->so_state |= SS_MORETOCOME;
 			error = (*so->so_proto->pr_usrreq)(so,
 			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
-			    top, addr, control, curlwp);	/* XXX */
+			    top, addr, control, curlwp);
 			if (dontroute)
 				so->so_options &= ~SO_DONTROUTE;
 			if (resid > 0)
 				so->so_state &= ~SS_MORETOCOME;
-			splx(s);
-
 			clen = 0;
 			control = NULL;
 			top = NULL;
@@ -965,6 +1000,8 @@
  release:
 	sbunlock(&so->so_snd);
  out:
+	sounlock(so);
+	splx(s);
 	if (top)
 		m_freem(top);
 	if (control)
@@ -984,6 +1021,8 @@
 sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
 {
 
+	KASSERT(solocked(sb->sb_so));
+
 	/*
 	 * First, update for the new value of nextrecord.  If necessary,
 	 * make it the first record.
@@ -1028,7 +1067,7 @@
 	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct lwp *l = curlwp;
-	struct mbuf	*m, **mp;
+	struct mbuf	*m, **mp, *mt;
 	int atomic, flags, len, error, s, offset, moff, type, orig_resid;
 	const struct protosw	*pr;
 	struct mbuf	*nextrecord;
@@ -1056,8 +1095,10 @@
 
 	if (flags & MSG_OOB) {
 		m = m_get(M_WAIT, MT_DATA);
+		solock(so);
 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
 		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, l);
+		sounlock(so);
 		if (error)
 			goto bad;
 		do {
@@ -1072,13 +1113,23 @@
 	}
 	if (mp != NULL)
 		*mp = NULL;
+
+	/*
+	 * solock() provides atomicity of access.  splsoftnet() prevents
+	 * protocol processing soft interrupts from interrupting us and
+	 * blocking (expensive).
+	 */
+	s = splsoftnet();
+	solock(so);
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, l);
 
  restart:
-	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
+	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) {
+		sounlock(so);
+		splx(s);
 		return error;
-	s = splsoftnet();
+	}
 
 	m = so->so_rcv.sb_mb;
 	/*
@@ -1137,9 +1188,11 @@
 		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
 		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		splx(s);
-		if (error != 0)
+		if (error != 0) {
+			sounlock(so);
+			splx(s);
 			return error;
+		}
 		goto restart;
 	}
  dontblock:
@@ -1149,7 +1202,7 @@
 	 * pointer to the next record in the socket buffer.  We must keep the
 	 * various socket buffer pointers and local stack versions of the
 	 * pointers in sync, pushing out modifications before dropping the
-	 * IPL, and re-reading them when picking it up.
+	 * socket lock, and re-reading them when picking it up.
 	 *
 	 * Otherwise, we will race with the network stack appending new data
 	 * or records onto the socket buffer by using inconsistent/stale
@@ -1226,9 +1279,11 @@
 			if (controlp != NULL) {
 				if (dom->dom_externalize != NULL &&
 				    type == SCM_RIGHTS) {
+					sounlock(so);
 					splx(s);
 					error = (*dom->dom_externalize)(cm, l);
 					s = splsoftnet();
+					solock(so);
 				}
 				*controlp = cm;
 				while (*controlp != NULL)
@@ -1240,9 +1295,9 @@
 				 */
 				if (dom->dom_dispose != NULL &&
 				    type == SCM_RIGHTS) {
-				    	splx(s);
+				    	sounlock(so);
 					(*dom->dom_dispose)(cm);
-					s = splsoftnet();
+					solock(so);
 				}
 				m_freem(cm);
 			}
@@ -1292,9 +1347,11 @@
 		if (mp == NULL) {
 			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
 			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
+			sounlock(so);
 			splx(s);
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			s = splsoftnet();
+			solock(so);
 			if (error != 0) {
 				/*
 				 * If any part of the record has been removed
@@ -1351,8 +1408,15 @@
 		} else if (flags & MSG_PEEK)
 			moff += len;
 		else {
-			if (mp != NULL)
-				*mp = m_copym(m, 0, len, M_WAIT);
+			if (mp != NULL) {
+				mt = m_copym(m, 0, len, M_NOWAIT);
+				if (__predict_false(mt == NULL)) {
+					sounlock(so);
+					mt = m_copym(m, 0, len, M_WAIT);
+					solock(so);
+				}
+				*mp = mt;
+			}
 			m->m_data += len;
 			m->m_len -= len;
 			so->so_rcv.sb_cc -= len;
@@ -1402,6 +1466,7 @@
 			error = sbwait(&so->so_rcv);
 			if (error != 0) {
 				sbunlock(&so->so_rcv);
+				sounlock(so);
 				splx(s);
 				return 0;
 			}
@@ -1438,7 +1503,6 @@
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
 		sbunlock(&so->so_rcv);
-		splx(s);
 		goto restart;
 	}
 
@@ -1446,6 +1510,7 @@
 		*flagsp |= flags;
  release:
 	sbunlock(&so->so_rcv);
+	sounlock(so);
 	splx(s);
 	return error;
 }
@@ -1454,17 +1519,23 @@
 soshutdown(struct socket *so, int how)
 {
 	const struct protosw	*pr;
+	int	error;
+
+	KASSERT(solocked(so));
 
 	pr = so->so_proto;
 	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
 		return (EINVAL);
 
-	if (how == SHUT_RD || how == SHUT_RDWR)
+	if (how == SHUT_RD || how == SHUT_RDWR) {
 		sorflush(so);
+		error = 0;
+	}
 	if (how == SHUT_WR || how == SHUT_RDWR)
-		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL,
+		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL,
 		    NULL, NULL, NULL);
-	return 0;
+
+	return error;
 }
 
 void
@@ -1472,14 +1543,14 @@
 {
 	struct sockbuf	*sb, asb;
 	const struct protosw	*pr;
-	int		s;
+
+	KASSERT(solocked(so));
 
 	sb = &so->so_rcv;
 	pr = so->so_proto;
+	socantrcvmore(so);
 	sb->sb_flags |= SB_NOINTR;
-	(void) sblock(sb, M_WAITOK);
-	s = splnet();
-	socantrcvmore(so);
+	(void )sblock(sb, M_WAITOK);
 	sbunlock(sb);
 	asb = *sb;
 	/*
@@ -1488,9 +1559,11 @@
 	 */
 	memset(&sb->sb_startzero, 0,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
-	splx(s);
-	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
+	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) {
+		sounlock(so);
 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
+		solock(so);
+	}
 	sbrelease(&asb, so);
 }
 
@@ -1611,6 +1684,7 @@
 {
 	int error, prerr;
 
+	solock(so);
 	if (level == SOL_SOCKET)
 		error = sosetopt1(so, level, optname, m);
 	else
@@ -1627,6 +1701,7 @@
 			error = prerr;
 	} else if (m != NULL)
 		(void)m_free(m);
+	sounlock(so);
 	return error;
 }
 
@@ -1634,13 +1709,15 @@
 sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
 {
 	struct mbuf	*m;
+	int		error;
 
+	solock(so);
 	if (level != SOL_SOCKET) {
 		if (so->so_proto && so->so_proto->pr_ctloutput) {
-			return ((*so->so_proto->pr_ctloutput)
+			error = ((*so->so_proto->pr_ctloutput)
 				  (PRCO_GETOPT, so, level, optname, mp));
 		} else
-			return (ENOPROTOOPT);
+			error = (ENOPROTOOPT);
 	} else {
 		m = m_get(M_WAIT, MT_SOOPTS);
 		m->m_len = sizeof(int);
@@ -1709,12 +1786,16 @@
 			break;
 
 		default:
+			sounlock(so);
 			(void)m_free(m);
 			return (ENOPROTOOPT);
 		}
 		*mp = m;
-		return (0);
+		error = 0;
 	}
+
+	sounlock(so);
+	return (error);
 }
 
 void
@@ -1731,9 +1812,11 @@
 	struct socket	*so;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
+	solock(so);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
+	sounlock(so);
 }
 
 /*ARGSUSED*/
@@ -1741,19 +1824,25 @@
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket	*so;
+	int rv;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
+	if (hint != NOTE_SUBMIT)
+		solock(so);
 	kn->kn_data = so->so_rcv.sb_cc;
 	if (so->so_state & SS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
-	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (kn->kn_data >= so->so_rcv.sb_lowat);
+		rv = 1;
+	} else if (so->so_error)	/* temporary udp error */
+		rv = 1;
+	else if (kn->kn_sfflags & NOTE_LOWAT)
+		rv = (kn->kn_data >= kn->kn_sdata);
+	else 
+		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
+	if (hint != NOTE_SUBMIT)
+		sounlock(so);
+	return rv;
 }
 
 static void
@@ -1762,9 +1851,11 @@
 	struct socket	*so;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
+	solock(so);
 	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
+	sounlock(so);
 }
 
 /*ARGSUSED*/
@@ -1772,22 +1863,28 @@
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket	*so;
+	int rv;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
+	if (hint != NOTE_SUBMIT)
+		solock(so);
 	kn->kn_data = sbspace(&so->so_snd);
 	if (so->so_state & SS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
-	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (((so->so_state & SS_ISCONNECTED) == 0) &&
+		rv = 1;
+	} else if (so->so_error)	/* temporary udp error */
+		rv = 1;
+	else if (((so->so_state & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
-		return (0);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+		rv = 0;
+	else if (kn->kn_sfflags & NOTE_LOWAT)
+		rv = (kn->kn_data >= kn->kn_sdata);
+	else
+		rv = (kn->kn_data >= so->so_snd.sb_lowat);
+	if (hint != NOTE_SUBMIT)
+		sounlock(so);
+	return rv;
 }
 
 /*ARGSUSED*/
@@ -1795,6 +1892,7 @@
 filt_solisten(struct knote *kn, long hint)
 {
 	struct socket	*so;
+	int rv;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
 
@@ -1802,8 +1900,13 @@
 	 * Set kn_data to number of incoming connections, not
 	 * counting partial (incomplete) connections.
 	 */
+	if (hint != NOTE_SUBMIT)
+		solock(so);
 	kn->kn_data = so->so_qlen;
-	return (kn->kn_data > 0);
+	rv = (kn->kn_data > 0);
+	if (hint != NOTE_SUBMIT)
+		sounlock(so);
+	return rv;
 }
 
 static const struct filterops solisten_filtops =
@@ -1820,6 +1923,7 @@
 	struct sockbuf	*sb;
 
 	so = ((file_t *)kn->kn_obj)->f_data;
+	solock(so);
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		if (so->so_options & SO_ACCEPTCONN)
@@ -1833,10 +1937,12 @@
 		sb = &so->so_snd;
 		break;
 	default:
+		sounlock(so);
 		return (EINVAL);
 	}
 	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
+	sounlock(so);
 	return (0);
 }
 
@@ -1866,28 +1972,30 @@
 sopoll(struct socket *so, int events)
 {
 	int revents = 0;
-	int s;
 
+#ifndef DIAGNOSTIC
+	/*
+	 * Do a quick, unlocked check in expectation that the socket
+	 * will be ready for I/O.  Don't do this check if DIAGNOSTIC,
+	 * as the solocked() assertions will fail.
+	 */
 	if ((revents = sodopoll(so, events)) != 0)
 		return revents;
+#endif
 
-	KERNEL_LOCK(1, curlwp);
-	s = splsoftnet();
-
+	solock(so);
 	if ((revents = sodopoll(so, events)) == 0) {
 		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
 			selrecord(curlwp, &so->so_rcv.sb_sel);
-			so->so_rcv.sb_flags |= SB_SEL;
+			so->so_rcv.sb_flags |= SB_NOTIFY;
 		}
 
 		if (events & (POLLOUT | POLLWRNORM)) {
 			selrecord(curlwp, &so->so_snd.sb_sel);
-			so->so_snd.sb_flags |= SB_SEL;
+			so->so_snd.sb_flags |= SB_NOTIFY;
 		}
 	}
-
-	splx(s);
-	KERNEL_UNLOCK_ONE(curlwp);
+	sounlock(so);
 
 	return revents;
 }
--- a/sys/kern/uipc_socket2.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_socket2.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,37 @@
-/*	$NetBSD: uipc_socket2.c,v 1.90 2008/03/01 14:16:51 rmind Exp $	*/
+/*	$NetBSD: uipc_socket2.c,v 1.91 2008/04/24 11:38:36 ad Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
@@ -32,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.90 2008/03/01 14:16:51 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.91 2008/04/24 11:38:36 ad Exp $");
 
 #include "opt_mbuftrace.h"
 #include "opt_sb_max.h"
@@ -45,21 +78,57 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
+#include <sys/domain.h>
 #include <sys/poll.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/kauth.h>
+#include <sys/pool.h>
 
 /*
- * Primitive routines for operating on sockets and socket buffers
+ * Primitive routines for operating on sockets and socket buffers.
+ *
+ * Locking rules and assumptions:
+ *
+ * o socket::so_lock can change on the fly.  The low level routines used
+ *   to lock sockets are aware of this.  When so_lock is acquired, the
+ *   routine locking must check to see if so_lock still points to the
+ *   lock that was acquired.  If so_lock has changed in the meantime, the
+ *   now irrelevant lock that was acquired must be dropped and the lock
+ *   operation retried.  Although not proven here, this is completely safe
+ *   on a multiprocessor system, even with relaxed memory ordering, given
+ *   the next two rules:
+ *
+ * o In order to mutate so_lock, the lock pointed to by the current value
+ *   of so_lock must be held: i.e., the socket must be held locked by the
+ *   changing thread.  The thread must issue membar_exit() to prevent
+ *   memory accesses being reordered, and can set so_lock to the desired
+ *   value.  If the lock pointed to by the new value of so_lock is not
+ *   held by the changing thread, the socket must then be considered
+ *   unlocked.
+ *
+ * o If so_lock is mutated, and the previous lock referred to by so_lock
+ *   could still be visible to other threads in the system (e.g. via file
+ *   descriptor or protocol-internal reference), then the old lock must
+ *   remain valid until the socket and/or protocol control block has been
+ *   torn down.
+ *
+ * o If a socket has a non-NULL so_head value (i.e. is in the process of
+ *   connecting), then locking the socket must also lock the socket pointed
+ *   to by so_head: their lock pointers must match.
+ *
+ * o If a socket has connections in progress (so_q, so_q0 not empty) then
+ *   locking the socket must also lock the sockets attached to both queues.
+ *   Again, their lock pointers must match.
+ *
+ * o Beyond the initial lock assignment in socreate(), assigning locks to
+ *   sockets is the responsibility of the individual protocols / protocol
+ *   domains.
  */
 
-/* strings for sleep message: */
-const char	netcon[] = "netcon";
-const char	netcls[] = "netcls";
-const char	netio[] = "netio";
-const char	netlck[] = "netlck";
+static POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL,
+    IPL_SOFTNET);
 
 u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
 static u_long sb_max_adj;	/* adjusted sb_max */
@@ -98,6 +167,8 @@
 soisconnecting(struct socket *so)
 {
 
+	KASSERT(solocked(so));
+
 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
 }
@@ -108,14 +179,18 @@
 	struct socket	*head;
 
 	head = so->so_head;
+
+	KASSERT(solocked(so));
+	KASSERT(head == NULL || solocked2(so, head));
+
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 	if (head && soqremque(so, 0)) {
 		soqinsque(head, so, 1);
 		sorwakeup(head);
-		wakeup((void *)&head->so_timeo);
+		cv_broadcast(&head->so_cv);
 	} else {
-		wakeup((void *)&so->so_timeo);
+		cv_broadcast(&so->so_cv);
 		sorwakeup(so);
 		sowwakeup(so);
 	}
@@ -125,9 +200,11 @@
 soisdisconnecting(struct socket *so)
 {
 
+	KASSERT(solocked(so));
+
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
-	wakeup((void *)&so->so_timeo);
+	cv_broadcast(&so->so_cv);
 	sowwakeup(so);
 	sorwakeup(so);
 }
@@ -136,9 +213,11 @@
 soisdisconnected(struct socket *so)
 {
 
+	KASSERT(solocked(so));
+
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
-	wakeup((void *)&so->so_timeo);
+	cv_broadcast(&so->so_cv);
 	sowwakeup(so);
 	sorwakeup(so);
 }
@@ -155,15 +234,18 @@
 sonewconn(struct socket *head, int connstatus)
 {
 	struct socket	*so;
-	int		soqueue;
+	int		soqueue, error;
+
+	KASSERT(solocked(head));
 
 	soqueue = connstatus ? 1 : 0;
 	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
 		return ((struct socket *)0);
-	so = pool_get(&socket_pool, PR_NOWAIT);
+	so = soget(false);
 	if (so == NULL)
 		return (NULL);
-	memset((void *)so, 0, sizeof(*so));
+	mutex_obj_hold(head->so_lock);
+	so->so_lock = head->so_lock;
 	so->so_type = head->so_type;
 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
@@ -180,8 +262,6 @@
 	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
 	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
 #endif
-	selinit(&so->so_rcv.sb_sel);
-	selinit(&so->so_snd.sb_sel);
 	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
 	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
 	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
@@ -190,27 +270,65 @@
 	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
 	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
 	soqinsque(head, so, soqueue);
-	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
-	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
-	    (struct lwp *)0)) {
+	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
+	    NULL, NULL);
+	KASSERT(solocked(so));
+	if (error != 0) {
 		(void) soqremque(so, soqueue);
-		seldestroy(&so->so_rcv.sb_sel);
-		seldestroy(&so->so_snd.sb_sel);
-		pool_put(&socket_pool, so);
+		soput(so);
 		return (NULL);
 	}
 	if (connstatus) {
 		sorwakeup(head);
-		wakeup((void *)&head->so_timeo);
+		cv_broadcast(&head->so_cv);
 		so->so_state |= connstatus;
 	}
 	return (so);
 }
 
+struct socket *
+soget(bool waitok)
+{
+	struct socket *so;
+
+	so = pool_get(&socket_pool, (waitok ? PR_WAITOK : PR_NOWAIT));
+	if (__predict_false(so == NULL))
+		return (NULL);
+	memset(so, 0, sizeof(*so));
+	TAILQ_INIT(&so->so_q0);
+	TAILQ_INIT(&so->so_q);
+	cv_init(&so->so_cv, "socket");
+	cv_init(&so->so_rcv.sb_cv, "netio");
+	cv_init(&so->so_snd.sb_cv, "netio");
+	selinit(&so->so_rcv.sb_sel);
+	selinit(&so->so_snd.sb_sel);
+	so->so_rcv.sb_so = so;
+	so->so_snd.sb_so = so;
+	return so;
+}
+
+void
+soput(struct socket *so)
+{
+
+	KASSERT(!cv_has_waiters(&so->so_cv));
+	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
+	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
+	seldestroy(&so->so_rcv.sb_sel);
+	seldestroy(&so->so_snd.sb_sel);
+	mutex_obj_free(so->so_lock);
+	cv_destroy(&so->so_cv);
+	cv_destroy(&so->so_rcv.sb_cv);
+	cv_destroy(&so->so_snd.sb_cv);
+	pool_put(&socket_pool, so);
+}
+
 void
 soqinsque(struct socket *head, struct socket *so, int q)
 {
 
+	KASSERT(solocked2(head, so));
+
 #ifdef DIAGNOSTIC
 	if (so->so_onq != NULL)
 		panic("soqinsque");
@@ -233,6 +351,8 @@
 	struct socket	*head;
 
 	head = so->so_head;
+
+	KASSERT(solocked(so));
 	if (q == 0) {
 		if (so->so_onq != &head->so_q0)
 			return (0);
@@ -242,6 +362,7 @@
 			return (0);
 		head->so_qlen--;
 	}
+	KASSERT(solocked2(so, head));
 	TAILQ_REMOVE(so->so_onq, so, so_qe);
 	so->so_onq = NULL;
 	so->so_head = NULL;
@@ -262,6 +383,8 @@
 socantsendmore(struct socket *so)
 {
 
+	KASSERT(solocked(so));
+
 	so->so_state |= SS_CANTSENDMORE;
 	sowwakeup(so);
 }
@@ -270,6 +393,8 @@
 socantrcvmore(struct socket *so)
 {
 
+	KASSERT(solocked(so));
+
 	so->so_state |= SS_CANTRCVMORE;
 	sorwakeup(so);
 }
@@ -280,32 +405,23 @@
 int
 sbwait(struct sockbuf *sb)
 {
+	struct socket *so;
+	kmutex_t *lock;
+	int error;
 
-	sb->sb_flags |= SB_WAIT;
-	return (tsleep((void *)&sb->sb_cc,
-	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
-	    sb->sb_timeo));
-}
+	so = sb->sb_so;
+
+	KASSERT(solocked(so));
 
-/*
- * Lock a sockbuf already known to be locked;
- * return any error returned from sleep (EINTR).
- */
-int
-sb_lock(struct sockbuf *sb)
-{
-	int	error;
-
-	while (sb->sb_flags & SB_LOCK) {
-		sb->sb_flags |= SB_WANT;
-		error = tsleep((void *)&sb->sb_flags,
-		    (sb->sb_flags & SB_NOINTR) ?  PSOCK : PSOCK|PCATCH,
-		    netlck, 0);
-		if (error)
-			return (error);
-	}
-	sb->sb_flags |= SB_LOCK;
-	return (0);
+	sb->sb_flags |= SB_NOTIFY;
+	lock = so->so_lock;
+	if ((sb->sb_flags & SB_NOINTR) != 0)
+		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
+	else
+		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
+	if (__predict_false(lock != so->so_lock))
+		solockretry(so, lock);
+	return error;
 }
 
 /*
@@ -318,17 +434,16 @@
 {
 	int band;
 
+	KASSERT(solocked(so));
+	KASSERT(sb->sb_so == so);
+
 	if (code == POLL_IN)
 		band = POLLIN|POLLRDNORM;
 	else
 		band = POLLOUT|POLLWRNORM;
-	selnotify(&sb->sb_sel, band, 0);
-
-	sb->sb_flags &= ~SB_SEL;
-	if (sb->sb_flags & SB_WAIT) {
-		sb->sb_flags &= ~SB_WAIT;
-		wakeup((void *)&sb->sb_cc);
-	}
+	sb->sb_flags &= ~SB_NOTIFY;
+	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
+	cv_broadcast(&sb->sb_cv);
 	if (sb->sb_flags & SB_ASYNC)
 		fownsignal(so->so_pgid, SIGIO, code, band, so);
 	if (sb->sb_flags & SB_UPCALL)
@@ -386,6 +501,9 @@
 int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
+
+	KASSERT(so->so_lock == NULL || solocked(so));
+
 	/*
 	 * there's at least one application (a configure script of screen)
 	 * which expects a fifo is writable even if it has "some" bytes
@@ -429,7 +547,10 @@
 	rlim_t maxcc;
 	struct uidinfo *uidinfo;
 
-	KDASSERT(sb_max_adj != 0);
+	KASSERT(so->so_lock == NULL || solocked(so));
+	KASSERT(sb->sb_so == so);
+	KASSERT(sb_max_adj != 0);
+
 	if (cc == 0 || cc > sb_max_adj)
 		return (0);
 	if (so) {
@@ -451,12 +572,15 @@
 }
 
 /*
- * Free mbufs held by a socket, and reserved mbuf space.
+ * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
+ * that the socket is held locked here: see sorflush().
  */
 void
 sbrelease(struct sockbuf *sb, struct socket *so)
 {
 
+	KASSERT(sb->sb_so == so);
+
 	sbflush(sb);
 	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
 	sb->sb_mbmax = 0;
@@ -493,6 +617,8 @@
 {
 	struct mbuf *m = sb->sb_mb;
 
+	KASSERT(solocked(sb->sb_so));
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -512,6 +638,8 @@
 	struct mbuf *m = sb->sb_mb;
 	struct mbuf *n;
 
+	KASSERT(solocked(sb->sb_so));
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -560,6 +688,8 @@
 {
 	struct mbuf	*n;
 
+	KASSERT(solocked(sb->sb_so));
+
 	if (m == 0)
 		return;
 
@@ -601,6 +731,7 @@
 sbappendstream(struct sockbuf *sb, struct mbuf *m)
 {
 
+	KASSERT(solocked(sb->sb_so));
 	KDASSERT(m->m_nextpkt == NULL);
 	KASSERT(sb->sb_mb == sb->sb_lastrecord);
 
@@ -620,18 +751,22 @@
 void
 sbcheck(struct sockbuf *sb)
 {
-	struct mbuf	*m;
+	struct mbuf	*m, *m2;
 	u_long		len, mbcnt;
 
+	KASSERT(solocked(sb->sb_so));
+
 	len = 0;
 	mbcnt = 0;
-	for (m = sb->sb_mb; m; m = m->m_next) {
-		len += m->m_len;
-		mbcnt += MSIZE;
-		if (m->m_flags & M_EXT)
-			mbcnt += m->m_ext.ext_size;
-		if (m->m_nextpkt)
-			panic("sbcheck nextpkt");
+	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
+		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
+			len += m2->m_len;
+			mbcnt += MSIZE;
+			if (m2->m_flags & M_EXT)
+				mbcnt += m2->m_ext.ext_size;
+			if (m2 != m && m2->m_nextpkt != NULL)
+				panic("sbcheck nextpkt");
+		}
 	}
 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
 		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
@@ -650,6 +785,8 @@
 {
 	struct mbuf	*m;
 
+	KASSERT(solocked(sb->sb_so));
+
 	if (m0 == 0)
 		return;
 
@@ -683,6 +820,8 @@
 {
 	struct mbuf	*m, **mp;
 
+	KASSERT(solocked(sb->sb_so));
+
 	if (m0 == 0)
 		return;
 
@@ -735,6 +874,8 @@
 	struct mbuf	*m, *n, *nlast;
 	int		space, len;
 
+	KASSERT(solocked(sb->sb_so));
+
 	space = asa->sa_len;
 
 	if (m0 != NULL) {
@@ -787,7 +928,6 @@
 
 	sb->sb_mbtail = nlast;
 	SBLASTMBUFCHK(sb, "sbappendaddr");
-
 	SBLASTRECORDCHK(sb, "sbappendaddr 2");
 
 	return (1);
@@ -804,6 +944,8 @@
 	struct mbuf *m;
 	const int salen = asa->sa_len;
 
+	KASSERT(solocked(sb->sb_so));
+
 	/* only the first in each chain need be a pkthdr */
 	MGETHDR(m, M_DONTWAIT, MT_SONAME);
 	if (m == 0)
@@ -836,6 +978,8 @@
 	struct mbuf *m, *n, *n0, *nlast;
 	int error;
 
+	KASSERT(solocked(sb->sb_so));
+
 	/*
 	 * XXX sbprio reserved for encoding priority of this* request:
 	 *  SB_PRIO_NONE --> honour normal sb limits
@@ -931,6 +1075,8 @@
 	struct mbuf	*m, *mlast, *n;
 	int		space;
 
+	KASSERT(solocked(sb->sb_so));
+
 	space = 0;
 	if (control == 0)
 		panic("sbappendcontrol");
@@ -959,7 +1105,6 @@
 
 	sb->sb_mbtail = mlast;
 	SBLASTMBUFCHK(sb, "sbappendcontrol");
-
 	SBLASTRECORDCHK(sb, "sbappendcontrol 2");
 
 	return (1);
@@ -976,6 +1121,8 @@
 	int		eor;
 	struct mbuf	*o;
 
+	KASSERT(solocked(sb->sb_so));
+
 	eor = 0;
 	while (m) {
 		eor |= m->m_flags & M_EOR;
@@ -1028,6 +1175,7 @@
 sbflush(struct sockbuf *sb)
 {
 
+	KASSERT(solocked(sb->sb_so));
 	KASSERT((sb->sb_flags & SB_LOCK) == 0);
 
 	while (sb->sb_mbcnt)
@@ -1047,6 +1195,8 @@
 {
 	struct mbuf	*m, *mn, *next;
 
+	KASSERT(solocked(sb->sb_so));
+
 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 	while (len > 0) {
 		if (m == 0) {
@@ -1099,6 +1249,8 @@
 {
 	struct mbuf	*m, *mn;
 
+	KASSERT(solocked(sb->sb_so));
+
 	m = sb->sb_mb;
 	if (m) {
 		sb->sb_mb = m->m_nextpkt;
@@ -1142,3 +1294,116 @@
 	cp->cmsg_type = type;
 	return (m);
 }
+/* Swap the held mutex "lock" for so->so_lock ("lock" is released first). */
+void
+solockretry(struct socket *so, kmutex_t *lock)
+{
+	/* Re-check after each acquire: so_lock may have changed meanwhile. */
+	while (lock != so->so_lock) {
+		mutex_exit(lock);
+		lock = so->so_lock;
+		mutex_enter(lock);
+	}
+}
+/* Report whether mutex_owned() is true for the socket's current lock. */
+bool
+solocked(struct socket *so)
+{
+
+	return mutex_owned(so->so_lock);
+}
+/* True only if so1 and so2 share one lock and mutex_owned() is true for it. */
+bool
+solocked2(struct socket *so1, struct socket *so2)
+{
+	kmutex_t *lock;
+
+	lock = so1->so_lock;
+	if (lock != so2->so_lock)
+		return false;	/* different locks: never "both locked" */
+	return mutex_owned(lock);
+}
+
+/*
+ * Give a socket that has no lock yet the default one, softnet_lock, and
+ * acquire it.  Meant for PRU_ATTACH in protocols without special locking.
+ */
+void
+sosetlock(struct socket *so)
+{
+	kmutex_t *lock;
+
+	if (so->so_lock == NULL) {
+		lock = softnet_lock;
+		so->so_lock = lock;
+		mutex_obj_hold(lock);	/* socket's reference on the lock */
+		mutex_enter(lock);
+	}
+
+	/* In all cases, lock must be held on return from PRU_ATTACH. */
+	KASSERT(solocked(so));
+}
+
+/*
+ * Set SB_LOCK on sockbuf sb.  If it is already set, return EWOULDBLOCK
+ * unless wf is M_WAITOK, else sleep; the sleep is interruptible unless
+ * SB_NOINTR is set.  Returns error without lock if sleep is interrupted.
+ */
+int
+sblock(struct sockbuf *sb, int wf)
+{
+	struct socket *so;
+	kmutex_t *lock;
+	int error;
+
+	KASSERT(solocked(sb->sb_so));
+	/* Re-test SB_LOCK after every wakeup. */
+	for (;;) {
+		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
+			sb->sb_flags |= SB_LOCK;
+			return 0;
+		}
+		if (wf != M_WAITOK)
+			return EWOULDBLOCK;
+		so = sb->sb_so;
+		lock = so->so_lock;	/* remember which mutex we sleep with */
+		if ((sb->sb_flags & SB_NOINTR) != 0) {
+			cv_wait(&so->so_cv, lock);
+			error = 0;
+		} else
+			error = cv_wait_sig(&so->so_cv, lock);
+		if (__predict_false(lock != so->so_lock))
+			solockretry(so, lock);	/* so_lock was replaced */
+		if (error != 0)
+			return error;
+	}
+}
+/* Release the sockbuf lock (SB_LOCK) and wake all waiters on so_cv. */
+void
+sbunlock(struct sockbuf *sb)
+{
+	struct socket *so;
+
+	so = sb->sb_so;
+
+	KASSERT(solocked(so));
+	KASSERT((sb->sb_flags & SB_LOCK) != 0);
+
+	sb->sb_flags &= ~SB_LOCK;
+	cv_broadcast(&so->so_cv);
+}
+/* Sleep on so_cv (cv_timedwait_sig, timeout timo); follow lock changes. */
+int
+sowait(struct socket *so, int timo)
+{
+	kmutex_t *lock;
+	int error;
+
+	KASSERT(solocked(so));
+
+	lock = so->so_lock;
+	error = cv_timedwait_sig(&so->so_cv, lock, timo);
+	if (__predict_false(lock != so->so_lock))
+		solockretry(so, lock);	/* so_lock was replaced */
+	return error;
+}
--- a/sys/kern/uipc_syscalls.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_syscalls.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,37 @@
-/*	$NetBSD: uipc_syscalls.c,v 1.128 2008/03/21 21:55:00 ad Exp $	*/
+/*	$NetBSD: uipc_syscalls.c,v 1.129 2008/04/24 11:38:36 ad Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
@@ -32,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.128 2008/03/21 21:55:00 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.129 2008/04/24 11:38:36 ad Exp $");
 
 #include "opt_pipe.h"
 
@@ -139,7 +172,7 @@
 {
 	file_t		*fp, *fp2;
 	struct mbuf	*nam;
-	int		error, s, fd;
+	int		error, fd;
 	struct socket	*so, *so2;
 
 	if ((fp = fd_getfile(sock)) == NULL)
@@ -150,9 +183,8 @@
 		return (error);
 	nam = m_get(M_WAIT, MT_SONAME);
 	*new_sock = fd;
-	s = splsoftnet();
 	so = fp->f_data;
-	fd_putfile(sock);	/* XXX wrong, socket can disappear */
+	solock(so);
 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
 		error = EOPNOTSUPP;
 		goto bad;
@@ -170,8 +202,7 @@
 			so->so_error = ECONNABORTED;
 			break;
 		}
-		error = tsleep(&so->so_timeo, PSOCK | PCATCH,
-		    netcon, 0);
+		error = sowait(so, 0);
 		if (error) {
 			goto bad;
 		}
@@ -182,7 +213,7 @@
 		goto bad;
 	}
 	/* connection has been removed from the listen queue */
-	KNOTE(&so->so_rcv.sb_sel.sel_klist, 0);
+	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
 	so2 = TAILQ_FIRST(&so->so_q);
 	if (soqremque(so2, 1) == 0)
 		panic("accept");
@@ -191,7 +222,7 @@
 	fp2->f_ops = &socketops;
 	fp2->f_data = so2;
 	error = soaccept(so2, nam);
-	splx(s);
+	sounlock(so);
 	if (error) {
 		/* an error occurred, free the file descriptor and mbuf */
 		m_freem(nam);
@@ -204,10 +235,12 @@
 		fd_affix(curproc, fp2, fd);
 		*name = nam;
 	}
+	fd_putfile(sock);
 	return (error);
  bad:
-	splx(s);
+ 	sounlock(so);
  	m_freem(nam);
+	fd_putfile(sock);
  	fd_abort(curproc, fp2, fd);
  	return (error);
 }
@@ -263,12 +296,12 @@
 	struct socket	*so;
 	int		error;
 	int		interrupted = 0;
-	int		s;
 
 	if ((error = fd_getsock(fd, &so)) != 0) {
 		m_freem(nam);
 		return (error);
 	}
+	solock(so);
 	MCLAIM(nam, so->so_mowner);
 	if (so->so_state & SS_ISCONNECTING) {
 		error = EALREADY;
@@ -282,10 +315,8 @@
 		error = EINPROGRESS;
 		goto out;
 	}
-	s = splsoftnet();
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		error = tsleep(&so->so_timeo, PSOCK | PCATCH,
-			       netcon, 0);
+		error = sowait(so, 0);
 		if (error) {
 			if (error == EINTR || error == ERESTART)
 				interrupted = 1;
@@ -296,13 +327,13 @@
 		error = so->so_error;
 		so->so_error = 0;
 	}
-	splx(s);
  bad:
 	if (!interrupted)
 		so->so_state &= ~SS_ISCONNECTING;
 	if (error == ERESTART)
 		error = EINTR;
  out:
+ 	sounlock(so);
  	fd_putfile(fd);
 	m_freem(nam);
 	return (error);
@@ -324,11 +355,11 @@
 
 	p = curproc;
 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
-	    SCARG(uap, protocol), l);
+	    SCARG(uap, protocol), l, NULL);
 	if (error)
 		return (error);
 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
-	    SCARG(uap, protocol), l);
+	    SCARG(uap, protocol), l, so1);
 	if (error)
 		goto free1;
 	if ((error = fd_allocfile(&fp1, &fd)) != 0)
@@ -345,20 +376,22 @@
 	fp2->f_ops = &socketops;
 	fp2->f_data = so2;
 	sv[1] = fd;
-	if ((error = soconnect2(so1, so2)) != 0)
-		goto free4;
-	if (SCARG(uap, type) == SOCK_DGRAM) {
+	solock(so1);
+	error = soconnect2(so1, so2);
+	if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) {
 		/*
 		 * Datagram socket connection is asymmetric.
 		 */
-		 if ((error = soconnect2(so2, so1)) != 0)
-			goto free4;
+		error = soconnect2(so2, so1);
 	}
-	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
-	fd_affix(p, fp2, sv[1]);
-	fd_affix(p, fp1, sv[0]);
-	return (error);
- free4:
+	sounlock(so1);
+	if (error == 0)
+		error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
+	if (error == 0) {
+		fd_affix(p, fp2, sv[1]);
+		fd_affix(p, fp1, sv[0]);
+		return (0);
+	}
 	fd_abort(p, fp2, sv[1]);
  free3:
 	fd_abort(p, fp1, sv[0]);
@@ -519,9 +552,7 @@
 		MCLAIM(control, so->so_mowner);
 
 	len = auio.uio_resid;
-	KERNEL_LOCK(1, NULL);
 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
-	KERNEL_UNLOCK_ONE(NULL);
 	/* Protocol is responsible for freeing 'control' */
 	control = NULL;
 
@@ -798,10 +829,8 @@
 
 	len = auio.uio_resid;
 	mp->msg_flags &= MSG_USERFLAGS;
-	KERNEL_LOCK(1, NULL);
 	error = (*so->so_receive)(so, from, &auio, NULL, control,
-			  &mp->msg_flags);
-	KERNEL_UNLOCK_ONE(NULL);
+	    &mp->msg_flags);
 	len -= auio.uio_resid;
 	*retsize = len;
 	if (error != 0 && len != 0
@@ -843,7 +872,9 @@
 
 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
 		return (error);
+	solock(so);
 	error = soshutdown(so, SCARG(uap, how));
+	sounlock(so);
 	fd_putfile(SCARG(uap, s));
 	return (error);
 }
@@ -951,9 +982,9 @@
 	proc_t		*p;
 
 	p = curproc;
-	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l)) != 0)
+	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
 		return (error);
-	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l)) != 0)
+	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
 		goto free1;
 	/* remember this socket pair implements a pipe */
 	wso->so_state |= SS_ISAPIPE;
@@ -972,7 +1003,10 @@
 	wf->f_ops = &socketops;
 	wf->f_data = wso;
 	retval[1] = fd;
-	if ((error = unp_connect2(wso, rso, PRU_CONNECT2)) != 0)
+	solock(wso);
+	error = unp_connect2(wso, rso, PRU_CONNECT2);
+	sounlock(wso);
+	if (error != 0)
 		goto free4;
 	fd_affix(p, wf, (int)retval[1]);
 	fd_affix(p, rf, (int)retval[0]);
@@ -1003,20 +1037,21 @@
 	if ((error = fd_getsock(fd, &so)) != 0)
 		return error;
 
+	m = m_getclr(M_WAIT, MT_SONAME);
+	MCLAIM(m, so->so_mowner);
+
+	solock(so);
 	if (which == PRU_PEERADDR
 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
 		error = ENOTCONN;
-		goto bad;
+	} else {
+		*nam = m;
+		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
+		    NULL);
 	}
-
-	m = m_getclr(M_WAIT, MT_SONAME);
-	*nam = m;
-	MCLAIM(m, so->so_mowner);
-	error = (*so->so_proto->pr_usrreq)(so, which, (struct mbuf *)0,
-	    m, (struct mbuf *)0, (struct lwp *)0);
+ 	sounlock(so);
 	if (error != 0)
 		m_free(m);
- bad:
  	fd_putfile(fd);
 	return error;
 }
--- a/sys/kern/uipc_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/kern/uipc_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_usrreq.c,v 1.111 2008/04/20 07:47:18 mlelstv Exp $	*/
+/*	$NetBSD: uipc_usrreq.c,v 1.112 2008/04/24 11:38:36 ad Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000, 2004, 2008 The NetBSD Foundation, Inc.
@@ -103,7 +103,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.111 2008/04/20 07:47:18 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.112 2008/04/24 11:38:36 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -131,6 +131,42 @@
  *	SEQPACKET, RDM
  *	rethink name space problems
  *	need a proper out-of-band
+ *
+ * Notes on locking:
+ *
+ * The generic rules noted in uipc_socket2.c apply.  In addition:
+ *
+ * o We have a global lock, uipc_lock.
+ *
+ * o All datagram sockets are locked by uipc_lock.
+ *
+ * o For stream socketpairs, the two endpoints are created sharing the same
+ *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
+ *   matching locks.
+ *
+ * o Stream sockets created via socket() start life with their own
+ *   independent lock.
+ * 
+ * o Stream connections to a named endpoint are slightly more complicated.
+ *   Sockets that have called listen() have their lock pointer mutated to
+ *   the global uipc_lock.  When establishing a connection, the connecting
+ *   socket also has its lock mutated to uipc_lock, which matches the head
+ *   (listening socket).  We create a new socket for accept() to return, and
+ *   that also shares the head's lock.  Until the connection is completely
+ *   done on both ends, all three sockets are locked by uipc_lock.  Once the
+ *   connection is complete, the association with the head's lock is broken.
+ *   The connecting socket and the socket returned from accept() have their
+ *   lock pointers mutated away from uipc_lock, and back to the connecting
+ *   socket's original, independent lock.  The head continues to be locked
+ *   by uipc_lock.
+ *
+ * o If uipc_lock is determined to be a significant source of contention,
+ *   it could easily be hashed out.  It is difficult to simply make it an
+ *   independent lock because of visibility / garbage collection issues:
+ *   if a socket has been associated with a lock at any point, that lock
+ *   must remain valid until the socket is no longer visible in the system.
+ *   The lock must not be freed or otherwise destroyed until any sockets
+ *   that had referenced it have also been destroyed.
  */
 const struct sockaddr_un sun_noname = {
 	.sun_len = sizeof(sun_noname),
@@ -139,6 +175,106 @@
 ino_t	unp_ino;			/* prototype for fake inode numbers */
 
 struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
+static kmutex_t *uipc_lock;
+
+/*
+ * Initialize Unix protocols: allocate the domain-wide lock, uipc_lock.
+ */
+void
+uipc_init(void)
+{
+
+	uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+}
+
+/*
+ * A connection succeeded: disassociate both endpoints from the head's
+ * lock, and make them share their own lock.  There is a race here: for
+ * a very brief time one endpoint will be locked by a different lock
+ * than the other end.  However, since the current thread holds the old
+ * lock (the listening socket's lock, the head) access can still only be
+ * made to one side of the connection.
+ */
+static void
+unp_setpeerlocks(struct socket *so, struct socket *so2)
+{
+	struct unpcb *unp;
+	kmutex_t *lock;
+
+	KASSERT(solocked2(so, so2));
+
+	/*
+	 * Bail out if either end of the socket is not yet fully
+	 * connected or accepted.  We only break the lock association
+	 * with the head when the pair of sockets stand completely
+	 * on their own.
+	 */
+	if (so->so_head != NULL || so2->so_head != NULL)
+		return;
+
+	/*
+	 * Drop references to old lock.  A third reference (from the
+	 * queue head) must be held as we still hold its lock.  Bonus:
+	 * we don't need to worry about garbage collecting the lock.
+	 */
+	lock = so->so_lock;
+	KASSERT(lock == uipc_lock);
+	mutex_obj_free(lock);	/* one reference dropped per endpoint */
+	mutex_obj_free(lock);
+
+	/*
+	 * Grab stream lock from the initiator and share between the two
+	 * endpoints.  Issue memory barrier to ensure all modifications
+	 * become globally visible before the lock change.  so2 is
+	 * assumed not to have a stream lock, because it was created
+	 * purely for the server side to accept this connection and
+	 * started out life using the domain-wide lock.
+	 */
+	unp = sotounpcb(so);
+	KASSERT(unp->unp_streamlock != NULL);
+	KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
+	lock = unp->unp_streamlock;
+	unp->unp_streamlock = NULL;
+	mutex_obj_hold(lock);	/* both endpoints will reference it */
+	membar_exit();
+	so->so_lock = lock;
+	so2->so_lock = lock;
+}
+
+/*
+ * Reset a socket's lock back to the domain-wide lock (uipc_lock).
+ */
+static void
+unp_resetlock(struct socket *so)
+{
+	kmutex_t *olock, *nlock;
+	struct unpcb *unp;
+
+	KASSERT(solocked(so));
+	/* Stash the old lock in unp_streamlock and switch to uipc_lock. */
+	olock = so->so_lock;
+	nlock = uipc_lock;
+	if (olock == nlock)
+		return;		/* already on the domain-wide lock */
+	unp = sotounpcb(so);
+	KASSERT(unp->unp_streamlock == NULL);
+	unp->unp_streamlock = olock;	/* read back by unp_setpeerlocks() */
+	mutex_obj_hold(nlock);
+	mutex_enter(nlock);
+	so->so_lock = nlock;
+	mutex_exit(olock);	/* old lock released only after the switch */
+}
+/* Free a unpcb, its address, and any stream lock reference it stashed. */
+static void
+unp_free(struct unpcb *unp)
+{
+
+	if (unp->unp_addr)
+		free(unp->unp_addr, M_SONAME);
+	if (unp->unp_streamlock != NULL)
+		mutex_obj_free(unp->unp_streamlock);
+	free(unp, M_PCB);
+}
 
 int
 unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
@@ -148,6 +284,9 @@
 	const struct sockaddr_un *sun;
 
 	so2 = unp->unp_conn->unp_socket;
+
+	KASSERT(solocked(so2));
+
 	if (unp->unp_addr)
 		sun = unp->unp_addr;
 	else
@@ -156,10 +295,12 @@
 		control = unp_addsockcred(l, control);
 	if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
 	    control) == 0) {
+		so2->so_rcv.sb_overflowed++;
+	    	sounlock(so2);
 		unp_dispose(control);
 		m_freem(control);
 		m_freem(m);
-		so2->so_rcv.sb_overflowed++;
+	    	solock(so2);
 		return (ENOBUFS);
 	} else {
 		sorwakeup(so2);
@@ -168,33 +309,38 @@
 }
 
 void
-unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
+unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr)
 {
 	const struct sockaddr_un *sun;
+	struct unpcb *unp;
+	bool ext;
 
-	if (unp->unp_addr)
-		sun = unp->unp_addr;
-	else
-		sun = &sun_noname;
-	nam->m_len = sun->sun_len;
-	if (nam->m_len > MLEN)
-		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
-	memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
-}
+	unp = sotounpcb(so);
+	ext = false;
 
-void
-unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
-{
-	const struct sockaddr_un *sun;
-
-	if (unp->unp_conn && unp->unp_conn->unp_addr)
-		sun = unp->unp_conn->unp_addr;
-	else
-		sun = &sun_noname;
-	nam->m_len = sun->sun_len;
-	if (nam->m_len > MLEN)
-		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
-	memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
+	for (;;) {
+		sun = NULL;
+		if (peeraddr) {
+			if (unp->unp_conn && unp->unp_conn->unp_addr)
+				sun = unp->unp_conn->unp_addr;
+		} else {
+			if (unp->unp_addr)
+				sun = unp->unp_addr;
+		}
+		if (sun == NULL)
+			sun = &sun_noname;
+		nam->m_len = sun->sun_len;
+		if (nam->m_len > MLEN && !ext) {
+			sounlock(so);
+			MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK);
+			solock(so);
+			ext = true;
+		} else {
+			KASSERT(nam->m_len <= MAXPATHLEN * 2);
+			memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
+			break;
+		}
+	}
 }
 
 /*ARGSUSED*/
@@ -216,9 +362,12 @@
 		panic("uipc_usrreq: unexpected control mbuf");
 #endif
 	p = l ? l->l_proc : NULL;
-	if (unp == 0 && req != PRU_ATTACH) {
-		error = EINVAL;
-		goto release;
+	if (req != PRU_ATTACH) {
+		if (unp == 0) {
+			error = EINVAL;
+			goto release;
+		}
+		KASSERT(solocked(so));
 	}
 
 	switch (req) {
@@ -237,10 +386,15 @@
 
 	case PRU_BIND:
 		KASSERT(l != NULL);
-		error = unp_bind(unp, nam, l);
+		error = unp_bind(so, nam, l);
 		break;
 
 	case PRU_LISTEN:
+		/*
+		 * If the socket can accept a connection, it must be
+		 * locked by uipc_lock.
+		 */
+		unp_resetlock(so);
 		if (unp->unp_vnode == 0)
 			error = EINVAL;
 		break;
@@ -259,15 +413,35 @@
 		break;
 
 	case PRU_ACCEPT:
-		unp_setpeeraddr(unp, nam);
+		KASSERT(so->so_lock == uipc_lock);
 		/*
 		 * Mark the initiating STREAM socket as connected *ONLY*
 		 * after it's been accepted.  This prevents a client from
 		 * overrunning a server and receiving ECONNREFUSED.
 		 */
-		if (unp->unp_conn != NULL &&
-		    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
-			soisconnected(unp->unp_conn->unp_socket);
+		if (unp->unp_conn == NULL)
+			break;
+		so2 = unp->unp_conn->unp_socket;
+		if (so2->so_state & SS_ISCONNECTING) {
+			KASSERT(solocked2(so, so->so_head));
+			KASSERT(solocked2(so2, so->so_head));
+			soisconnected(so2);
+		}
+		/*
+		 * If the connection is fully established, break the
+		 * association with uipc_lock and give the connected
+		 * pair a separate lock to share.
+		 */
+		unp_setpeerlocks(so2, so);
+		/*
+		 * Only now return peer's address, as we may need to
+		 * block in order to allocate memory.
+		 *
+		 * XXX Minor race: connection can be broken while
+		 * lock is dropped in unp_setaddr().  We will return
+		 * error == 0 and sun_noname as the peer address.
+		 */
+		unp_setaddr(so, nam, true);
 		break;
 
 	case PRU_SHUTDOWN:
@@ -288,6 +462,7 @@
 			if (unp->unp_conn == 0)
 				break;
 			so2 = unp->unp_conn->unp_socket;
+			KASSERT(solocked2(so, so2));
 			/*
 			 * Adjust backpressure on sender
 			 * and wakeup any waiting to write.
@@ -316,8 +491,10 @@
 		 * forging SCM_CREDS.
 		 */
 		if (control) {
-			KASSERT(l != NULL);
-			if ((error = unp_internalize(&control, l)) != 0) {
+			sounlock(so);
+			error = unp_internalize(&control);
+			solock(so);
+			if (error != 0) {
 				m_freem(control);
 				m_freem(m);
 				break;
@@ -326,11 +503,20 @@
 		switch (so->so_type) {
 
 		case SOCK_DGRAM: {
+			KASSERT(so->so_lock == uipc_lock);
 			if (nam) {
 				if ((so->so_state & SS_ISCONNECTED) != 0)
 					error = EISCONN;
 				else {
-					KASSERT(l != NULL);
+					/*
+					 * Note: once connected, the
+					 * socket's lock must not be
+					 * dropped until we have sent
+					 * the message and disconnected.
+					 * This is necessary to prevent
+					 * intervening control ops, like
+					 * another connection.
+					 */
 					error = unp_connect(so, nam, l);
 				}
 			} else {
@@ -338,9 +524,11 @@
 					error = ENOTCONN;
 			}
 			if (error) {
+				sounlock(so);
 				unp_dispose(control);
 				m_freem(control);
 				m_freem(m);
+				solock(so);
 				break;
 			}
 			KASSERT(p != NULL);
@@ -358,6 +546,7 @@
 				break;
 			}
 			so2 = unp->unp_conn->unp_socket;
+			KASSERT(solocked2(so, so2));
 			if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
 				/*
 				 * Credentials are passed only once on
@@ -372,10 +561,8 @@
 			 * Wake up readers.
 			 */
 			if (control) {
-				if (sbappendcontrol(rcv, m, control) == 0) {
-					unp_dispose(control);
-					m_freem(control);
-				}
+				if (sbappendcontrol(rcv, m, control) != 0)
+					control = NULL;
 			} else
 				sbappend(rcv, m);
 			snd->sb_mbmax -=
@@ -389,6 +576,12 @@
 			sorwakeup(so2);
 #undef snd
 #undef rcv
+			if (control != NULL) {
+				sounlock(so);
+				unp_dispose(control);
+				m_freem(control);
+				solock(so);
+			}
 			break;
 
 		default:
@@ -397,7 +590,7 @@
 		break;
 
 	case PRU_ABORT:
-		unp_drop(unp, ECONNABORTED);
+		(void)unp_drop(unp, ECONNABORTED);
 
 		KASSERT(so->so_head == NULL);
 #ifdef DIAGNOSTIC
@@ -411,6 +604,7 @@
 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 			so2 = unp->unp_conn->unp_socket;
+			KASSERT(solocked2(so, so2));
 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
 		}
 		((struct stat *) m)->st_dev = NODEV;
@@ -433,11 +627,11 @@
 		break;
 
 	case PRU_SOCKADDR:
-		unp_setsockaddr(unp, nam);
+		unp_setaddr(so, nam, false);
 		break;
 
 	case PRU_PEERADDR:
-		unp_setpeeraddr(unp, nam);
+		unp_setaddr(so, nam, true);
 		break;
 
 	default:
@@ -459,6 +653,8 @@
 	struct mbuf *m = *mp;
 	int optval = 0, error = 0;
 
+	KASSERT(solocked(so));
+
 	if (level != 0) {
 		error = ENOPROTOOPT;
 		if (op == PRCO_SETOPT && m)
@@ -500,6 +696,7 @@
 		break;
 
 	case PRCO_GETOPT:
+		sounlock(so);
 		switch (optname) {
 		case LOCAL_PEEREID:
 			if (unp->unp_flags & UNP_EIDSVALID) {
@@ -525,6 +722,7 @@
 			error = ENOPROTOOPT;
 			break;
 		}
+		solock(so);
 		break;
 	}
 	return (error);
@@ -552,23 +750,40 @@
 	struct unpcb *unp;
 	int error;
 
-	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
-		switch (so->so_type) {
+	switch (so->so_type) {
+	case SOCK_STREAM:
+		if (so->so_lock == NULL) {
+			/* 
+			 * XXX Assuming that no socket locks are held,
+			 * as this call may sleep.
+			 */
+			so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+			solock(so);
+		}
+		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+			error = soreserve(so, unpst_sendspace, unpst_recvspace);
+			if (error != 0)
+				return (error);
+		}
+		break;
 
-		case SOCK_STREAM:
-			error = soreserve(so, unpst_sendspace, unpst_recvspace);
-			break;
-
-		case SOCK_DGRAM:
+	case SOCK_DGRAM:
+		if (so->so_lock == NULL) {
+			mutex_obj_hold(uipc_lock);
+			so->so_lock = uipc_lock;
+			solock(so);
+		}
+		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
-			break;
+			if (error != 0)
+				return (error);
+		}
+		break;
 
-		default:
-			panic("unp_attach");
-		}
-		if (error)
-			return (error);
+	default:
+		panic("unp_attach");
 	}
+	KASSERT(solocked(so));
 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
 	if (unp == NULL)
 		return (ENOBUFS);
@@ -582,20 +797,32 @@
 void
 unp_detach(struct unpcb *unp)
 {
+	struct socket *so;
+	vnode_t *vp;
 
-	if (unp->unp_vnode) {
-		unp->unp_vnode->v_socket = 0;
-		vrele(unp->unp_vnode);
-		unp->unp_vnode = 0;
+	so = unp->unp_socket;
+
+ retry:
+	if ((vp = unp->unp_vnode) != NULL) {
+		sounlock(so);
+		/* Acquire v_interlock to protect against unp_connect(). */
+		mutex_enter(&vp->v_interlock);
+		vp->v_socket = NULL;
+		vrelel(vp, 0);
+		solock(so);
+		unp->unp_vnode = NULL;
 	}
 	if (unp->unp_conn)
 		unp_disconnect(unp);
-	while (unp->unp_refs)
-		unp_drop(unp->unp_refs, ECONNRESET);
-	soisdisconnected(unp->unp_socket);
-	unp->unp_socket->so_pcb = 0;
-	if (unp->unp_addr)
-		free(unp->unp_addr, M_SONAME);
+	while (unp->unp_refs) {
+		KASSERT(solocked2(so, unp->unp_refs->unp_socket));
+		if (unp_drop(unp->unp_refs, ECONNRESET)) {
+			solock(so);
+			goto retry;
+		}
+	}
+	soisdisconnected(so);
+	so->so_pcb = NULL;
 	if (unp_rights) {
 		/*
 		 * Normally the receive buffer is flushed later,
@@ -604,27 +831,30 @@
 		 * of those descriptor references after the garbage collector
 		 * gets them (resulting in a "panic: closef: count < 0").
 		 */
-		sorflush(unp->unp_socket);
-		free(unp, M_PCB);
+		sorflush(so);
+		unp_free(unp);
+		sounlock(so);
 		unp_gc();
+		solock(so);
 	} else
-		free(unp, M_PCB);
+		unp_free(unp);
 }
 
 int
-unp_bind(struct unpcb *unp, struct mbuf *nam, struct lwp *l)
+unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l)
 {
 	struct sockaddr_un *sun;
+	struct unpcb *unp;
 	vnode_t *vp;
 	struct vattr vattr;
 	size_t addrlen;
-	struct proc *p;
 	int error;
 	struct nameidata nd;
+	proc_t *p;
 
-	if (unp->unp_vnode != 0)
+	unp = sotounpcb(so);
+	if (unp->unp_vnode != NULL)
 		return (EINVAL);
-
 	if ((unp->unp_flags & UNP_BUSY) != 0) {
 		/*
 		 * EALREADY may not be strictly accurate, but since this
@@ -633,13 +863,14 @@
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BUSY;
+	sounlock(so);
 
-	p = l->l_proc;
 	/*
 	 * Allocate the new sockaddr.  We have to allocate one
 	 * extra byte so that we can ensure that the pathname
 	 * is nul-terminated.
 	 */
+	p = l->l_proc;
 	addrlen = nam->m_len + 1;
 	sun = malloc(addrlen, M_SONAME, M_WAITOK);
 	m_copydata(nam, 0, nam->m_len, (void *)sun);
@@ -669,13 +900,14 @@
 	if (error)
 		goto bad;
 	vp = nd.ni_vp;
+	solock(so);
 	vp->v_socket = unp->unp_socket;
 	unp->unp_vnode = vp;
 	unp->unp_addrlen = addrlen;
 	unp->unp_addr = sun;
 	unp->unp_connid.unp_pid = p->p_pid;
-	unp->unp_connid.unp_euid = kauth_cred_geteuid(p->p_cred);
-	unp->unp_connid.unp_egid = kauth_cred_getegid(p->p_cred);
+	unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
+	unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
 	unp->unp_flags |= UNP_EIDSBIND;
 	VOP_UNLOCK(vp, 0);
 	unp->unp_flags &= ~UNP_BUSY;
@@ -683,6 +915,7 @@
 
  bad:
 	free(sun, M_SONAME);
+	solock(so);
 	unp->unp_flags &= ~UNP_BUSY;
 	return (error);
 }
@@ -695,7 +928,6 @@
 	struct socket *so2, *so3;
 	struct unpcb *unp, *unp2, *unp3;
 	size_t addrlen;
-	struct proc *p;
 	int error;
 	struct nameidata nd;
 
@@ -708,8 +940,8 @@
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BUSY;
+	sounlock(so);
 
-	p = l->l_proc;
 	/*
 	 * Allocate a temporary sockaddr.  We have to allocate one extra
 	 * byte so that we can ensure that the pathname is nul-terminated.
@@ -733,19 +965,34 @@
 	}
 	if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
 		goto bad;
+	/* Acquire v_interlock to protect against unp_detach(). */
+	mutex_enter(&vp->v_interlock);
 	so2 = vp->v_socket;
-	if (so2 == 0) {
+	if (so2 == NULL) {
+		mutex_exit(&vp->v_interlock);
 		error = ECONNREFUSED;
 		goto bad;
 	}
 	if (so->so_type != so2->so_type) {
+		mutex_exit(&vp->v_interlock);
 		error = EPROTOTYPE;
 		goto bad;
 	}
-	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+	solock(so);
+	unp_resetlock(so);
+	mutex_exit(&vp->v_interlock);
+	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
+		/*
+		 * This may seem somewhat fragile but is OK: if we can
+		 * see SO_ACCEPTCONN set on the endpoint (so2), then it
+		 * must be locked by the domain-wide uipc_lock.
+		 */
+		KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
+		    so2->so_lock == uipc_lock);
 		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
 		    (so3 = sonewconn(so2, 0)) == 0) {
 			error = ECONNREFUSED;
+			sounlock(so);
 			goto bad;
 		}
 		unp2 = sotounpcb(so2);
@@ -758,21 +1005,23 @@
 			unp3->unp_addrlen = unp2->unp_addrlen;
 		}
 		unp3->unp_flags = unp2->unp_flags;
-		unp3->unp_connid.unp_pid = p->p_pid;
-		unp3->unp_connid.unp_euid = kauth_cred_geteuid(p->p_cred);
-		unp3->unp_connid.unp_egid = kauth_cred_getegid(p->p_cred);
+		unp3->unp_connid.unp_pid = l->l_proc->p_pid;
+		unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
+		unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
 		unp3->unp_flags |= UNP_EIDSVALID;
-		so2 = so3;
 		if (unp2->unp_flags & UNP_EIDSBIND) {
 			unp->unp_connid = unp2->unp_connid;
 			unp->unp_flags |= UNP_EIDSVALID;
 		}
+		so2 = so3;
 	}
 	error = unp_connect2(so, so2, PRU_CONNECT);
+	sounlock(so);
  bad:
 	vput(vp);
  bad2:
 	free(sun, M_SONAME);
+	solock(so);
 	unp->unp_flags &= ~UNP_BUSY;
 	return (error);
 }
@@ -785,6 +1034,20 @@
 
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
+
+	/*
+	 * All three sockets involved must be locked by same lock:
+	 *
+	 * local endpoint (so)
+	 * remote endpoint (so2)
+	 * queue head (so->so_head, only if PR_CONNREQUIRED)
+	 */
+	KASSERT(solocked2(so, so2));
+	if (so->so_head != NULL) {
+		KASSERT(so->so_lock == uipc_lock);
+		KASSERT(solocked2(so, so->so_head));
+	}
+
 	unp2 = sotounpcb(so2);
 	unp->unp_conn = unp2;
 	switch (so->so_type) {
@@ -803,6 +1066,15 @@
 		else
 			soisconnected(so);
 		soisconnected(so2);
+		/*
+		 * If the connection is fully established, break the
+		 * association with uipc_lock and give the connected
+		 * pair a separate lock to share.  For CONNECT2, we
+		 * require that the locks already match (the sockets
+		 * are created that way).
+		 */
+		if (req == PRU_CONNECT)
+			unp_setpeerlocks(so, so2);
 		break;
 
 	default:
@@ -815,18 +1087,20 @@
 unp_disconnect(struct unpcb *unp)
 {
 	struct unpcb *unp2 = unp->unp_conn;
+	struct socket *so;
 
 	if (unp2 == 0)
 		return;
 	unp->unp_conn = 0;
-	switch (unp->unp_socket->so_type) {
-
+	so = unp->unp_socket;
+	switch (so->so_type) {
 	case SOCK_DGRAM:
 		if (unp2->unp_refs == unp)
 			unp2->unp_refs = unp->unp_nextref;
 		else {
 			unp2 = unp2->unp_refs;
 			for (;;) {
+				KASSERT(solocked2(so, unp2->unp_socket));
 				if (unp2 == 0)
 					panic("unp_disconnect");
 				if (unp2->unp_nextref == unp)
@@ -836,11 +1110,12 @@
 			unp2->unp_nextref = unp->unp_nextref;
 		}
 		unp->unp_nextref = 0;
-		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
+		so->so_state &= ~SS_ISCONNECTED;
 		break;
 
 	case SOCK_STREAM:
-		soisdisconnected(unp->unp_socket);
+		KASSERT(solocked2(so, unp2->unp_socket));
+		soisdisconnected(so);
 		unp2->unp_conn = 0;
 		soisdisconnected(unp2->unp_socket);
 		break;
@@ -864,20 +1139,23 @@
 		socantrcvmore(so);
 }
 
-void
+bool
 unp_drop(struct unpcb *unp, int errno)
 {
 	struct socket *so = unp->unp_socket;
 
+	KASSERT(solocked(so));
+
 	so->so_error = errno;
 	unp_disconnect(unp);
 	if (so->so_head) {
-		so->so_pcb = 0;
+		so->so_pcb = NULL;
+		/* sofree() drops the socket lock */
 		sofree(so);
-		if (unp->unp_addr)
-			free(unp->unp_addr, M_SONAME);
-		free(unp, M_PCB);
+		unp_free(unp);
+		return true;
 	}
+	return false;
 }
 
 #ifdef notdef
@@ -971,7 +1249,8 @@
 
 	/*
 	 * Now that adding them has succeeded, update all of the
-	 * descriptor passing state.	 */
+	 * descriptor passing state.
+	 */
 	rp = (file_t **)CMSG_DATA(cm);
 	for (i = 0; i < nfds; i++) {
 		fp = *rp++;
@@ -1003,7 +1282,7 @@
 }
 
 int
-unp_internalize(struct mbuf **controlp, struct lwp *l)
+unp_internalize(struct mbuf **controlp)
 {
 	struct filedesc *fdescp = curlwp->l_fd;
 	struct mbuf *control = *controlp;
@@ -1013,8 +1292,6 @@
 	int i, fd, *fdp;
 	int nfds, error;
 
-	KASSERT(l == curlwp);
-
 	error = 0;
 	newcm = NULL;
 
@@ -1212,7 +1489,7 @@
 			atomic_or_uint(&fp->f_flag, FMARK);
 
 			if (fp->f_type != DTYPE_SOCKET ||
-			    (so = (struct socket *)fp->f_data) == NULL ||
+			    (so = fp->f_data) == NULL ||
 			    so->so_proto->pr_domain != &unixdomain ||
 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
 				mutex_exit(&fp->f_lock);
@@ -1239,6 +1516,13 @@
 #endif
 			mutex_exit(&fp->f_lock);
 
+			/*
+			 * XXX Locking a socket with filelist_lock held
+			 * is ugly.  filelist_lock can be taken by the
+			 * pagedaemon when reclaiming items from file_cache.
+			 * Socket activity could delay the pagedaemon.
+			 */
+			solock(so);
 			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
 			/*
 			 * Mark descriptors referenced from sockets queued
@@ -1252,6 +1536,7 @@
 					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
 				}
 			}
+			sounlock(so);
 		}
 	} while (unp_defer);
 
@@ -1318,8 +1603,12 @@
 
 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 		fp = *fpp;
-		if (fp->f_type == DTYPE_SOCKET)
+		if (fp->f_type == DTYPE_SOCKET) {
+			so = fp->f_data;
+			solock(so);
 			sorflush(fp->f_data);
+			sounlock(so);
+		}
 	}
 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 		closef(*fpp);
--- a/sys/miscfs/fifofs/fifo_vnops.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/miscfs/fifofs/fifo_vnops.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,37 @@
-/*	$NetBSD: fifo_vnops.c,v 1.63 2008/03/21 21:55:00 ad Exp $	*/
+/*	$NetBSD: fifo_vnops.c,v 1.64 2008/04/24 11:38:37 ad Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1990, 1993, 1995
@@ -32,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fifo_vnops.c,v 1.63 2008/03/21 21:55:00 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fifo_vnops.c,v 1.64 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -47,11 +80,11 @@
 #include <sys/ioctl.h>
 #include <sys/file.h>
 #include <sys/errno.h>
-#include <sys/malloc.h>
+#include <sys/kmem.h>
 #include <sys/un.h>
 #include <sys/poll.h>
 #include <sys/event.h>
-#include <sys/atomic.h>
+#include <sys/condvar.h>
 
 #include <miscfs/fifofs/fifo.h>
 #include <miscfs/genfs/genfs.h>
@@ -63,8 +96,10 @@
 struct fifoinfo {
 	struct socket	*fi_readsock;
 	struct socket	*fi_writesock;
-	long		fi_readers;
-	long		fi_writers;
+	kcondvar_t	fi_rcv;
+	int		fi_readers;
+	kcondvar_t	fi_wcv;
+	int		fi_writers;
 };
 
 int (**fifo_vnodeop_p)(void *);
@@ -153,60 +188,69 @@
 	vp = ap->a_vp;
 	p = l->l_proc;
 
-	KERNEL_LOCK(1, NULL);
 	if ((fip = vp->v_fifoinfo) == NULL) {
-		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+		fip = kmem_alloc(sizeof(*fip), KM_SLEEP);
 		vp->v_fifoinfo = fip;
-		error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l);
+		error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL);
 		if (error != 0) {
-			free(fip, M_VNODE);
+			kmem_free(fip, sizeof(*fip));
 			vp->v_fifoinfo = NULL;
-			goto done;
+			return (error);
 		}
 		fip->fi_readsock = rso;
-		error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l);
+		error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso);
 		if (error != 0) {
 			(void)soclose(rso);
-			free(fip, M_VNODE);
+			kmem_free(fip, sizeof(*fip));
 			vp->v_fifoinfo = NULL;
-			goto done;
+			return (error);
 		}
 		fip->fi_writesock = wso;
+		solock(wso);
 		if ((error = unp_connect2(wso, rso, PRU_CONNECT2)) != 0) {
+			sounlock(wso);
 			(void)soclose(wso);
 			(void)soclose(rso);
-			free(fip, M_VNODE);
+			kmem_free(fip, sizeof(*fip));
 			vp->v_fifoinfo = NULL;
-			goto done;
+			return (error);
 		}
-		fip->fi_readers = fip->fi_writers = 0;
+		fip->fi_readers = 0;
+		fip->fi_writers = 0;
 		wso->so_state |= SS_CANTRCVMORE;
 		rso->so_state |= SS_CANTSENDMORE;
+		cv_init(&fip->fi_rcv, "fiford");
+		cv_init(&fip->fi_wcv, "fifowr");
+	} else {
+		wso = fip->fi_writesock;
+		rso = fip->fi_readsock;
+		solock(wso);
 	}
+
 	if (ap->a_mode & FREAD) {
 		if (fip->fi_readers++ == 0) {
-			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
-			if (fip->fi_writers > 0)
-				wakeup(&fip->fi_writers);
+			wso->so_state &= ~SS_CANTSENDMORE;
+			cv_broadcast(&fip->fi_wcv);
 		}
 	}
 	if (ap->a_mode & FWRITE) {
 		if (fip->fi_writers++ == 0) {
-			fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
-			if (fip->fi_readers > 0)
-				wakeup(&fip->fi_readers);
+			rso->so_state &= ~SS_CANTRCVMORE;
+			cv_broadcast(&fip->fi_rcv);
 		}
 	}
 	if (ap->a_mode & FREAD) {
 		if (ap->a_mode & O_NONBLOCK) {
 		} else {
-			while (!soreadable(fip->fi_readsock) && fip->fi_writers == 0) {
+			while (!soreadable(rso) && fip->fi_writers == 0) {
 				VOP_UNLOCK(vp, 0);
-				error = tsleep(&fip->fi_readers,
-				    PCATCH | PSOCK, "fifor", 0);
+				error = cv_wait_sig(&fip->fi_rcv,
+				    wso->so_lock);
+				sounlock(wso);
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 				if (error)
 					goto bad;
+				solock(wso);
 			}
 		}
 	}
@@ -214,25 +258,26 @@
 		if (ap->a_mode & O_NONBLOCK) {
 			if (fip->fi_readers == 0) {
 				error = ENXIO;
+				sounlock(wso);
 				goto bad;
 			}
 		} else {
 			while (fip->fi_readers == 0) {
 				VOP_UNLOCK(vp, 0);
-				error = tsleep(&fip->fi_writers,
-				    PCATCH | PSOCK, "fifow", 0);
+				error = cv_wait_sig(&fip->fi_wcv,
+				    wso->so_lock);
+				sounlock(wso);
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 				if (error)
 					goto bad;
+				solock(wso);
 			}
 		}
 	}
-	KERNEL_UNLOCK_ONE(NULL);
+	sounlock(wso);
 	return (0);
  bad:
 	VOP_CLOSE(vp, ap->a_mode, ap->a_cred);
- done:
-	KERNEL_UNLOCK_ONE(NULL);
 	return (error);
 }
 
@@ -262,14 +307,13 @@
 #endif
 	if (uio->uio_resid == 0)
 		return (0);
-	KERNEL_LOCK(1, NULL);
-	if (ap->a_ioflag & IO_NDELAY)
-		rso->so_nbio = 1;
 	startresid = uio->uio_resid;
 	VOP_UNLOCK(ap->a_vp, 0);
-	error = (*rso->so_receive)(rso, (struct mbuf **)0, uio,
-	    (struct mbuf **)0, (struct mbuf **)0, (int *)0);
-	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+	if (ap->a_ioflag & IO_NDELAY) {
+		/* XXX Bogus, affects other threads. */
+		rso->so_nbio = 1;
+	}
+	error = (*rso->so_receive)(rso, NULL, uio, NULL, NULL, NULL);
 	/*
 	 * Clear EOF indication after first such return.
 	 */
@@ -281,7 +325,7 @@
 		    ap->a_vp->v_fifoinfo->fi_writers == 0)
 			error = 0;
 	}
-	KERNEL_UNLOCK_ONE(NULL);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 	return (error);
 }
 
@@ -306,16 +350,15 @@
 	if (ap->a_uio->uio_rw != UIO_WRITE)
 		panic("fifo_write mode");
 #endif
-	KERNEL_LOCK(1, NULL);
-	if (ap->a_ioflag & IO_NDELAY)
+	VOP_UNLOCK(ap->a_vp, 0);
+	if (ap->a_ioflag & IO_NDELAY) {
+		/* XXX Bogus, affects other threads. */
 		wso->so_nbio = 1;
-	VOP_UNLOCK(ap->a_vp, 0);
-	error = (*wso->so_send)(wso, (struct mbuf *)0, ap->a_uio, 0,
-	    (struct mbuf *)0, 0, curlwp /*XXX*/);
-	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+	}
+	error = (*wso->so_send)(wso, NULL, ap->a_uio, 0, NULL, 0, curlwp);
 	if (ap->a_ioflag & IO_NDELAY)
 		wso->so_nbio = 0;
-	KERNEL_UNLOCK_ONE(NULL);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 	return (error);
 }
 
@@ -363,19 +406,19 @@
 		int		a_events;
 		struct lwp	*a_l;
 	} */ *ap = v;
-	struct file	filetmp;
+	struct socket	*so;
 	int		revents;
 
 	revents = 0;
 	if (ap->a_events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
-		filetmp.f_data = ap->a_vp->v_fifoinfo->fi_readsock;
-		if (filetmp.f_data)
-			revents |= soo_poll(&filetmp, ap->a_events);
+		so = ap->a_vp->v_fifoinfo->fi_readsock;
+		if (so)
+			revents |= sopoll(so, ap->a_events);
 	}
 	if (ap->a_events & (POLLOUT | POLLWRNORM | POLLWRBAND)) {
-		filetmp.f_data = ap->a_vp->v_fifoinfo->fi_writesock;
-		if (filetmp.f_data)
-			revents |= soo_poll(&filetmp, ap->a_events);
+		so = ap->a_vp->v_fifoinfo->fi_writesock;
+		if (so)
+			revents |= sopoll(so, ap->a_events);
 	}
 
 	return (revents);
@@ -431,35 +474,40 @@
 	} */ *ap = v;
 	struct vnode	*vp;
 	struct fifoinfo	*fip;
+	struct socket *wso, *rso;
 	int isrevoke;
 
 	vp = ap->a_vp;
 	fip = vp->v_fifoinfo;
 	isrevoke = (ap->a_fflag & (FREAD | FWRITE | FNONBLOCK)) == FNONBLOCK;
-	KERNEL_LOCK(1, NULL);
+	wso = fip->fi_writesock;
+	rso = fip->fi_readsock;
+	solock(wso);
 	if (isrevoke) {
 		if (fip->fi_readers != 0) {
 			fip->fi_readers = 0;
-			socantsendmore(fip->fi_writesock);
+			socantsendmore(wso);
 		}
 		if (fip->fi_writers != 0) {
 			fip->fi_writers = 0;
-			socantrcvmore(fip->fi_readsock);
+			socantrcvmore(rso);
 		}
 	} else {
 		if ((ap->a_fflag & FREAD) && --fip->fi_readers == 0)
-			socantsendmore(fip->fi_writesock);
+			socantsendmore(wso);
 		if ((ap->a_fflag & FWRITE) && --fip->fi_writers == 0)
-			socantrcvmore(fip->fi_readsock);
+			socantrcvmore(rso);
 	}
-	/* Shut down if all readers and writers are gone. */
 	if ((fip->fi_readers + fip->fi_writers) == 0) {
-		(void) soclose(fip->fi_readsock);
-		(void) soclose(fip->fi_writesock);
-		FREE(fip, M_VNODE);
+		sounlock(wso);
+		(void) soclose(rso);
+		(void) soclose(wso);
+		cv_destroy(&fip->fi_rcv);
+		cv_destroy(&fip->fi_wcv);
+		kmem_free(fip, sizeof(*fip));
 		vp->v_fifoinfo = NULL;
-	}
-	KERNEL_UNLOCK_ONE(NULL);
+	} else
+		sounlock(wso);
 	return (0);
 }
 
@@ -488,7 +536,7 @@
 	struct fifoinfo	*fip;
 
 	fip = vp->v_fifoinfo;
-	printf(", fifo with %ld readers and %ld writers",
+	printf(", fifo with %d readers and %d writers",
 	    fip->fi_readers, fip->fi_writers);
 }
 
@@ -529,24 +577,32 @@
 	struct socket *so;
 
 	so = (struct socket *)kn->kn_hook;
+	solock(so);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
+	sounlock(so);
 }
 
 static int
 filt_fiforead(struct knote *kn, long hint)
 {
 	struct socket *so;
+	int rv = 1;	/* EOF is always reported as ready */
 
 	so = (struct socket *)kn->kn_hook;
+	if (hint != NOTE_SUBMIT)
+		solock(so);
 	kn->kn_data = so->so_rcv.sb_cc;
-	if (so->so_state & SS_CANTRCVMORE) {
+	if (so->so_state & SS_CANTRCVMORE)
 		kn->kn_flags |= EV_EOF;
-		return (1);
-	}
-	kn->kn_flags &= ~EV_EOF;
-	return (kn->kn_data > 0);
+	else {
+		kn->kn_flags &= ~EV_EOF;
+		rv = (kn->kn_data > 0);
+	}
+	if (hint != NOTE_SUBMIT)
+		sounlock(so);	/* single exit: lock never leaks */
+	return rv;
 }
 
 static void
@@ -555,24 +611,32 @@
 	struct socket *so;
 
 	so = (struct socket *)kn->kn_hook;
+	solock(so);
 	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
+	sounlock(so);
 }
 
 static int
 filt_fifowrite(struct knote *kn, long hint)
 {
 	struct socket *so;
+	int rv = 1;	/* EOF is always reported as ready */
 
 	so = (struct socket *)kn->kn_hook;
+	if (hint != NOTE_SUBMIT)
+		solock(so);	/* acquire, not release, on entry */
 	kn->kn_data = sbspace(&so->so_snd);
-	if (so->so_state & SS_CANTSENDMORE) {
+	if (so->so_state & SS_CANTSENDMORE)
 		kn->kn_flags |= EV_EOF;
-		return (1);
-	}
-	kn->kn_flags &= ~EV_EOF;
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+	else {
+		kn->kn_flags &= ~EV_EOF;
+		rv = (kn->kn_data >= so->so_snd.sb_lowat);
+	}
+	if (hint != NOTE_SUBMIT)
+		sounlock(so);	/* single exit: lock never leaks */
+	return rv;
 }
 
 static const struct filterops fiforead_filtops =
@@ -607,7 +671,10 @@
 
 	ap->a_kn->kn_hook = so;
 
+	solock(so);
 	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, ap->a_kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
+	sounlock(so);
+
 	return (0);
 }
--- a/sys/miscfs/portal/portal_vnops.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/miscfs/portal/portal_vnops.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: portal_vnops.c,v 1.78 2008/03/21 21:55:00 ad Exp $	*/
+/*	$NetBSD: portal_vnops.c,v 1.79 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: portal_vnops.c,v 1.78 2008/03/21 21:55:00 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: portal_vnops.c,v 1.79 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -254,8 +254,7 @@
 	struct unpcb *unp2;
 	struct unpcb *unp3;
 
-	if (so2 == 0)
-		return (ECONNREFUSED);
+	KASSERT(solocked2(so, so2));
 
 	if (so->so_type != so2->so_type)
 		return (EPROTOTYPE);
@@ -289,11 +288,10 @@
 		int  a_mode;
 		kauth_cred_t a_cred;
 	} */ *ap = v;
-	struct socket *so = 0;
+	struct socket *so = 0, *so2;
 	struct portalnode *pt;
 	struct lwp *l = curlwp;
 	struct vnode *vp = ap->a_vp;
-	int s;
 	struct uio auio;
 	struct iovec aiov[2];
 	int res;
@@ -324,11 +322,17 @@
 
 	pt = VTOPORTAL(vp);
 	fmp = VFSTOPORTAL(vp->v_mount);
+	so2 = fmp->pm_server->f_data;
+
+	if (so2 == NULL) {
+		/* XXX very fishy */
+		return ECONNREFUSED;
+	}
 
 	/*
 	 * Create a new socket.
 	 */
-	error = socreate(AF_LOCAL, &so, SOCK_STREAM, 0, l);
+	error = socreate(AF_LOCAL, &so, SOCK_STREAM, 0, l, so2);
 	if (error)
 		goto bad;
 
@@ -336,16 +340,21 @@
 	 * Reserve some buffer space
 	 */
 	res = pt->pt_size + sizeof(pcred) + 512;	/* XXX */
+	solock(so);
 	error = soreserve(so, res, res);
-	if (error)
+	if (error) {
+		sounlock(so);
 		goto bad;
+	}
 
 	/*
 	 * Kick off connection
 	 */
-	error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
-	if (error)
+	error = portal_connect(so, so2);
+	if (error) {
+		sounlock(so);
 		goto bad;
+	}
 
 	/*
 	 * Wait for connection to complete
@@ -360,19 +369,17 @@
 	 * will happen if the server dies.  Sleep for 5 second intervals
 	 * and keep polling the reference count.   XXX.
 	 */
-	s = splsoftnet();
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 		if (fmp->pm_server->f_count == 1) {
 			error = ECONNREFUSED;
-			splx(s);
+			sounlock(so);
 			goto bad;
 		}
-		(void) tsleep(&so->so_timeo, PSOCK, "portalcon", 5 * hz);
+		sowait(so, 5 * hz);
 	}
-	splx(s);
-
 	if (so->so_error) {
 		error = so->so_error;
+		sounlock(so);
 		goto bad;
 	}
 
@@ -383,7 +390,7 @@
 	so->so_snd.sb_timeo = 0;
 	so->so_rcv.sb_flags |= SB_NOINTR;
 	so->so_snd.sb_flags |= SB_NOINTR;
-
+	sounlock(so);
 
 	pcred.pcr_flag = ap->a_mode;
 	pcred.pcr_uid = kauth_cred_geteuid(ap->a_cred);
@@ -402,8 +409,7 @@
 	auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
 	UIO_SETUP_SYSSPACE(&auio);
 
-	error = (*so->so_send)(so, (struct mbuf *) 0, &auio,
-			(struct mbuf *) 0, (struct mbuf *) 0, 0, l);
+	error = (*so->so_send)(so, NULL, &auio, NULL, NULL, 0, l);
 	if (error)
 		goto bad;
 
@@ -508,7 +514,9 @@
 	}
 
 	if (so) {
+		solock(so);
 		soshutdown(so, 2);
+		sounlock(so);
 		soclose(so);
 	}
 	return (error);
--- a/sys/net/if.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: if.c,v 1.218 2008/02/29 21:23:55 dyoung Exp $	*/
+/*	$NetBSD: if.c,v 1.219 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
- * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
+ * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.218 2008/02/29 21:23:55 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.219 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 
@@ -1637,13 +1637,15 @@
 			break;
 		if (so->so_proto == NULL)
 			return EOPNOTSUPP;
+		solock(so);
 #ifdef COMPAT_OSOCK
 		error = compat_ifioctl(so, ocmd, cmd, data, l);
 #else
-		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+		error = (*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
 		    (struct mbuf *)cmd, (struct mbuf *)data,
-		    (struct mbuf *)ifp, l));
+		    (struct mbuf *)ifp, l);
 #endif
+		sounlock(so);
 		break;
 	}
 
--- a/sys/net/if_etherip.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_etherip.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_etherip.c,v 1.18 2008/04/05 13:53:07 cegger Exp $        */
+/*      $NetBSD: if_etherip.c,v 1.19 2008/04/24 11:38:37 ad Exp $        */
 
 /*
  *  Copyright (c) 2006, Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
@@ -29,7 +29,7 @@
  *  SUCH DAMAGE.
  *
  *
- *  Copyright (c) 2003, 2004 The NetBSD Foundation.
+ *  Copyright (c) 2003, 2004, 2008 The NetBSD Foundation.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -86,7 +86,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_etherip.c,v 1.18 2008/04/05 13:53:07 cegger Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_etherip.c,v 1.19 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 #include "bpfilter.h"
@@ -103,6 +103,7 @@
 #include <sys/queue.h>
 #include <sys/kauth.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/intr.h>
 
 #include <net/if.h>
@@ -361,12 +362,13 @@
 	struct mbuf *m;
 	int s, error;
 
+	mutex_enter(softnet_lock);
 	for (;;) {
 		s = splnet();
 		IFQ_DEQUEUE(&ifp->if_snd, m);
 		splx(s);
 		if (m == NULL)
-			return;
+			break;
 		
 #if NBPFILTER > 0
 		if (ifp->if_bpf)
@@ -393,6 +395,7 @@
 			ifp->if_flags &= ~IFF_OACTIVE;
 		} else  m_freem(m);
 	}
+	mutex_exit(softnet_lock);
 }
 
 static int
--- a/sys/net/if_gre.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_gre.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_gre.c,v 1.129 2008/04/03 21:40:59 dyoung Exp $ */
+/*	$NetBSD: if_gre.c,v 1.130 2008/04/24 11:38:37 ad Exp $ */
 
 /*
  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
@@ -47,7 +47,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.129 2008/04/03 21:40:59 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.130 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_gre.h"
 #include "opt_inet.h"
@@ -544,24 +544,23 @@
 	struct mbuf	**mp;
 	struct proc	*p;
 	long		space, resid;
-	int		error, s;
+	int		error;
 
 	p = l->l_proc;
 
 	resid = top->m_pkthdr.len;
 	if (p)
 		l->l_ru.ru_msgsnd++;
-#define	snderr(errno)	{ error = errno; splx(s); goto release; }
+#define	snderr(errno)	{ error = errno; goto release; }
 
+	solock(so);
 	if ((error = sblock(&so->so_snd, M_NOWAIT)) != 0)
 		goto out;
-	s = splsoftnet();
 	if (so->so_state & SS_CANTSENDMORE)
 		snderr(EPIPE);
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
-		splx(s);
 		goto release;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
@@ -576,27 +575,19 @@
 		snderr(EMSGSIZE);
 	if (space < resid)
 		snderr(EWOULDBLOCK);
-	splx(s);
 	mp = &top;
 	/*
 	 * Data is prepackaged in "top".
 	 */
-	s = splsoftnet();
-
 	if (so->so_state & SS_CANTSENDMORE)
 		snderr(EPIPE);
-
 	error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, top, NULL, NULL, l);
-	splx(s);
-
 	top = NULL;
 	mp = &top;
-	if (error != 0)
-		goto release;
-
  release:
 	sbunlock(&so->so_snd);
  out:
+ 	sounlock(so);
 	if (top != NULL)
 		m_freem(top);
 	return error;
@@ -610,7 +601,7 @@
 gre_soreceive(struct socket *so, struct mbuf **mp0)
 {
 	struct mbuf *m, **mp;
-	int flags, len, error, s, type;
+	int flags, len, error, type;
 	const struct protosw	*pr;
 	struct mbuf *nextrecord;
 
@@ -625,14 +616,14 @@
 
 	KASSERT(pr->pr_flags & PR_ATOMIC);
 
+	solock(so);
 	if (so->so_state & SS_ISCONFIRMING)
 		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, curlwp);
-
  restart:
-	if ((error = sblock(&so->so_rcv, M_NOWAIT)) != 0)
+	if ((error = sblock(&so->so_rcv, M_NOWAIT)) != 0) {
+		sounlock(so);
 		return error;
-	s = splsoftnet();
-
+	}
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, do not block awaiting more.
@@ -796,13 +787,12 @@
 	if (*mp0 == NULL && (flags & MSG_EOR) == 0 &&
 	    (so->so_state & SS_CANTRCVMORE) == 0) {
 		sbunlock(&so->so_rcv);
-		splx(s);
 		goto restart;
 	}
 
  release:
 	sbunlock(&so->so_rcv);
-	splx(s);
+	sounlock(so);
 	return error;
 }
 
@@ -1057,6 +1047,7 @@
 
 	ss = mtod(m, struct sockaddr_storage *);
 
+	solock(so);
 	if ((rc = gre_getsockname(so, m, l)) != 0)
 		goto out;
 	*src = *ss;
@@ -1064,8 +1055,8 @@
 	if ((rc = gre_getpeername(so, m, l)) != 0)
 		goto out;
 	*dst = *ss;
-
 out:
+	sounlock(so);
 	m_freem(m);
 	return rc;
 }
--- a/sys/net/if_ppp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_ppp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_ppp.c,v 1.121 2008/02/07 01:22:01 dyoung Exp $	*/
+/*	$NetBSD: if_ppp.c,v 1.122 2008/04/24 11:38:37 ad Exp $	*/
 /*	Id: if_ppp.c,v 1.6 1997/03/04 03:33:00 paulus Exp 	*/
 
 /*
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_ppp.c,v 1.121 2008/02/07 01:22:01 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_ppp.c,v 1.122 2008/04/24 11:38:37 ad Exp $");
 
 #include "ppp.h"
 
@@ -128,6 +128,7 @@
 #include <sys/kauth.h>
 #include <sys/intr.h>
 #include <sys/simplelock.h>
+#include <sys/socketvar.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -1268,6 +1269,7 @@
 	struct mbuf *m;
 	int s;
 
+	mutex_enter(softnet_lock);
 	if (!(sc->sc_flags & SC_TBUSY)
 	    && (IFQ_IS_EMPTY(&sc->sc_if.if_snd) == 0 || sc->sc_fastq.ifq_head
 		|| sc->sc_outm)) {
@@ -1284,6 +1286,7 @@
 			break;
 		ppp_inproc(sc, m);
 	}
+	mutex_exit(softnet_lock);
 }
 
 #ifdef PPP_COMPRESS
--- a/sys/net/if_pppoe.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_pppoe.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: if_pppoe.c,v 1.84 2008/02/20 17:05:53 matt Exp $ */
+/* $NetBSD: if_pppoe.c,v 1.85 2008/04/24 11:38:37 ad Exp $ */
 
 /*-
  * Copyright (c) 2002 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_pppoe.c,v 1.84 2008/02/20 17:05:53 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_pppoe.c,v 1.85 2008/04/24 11:38:37 ad Exp $");
 
 #include "pppoe.h"
 #include "bpfilter.h"
@@ -55,6 +55,7 @@
 #include <sys/ioctl.h>
 #include <sys/kauth.h>
 #include <sys/intr.h>
+#include <sys/socketvar.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -363,7 +364,9 @@
 pppoe_softintr_handler(void *dummy)
 {
 	/* called at splsoftnet() */
+	mutex_enter(softnet_lock);
 	pppoe_input();
+	mutex_exit(softnet_lock);
 }
 
 /* called at appropriate protection level */
--- a/sys/net/if_sl.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_sl.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_sl.c,v 1.110 2008/02/07 01:22:01 dyoung Exp $	*/
+/*	$NetBSD: if_sl.c,v 1.111 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1987, 1989, 1992, 1993
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_sl.c,v 1.110 2008/02/07 01:22:01 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_sl.c,v 1.111 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 #include "bpfilter.h"
@@ -77,6 +77,7 @@
 #include <sys/conf.h>
 #include <sys/tty.h>
 #include <sys/kernel.h>
+#include <sys/socketvar.h>
 #if __NetBSD__
 #include <sys/systm.h>
 #include <sys/kauth.h>
@@ -697,6 +698,7 @@
 	/*
 	 * Output processing loop.
 	 */
+	mutex_enter(softnet_lock);
 	for (;;) {
 #ifdef INET
 		struct ip *ip;
@@ -965,6 +967,7 @@
 		splx(s);
 #endif
 	}
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/net/if_strip.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/if_strip.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_strip.c,v 1.85 2008/02/20 17:05:53 matt Exp $	*/
+/*	$NetBSD: if_strip.c,v 1.86 2008/04/24 11:38:37 ad Exp $	*/
 /*	from: NetBSD: if_sl.c,v 1.38 1996/02/13 22:00:23 christos Exp $	*/
 
 /*
@@ -87,7 +87,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_strip.c,v 1.85 2008/02/20 17:05:53 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_strip.c,v 1.86 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 #include "bpfilter.h"
@@ -111,6 +111,7 @@
 #include <sys/syslog.h>
 #include <sys/cpu.h>
 #include <sys/intr.h>
+#include <sys/socketvar.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -1068,6 +1069,7 @@
 	/*
 	 * Output processing loop.
 	 */
+	mutex_enter(softnet_lock);
 	for (;;) {
 #ifdef INET
 		struct ip *ip;
@@ -1259,6 +1261,7 @@
 		splx(s);
 #endif
 	}
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/net/raw_cb.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/raw_cb.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: raw_cb.c,v 1.18 2007/03/04 06:03:18 christos Exp $	*/
+/*	$NetBSD: raw_cb.c,v 1.19 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1980, 1986, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: raw_cb.c,v 1.18 2007/03/04 06:03:18 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: raw_cb.c,v 1.19 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -98,7 +98,10 @@
 	struct socket *so = rp->rcb_socket;
 
 	so->so_pcb = 0;
+	KASSERT(so->so_lock == softnet_lock);	/* XXX */
+	/* sofree drops the socket's lock. */
 	sofree(so);
+	mutex_enter(softnet_lock);
 	LIST_REMOVE(rp, rcb_list);
 #ifdef notdef
 	if (rp->rcb_laddr)
--- a/sys/net/raw_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/raw_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: raw_usrreq.c,v 1.33 2007/05/06 06:21:26 dyoung Exp $	*/
+/*	$NetBSD: raw_usrreq.c,v 1.34 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1980, 1986, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: raw_usrreq.c,v 1.33 2007/05/06 06:21:26 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: raw_usrreq.c,v 1.34 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -51,6 +51,7 @@
 #include <net/raw_cb.h>
 
 #include <machine/stdarg.h>
+
 /*
  * Initialize raw connection block q.
  */
@@ -86,6 +87,8 @@
 	struct sockproto *proto;
 	struct sockaddr *src, *dst;
 
+	KASSERT(mutex_owned(softnet_lock));
+
 	va_start(ap, m0);
 	proto = va_arg(ap, struct sockproto *);
 	src = va_arg(ap, struct sockaddr *);
@@ -173,6 +176,7 @@
 		return (EOPNOTSUPP);
 
 	s = splsoftnet();
+	KERNEL_LOCK(1, NULL);
 	rp = sotorawcb(so);
 #ifdef DIAGNOSTIC
 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
@@ -191,6 +195,7 @@
 	 * the appropriate raw interface routine.
 	 */
 	case PRU_ATTACH:
+		sosetlock(so);
 		if (l == NULL)
 			break;
 
@@ -310,6 +315,7 @@
 	}
 
 release:
+	KERNEL_UNLOCK_ONE(NULL);
 	splx(s);
 	return (error);
 }
--- a/sys/net/rtsock.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/net/rtsock.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: rtsock.c,v 1.100 2008/03/29 13:00:43 yamt Exp $	*/
+/*	$NetBSD: rtsock.c,v 1.101 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.100 2008/03/29 13:00:43 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.101 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 
@@ -160,6 +160,7 @@
 	int s;
 
 	if (req == PRU_ATTACH) {
+		sosetlock(so);
 		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
 		if ((so->so_pcb = rp) != NULL)
 			memset(so->so_pcb, 0, sizeof(*rp));
@@ -1173,6 +1174,8 @@
 	struct mbuf *m;
 	int s;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	while (!IF_IS_EMPTY(&route_intrq)) {
 		s = splnet();
 		IF_DEQUEUE(&route_intrq, m);
@@ -1182,6 +1185,8 @@
 		proto.sp_protocol = M_GETCTX(m, uintptr_t);
 		raw_input(m, &proto, &route_src, &route_dst);
 	}
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -1211,12 +1216,15 @@
 {
 
 	route_intrq.ifq_maxlen = route_maxqlen;
-	route_sih = softint_establish(SOFTINT_NET, route_intr, NULL);
+	route_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
+	    route_intr, NULL);
 }
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
  */
+PR_WRAP_USRREQ(route_usrreq)
+#define	route_usrreq	route_usrreq_wrapper
 
 const struct protosw routesw[] = {
 	{
--- a/sys/netatalk/aarp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netatalk/aarp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: aarp.c,v 1.26 2007/12/04 10:22:34 dyoung Exp $	*/
+/*	$NetBSD: aarp.c,v 1.27 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1990,1991 Regents of The University of Michigan.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: aarp.c,v 1.26 2007/12/04 10:22:34 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: aarp.c,v 1.27 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_mbuftrace.h"
 
@@ -40,6 +40,7 @@
 #include <sys/mbuf.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
+#include <sys/socketvar.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_ether.h>
@@ -106,6 +107,7 @@
 	struct aarptab *aat;
 	int             i, s;
 
+	mutex_enter(softnet_lock);
 	callout_reset(&aarptimer_callout, AARPT_AGE * hz, aarptimer, NULL);
 	aat = aarptab;
 	for (i = 0; i < AARPTAB_SIZE; i++, aat++) {
@@ -119,6 +121,7 @@
 		aarptfree(aat);
 		splx(s);
 	}
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -545,6 +548,8 @@
 	struct sockaddr sa;
 	struct ifnet   *ifp = arp;
 
+	mutex_enter(softnet_lock);
+
 	/*
          * We need to check whether the output ethernet type should
          * be phase 1 or 2. We have the interface that we'll be sending
@@ -560,18 +565,22 @@
 	}
 	if (ia == NULL) {	/* serious error XXX */
 		printf("aarpprobe why did this happen?!\n");
+		mutex_exit(softnet_lock);
 		return;
 	}
 	if (aa->aa_probcnt <= 0) {
 		aa->aa_flags &= ~AFA_PROBING;
 		wakeup(aa);
+		mutex_exit(softnet_lock);
 		return;
 	} else {
 		callout_reset(&aa->aa_probe_ch, hz / 5, aarpprobe, arp);
 	}
 
-	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) {
+		mutex_exit(softnet_lock);
 		return;
+	}
 
 	MCLAIM(m, &aarp_mowner);
 	m->m_len = sizeof(*ea);
@@ -595,8 +604,10 @@
 		    sizeof(eh->ether_dhost));
 		eh->ether_type = 0;	/* if_output will treat as 802 */
 		M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
-		if (!m)
+		if (!m) {
+			mutex_exit(softnet_lock);
 			return;
+		}
 
 		llc = mtod(m, struct llc *);
 		llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP;
@@ -627,6 +638,7 @@
 	sa.sa_family = AF_UNSPEC;
 	(*ifp->if_output) (ifp, m, &sa, NULL);	/* XXX */
 	aa->aa_probcnt--;
+	mutex_exit(softnet_lock);
 }
 
 void
--- a/sys/netatalk/at_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netatalk/at_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: at_proto.c,v 1.15 2007/08/30 02:17:36 dyoung Exp $	*/
+/*	$NetBSD: at_proto.c,v 1.16 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1990,1991 Regents of The University of Michigan.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: at_proto.c,v 1.15 2007/08/30 02:17:36 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: at_proto.c,v 1.16 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -50,6 +50,9 @@
 
 DOMAIN_DEFINE(atalkdomain);	/* forward declare and add to link set */
 
+PR_WRAP_USRREQ(ddp_usrreq)
+#define	ddp_usrreq	ddp_usrreq_wrapper
+
 const struct protosw atalksw[] = {
     {
 	.pr_type = SOCK_DGRAM,
--- a/sys/netatalk/ddp_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netatalk/ddp_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ddp_input.c,v 1.18 2008/04/23 15:17:42 thorpej Exp $	 */
+/*	$NetBSD: ddp_input.c,v 1.19 2008/04/24 11:38:37 ad Exp $	 */
 
 /*
  * Copyright (c) 1990,1994 Regents of The University of Michigan.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ddp_input.c,v 1.18 2008/04/23 15:17:42 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ddp_input.c,v 1.19 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -67,6 +67,7 @@
 	struct at_ifaddr *aa;
 	int             s;
 
+	mutex_enter(softnet_lock);
 	for (;;) {
 		s = splnet();
 
@@ -127,6 +128,7 @@
 			ddp_input(m, ifp, &elh, 1);
 		}
 	}
+	mutex_exit(softnet_lock);
 }
 
 struct route    forwro;
--- a/sys/netatalk/ddp_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netatalk/ddp_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ddp_usrreq.c,v 1.31 2008/04/23 15:17:42 thorpej Exp $	 */
+/*	$NetBSD: ddp_usrreq.c,v 1.32 2008/04/24 11:38:37 ad Exp $	 */
 
 /*
  * Copyright (c) 1990,1991 Regents of The University of Michigan.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ddp_usrreq.c,v 1.31 2008/04/23 15:17:42 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ddp_usrreq.c,v 1.32 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_mbuftrace.h"
 
@@ -96,7 +96,9 @@
 		    (struct ifnet *) rights, l));
 	}
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		at_purgeif((struct ifnet *) rights);
+		mutex_exit(softnet_lock);
 		return (0);
 	}
 	if (rights && rights->m_len) {
@@ -113,6 +115,7 @@
 			error = EINVAL;
 			break;
 		}
+		sosetlock(so);
 		if ((error = at_pcballoc(so)) != 0) {
 			break;
 		}
@@ -471,7 +474,9 @@
 {
 	soisdisconnected(so);
 	so->so_pcb = 0;
+	/* sofree drops the lock */
 	sofree(so);
+	mutex_enter(softnet_lock);
 
 	/* remove ddp from ddp_ports list */
 	if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT &&
--- a/sys/netbt/bluetooth.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/bluetooth.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: bluetooth.h,v 1.6 2007/09/17 01:23:17 rillig Exp $	*/
+/*	$NetBSD: bluetooth.h,v 1.7 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -144,6 +144,8 @@
 # define UNKNOWN(x) ((void)0)
 #endif	/* BLUETOOTH_DEBUG */
 
+extern kmutex_t *bt_lock;
+
 #endif	/* _KERNEL */
 
 #endif	/* _NETBT_BLUETOOTH_H_ */
--- a/sys/netbt/bt_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/bt_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: bt_proto.c,v 1.9 2007/11/20 20:18:00 plunky Exp $	*/
+/*	$NetBSD: bt_proto.c,v 1.10 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bt_proto.c,v 1.9 2007/11/20 20:18:00 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bt_proto.c,v 1.10 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/domain.h>
@@ -50,6 +50,28 @@
 
 DOMAIN_DEFINE(btdomain);	/* forward declare and add to link set */
 
+static void	bt_init(void);
+
+PR_WRAP_CTLOUTPUT(hci_ctloutput)
+PR_WRAP_CTLOUTPUT(sco_ctloutput)
+PR_WRAP_CTLOUTPUT(l2cap_ctloutput)
+PR_WRAP_CTLOUTPUT(rfcomm_ctloutput)
+
+#define	hci_ctloutput		hci_ctloutput_wrapper
+#define	sco_ctloutput		sco_ctloutput_wrapper
+#define	l2cap_ctloutput		l2cap_ctloutput_wrapper
+#define	rfcomm_ctloutput	rfcomm_ctloutput_wrapper
+
+PR_WRAP_USRREQ(hci_usrreq)
+PR_WRAP_USRREQ(sco_usrreq)
+PR_WRAP_USRREQ(l2cap_usrreq)
+PR_WRAP_USRREQ(rfcomm_usrreq)
+
+#define	hci_usrreq		hci_usrreq_wrapper
+#define	sco_usrreq		sco_usrreq_wrapper
+#define	l2cap_usrreq		l2cap_usrreq_wrapper
+#define	rfcomm_usrreq		rfcomm_usrreq_wrapper
+
 const struct protosw btsw[] = {
 	{ /* raw HCI commands */
 		.pr_type = SOCK_RAW,
@@ -88,6 +110,16 @@
 struct domain btdomain = {
 	.dom_family = AF_BLUETOOTH,
 	.dom_name = "bluetooth",
+	.dom_init = bt_init,
 	.dom_protosw = btsw,
 	.dom_protoswNPROTOSW = &btsw[__arraycount(btsw)],
 };
+
+kmutex_t *bt_lock;
+
+static void
+bt_init(void)
+{
+
+	bt_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+}
--- a/sys/netbt/hci.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/hci.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: hci.h,v 1.25 2008/03/17 09:16:17 plunky Exp $	*/
+/*	$NetBSD: hci.h,v 1.26 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -54,7 +54,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $Id: hci.h,v 1.25 2008/03/17 09:16:17 plunky Exp $
+ * $Id: hci.h,v 1.26 2008/04/24 11:38:37 ad Exp $
  * $FreeBSD: src/sys/netgraph/bluetooth/include/ng_hci.h,v 1.6 2005/01/07 01:45:43 imp Exp $
  */
 
@@ -2360,6 +2360,7 @@
 
 #ifdef _KERNEL
 
+#include <sys/condvar.h>
 #include <sys/device.h>
 
 struct l2cap_channel;
@@ -2463,6 +2464,7 @@
 	/* device info */
 	bdaddr_t	 hci_bdaddr;		/* device address */
 	uint16_t	 hci_flags;		/* see BTF_ above */
+	kcondvar_t	 hci_init;		/* sleep on this */
 
 	uint16_t	 hci_packet_type;	/* packet types */
 	uint16_t	 hci_acl_mask;		/* ACL packet capabilities */
--- a/sys/netbt/hci_event.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/hci_event.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: hci_event.c,v 1.17 2008/03/17 09:16:17 plunky Exp $	*/
+/*	$NetBSD: hci_event.c,v 1.18 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hci_event.c,v 1.17 2008/03/17 09:16:17 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hci_event.c,v 1.18 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -839,7 +839,7 @@
 
 	unit->hci_flags &= ~BTF_INIT_BDADDR;
 
-	wakeup(unit);
+	cv_broadcast(&unit->hci_init);
 }
 
 /*
@@ -867,7 +867,7 @@
 
 	unit->hci_flags &= ~BTF_INIT_BUFFER_SIZE;
 
-	wakeup(unit);
+	cv_broadcast(&unit->hci_init);
 }
 
 /*
@@ -955,7 +955,7 @@
 
 	unit->hci_flags &= ~BTF_INIT_FEATURES;
 
-	wakeup(unit);
+	cv_broadcast(&unit->hci_init);
 
 	DPRINTFN(1, "%s: lmp_mask %4.4x, acl_mask %4.4x, sco_mask %4.4x\n",
 		device_xname(unit->hci_dev), unit->hci_lmp_mask,
@@ -984,7 +984,7 @@
 
 	if (rp.hci_version < HCI_SPEC_V12) {
 		unit->hci_flags &= ~BTF_INIT_COMMANDS;
-		wakeup(unit);
+		cv_broadcast(&unit->hci_init);
 		return;
 	}
 
@@ -1012,7 +1012,7 @@
 	unit->hci_flags &= ~BTF_INIT_COMMANDS;
 	memcpy(unit->hci_cmds, rp.commands, HCI_COMMANDS_SIZE);
 
-	wakeup(unit);
+	cv_broadcast(&unit->hci_init);
 }
 
 /*
--- a/sys/netbt/hci_link.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/hci_link.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: hci_link.c,v 1.19 2008/03/16 23:28:10 plunky Exp $	*/
+/*	$NetBSD: hci_link.c,v 1.20 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hci_link.c,v 1.19 2008/03/16 23:28:10 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hci_link.c,v 1.20 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -194,9 +194,9 @@
 {
 	struct hci_link *link = arg;
 	hci_discon_cp cp;
-	int s, err;
+	int err;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	callout_ack(&link->hl_expire);
 
 	if (link->hl_refcnt > 0)
@@ -233,7 +233,7 @@
 	}
 
 out:
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 /*
--- a/sys/netbt/hci_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/hci_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: hci_socket.c,v 1.14 2008/02/10 17:40:54 plunky Exp $	*/
+/*	$NetBSD: hci_socket.c,v 1.15 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hci_socket.c,v 1.14 2008/02/10 17:40:54 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hci_socket.c,v 1.15 2008/04/24 11:38:37 ad Exp $");
 
 /* load symbolic names */
 #ifdef BLUETOOTH_DEBUG
@@ -562,9 +562,14 @@
 		return EOPNOTSUPP;
 
 	case PRU_ATTACH:
+		if (up->so_lock == NULL) {
+			mutex_obj_hold(bt_lock);
+			up->so_lock = bt_lock;
+			solock(up);
+		}
+		KASSERT(solocked(up));
 		if (pcb)
 			return EINVAL;
-
 		err = soreserve(up, hci_sendspace, hci_recvspace);
 		if (err)
 			return err;
--- a/sys/netbt/hci_unit.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/hci_unit.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: hci_unit.c,v 1.10 2008/03/17 09:16:17 plunky Exp $	*/
+/*	$NetBSD: hci_unit.c,v 1.11 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hci_unit.c,v 1.10 2008/03/17 09:16:17 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: hci_unit.c,v 1.11 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -43,6 +43,7 @@
 #include <sys/queue.h>
 #include <sys/systm.h>
 #include <sys/intr.h>
+#include <sys/socketvar.h>
 
 #include <netbt/bluetooth.h>
 #include <netbt/hci.h>
@@ -83,7 +84,6 @@
 hci_attach(const struct hci_if *hci_if, device_t dev, uint16_t flags)
 {
 	struct hci_unit *unit;
-	int s;
 
 	KASSERT(dev != NULL);
 	KASSERT(hci_if->enable != NULL);
@@ -101,6 +101,7 @@
 	unit->hci_flags = flags;
 
 	mutex_init(&unit->hci_devlock, MUTEX_DRIVER, hci_if->ipl);
+	cv_init(&unit->hci_init, "hci_init");
 
 	MBUFQ_INIT(&unit->hci_eventq);
 	MBUFQ_INIT(&unit->hci_aclrxq);
@@ -111,9 +112,9 @@
 	TAILQ_INIT(&unit->hci_links);
 	LIST_INIT(&unit->hci_memos);
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	SIMPLEQ_INSERT_TAIL(&hci_unit_list, unit, hci_next);
-	splx(s);
+	mutex_exit(bt_lock);
 
 	return unit;
 }
@@ -121,14 +122,14 @@
 void
 hci_detach(struct hci_unit *unit)
 {
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	hci_disable(unit);
 
 	SIMPLEQ_REMOVE(&hci_unit_list, unit, hci_unit, hci_next);
-	splx(s);
+	mutex_exit(bt_lock);
 
+	cv_destroy(&unit->hci_init);
 	mutex_destroy(&unit->hci_devlock);
 	free(unit, M_BLUETOOTH);
 }
@@ -179,7 +180,7 @@
 		goto bad2;
 
 	while (unit->hci_flags & BTF_INIT) {
-		err = tsleep(unit, PWAIT | PCATCH, __func__, 5 * hz);
+		err = cv_timedwait_sig(&unit->hci_init, bt_lock, 5 * hz);
 		if (err)
 			goto bad2;
 
@@ -345,6 +346,7 @@
 	struct hci_unit *unit = arg;
 	struct mbuf *m;
 
+	mutex_enter(bt_lock);
 another:
 	mutex_enter(&unit->hci_devlock);
 
@@ -422,6 +424,7 @@
 	}
 
 	mutex_exit(&unit->hci_devlock);
+	mutex_exit(bt_lock);
 
 	DPRINTFN(10, "done\n");
 }
--- a/sys/netbt/l2cap_misc.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/l2cap_misc.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: l2cap_misc.c,v 1.5 2007/11/03 17:20:17 plunky Exp $	*/
+/*	$NetBSD: l2cap_misc.c,v 1.6 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: l2cap_misc.c,v 1.5 2007/11/03 17:20:17 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: l2cap_misc.c,v 1.6 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -185,9 +185,8 @@
 {
 	struct l2cap_req *req = arg;
 	struct l2cap_channel *chan;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	callout_ack(&req->lr_rtx);
 
 	chan = req->lr_chan;
@@ -198,7 +197,7 @@
 	if (chan && chan->lc_state != L2CAP_CLOSED)
 		l2cap_close(chan, ETIMEDOUT);
 
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 /*
--- a/sys/netbt/l2cap_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/l2cap_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: l2cap_socket.c,v 1.7 2007/04/21 06:15:23 plunky Exp $	*/
+/*	$NetBSD: l2cap_socket.c,v 1.8 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2005 Iain Hibbert.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: l2cap_socket.c,v 1.7 2007/04/21 06:15:23 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: l2cap_socket.c,v 1.8 2008/04/24 11:38:37 ad Exp $");
 
 /* load symbolic names */
 #ifdef BLUETOOTH_DEBUG
@@ -120,9 +120,14 @@
 		return EOPNOTSUPP;
 
 	case PRU_ATTACH:
+		if (up->so_lock == NULL) {
+			mutex_obj_hold(bt_lock);
+			up->so_lock = bt_lock;
+			solock(up);
+		}
+		KASSERT(solocked(up));
 		if (pcb != NULL)
 			return EINVAL;
-
 		/*
 		 * For L2CAP socket PCB we just use an l2cap_channel structure
 		 * since we have nothing to add..
--- a/sys/netbt/rfcomm_dlc.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/rfcomm_dlc.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: rfcomm_dlc.c,v 1.4 2007/11/03 17:20:17 plunky Exp $	*/
+/*	$NetBSD: rfcomm_dlc.c,v 1.5 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rfcomm_dlc.c,v 1.4 2007/11/03 17:20:17 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rfcomm_dlc.c,v 1.5 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -195,9 +195,8 @@
 rfcomm_dlc_timeout(void *arg)
 {
 	struct rfcomm_dlc *dlc = arg;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	callout_ack(&dlc->rd_timeout);
 
 	if (dlc->rd_state != RFCOMM_DLC_CLOSED)
@@ -207,7 +206,7 @@
 		free(dlc, M_BLUETOOTH);
 	}
 
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 /*
--- a/sys/netbt/rfcomm_session.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/rfcomm_session.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: rfcomm_session.c,v 1.12 2008/01/31 19:30:23 plunky Exp $	*/
+/*	$NetBSD: rfcomm_session.c,v 1.13 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rfcomm_session.c,v 1.12 2008/01/31 19:30:23 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rfcomm_session.c,v 1.13 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -292,11 +292,10 @@
 {
 	struct rfcomm_session *rs = arg;
 	struct rfcomm_dlc *dlc;
-	int s;
 
 	KASSERT(rs != NULL);
 
-	s = splsoftnet();
+	mutex_enter(bt_lock);
 	callout_ack(&rs->rs_timeout);
 
 	if (rs->rs_state != RFCOMM_SESSION_OPEN) {
@@ -314,7 +313,7 @@
 		DPRINTF("expiring\n");
 		rfcomm_session_free(rs);
 	}
-	splx(s);
+	mutex_exit(bt_lock);
 }
 
 /***********************************************************************
--- a/sys/netbt/rfcomm_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/rfcomm_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: rfcomm_socket.c,v 1.8 2007/10/15 18:04:34 plunky Exp $	*/
+/*	$NetBSD: rfcomm_socket.c,v 1.9 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rfcomm_socket.c,v 1.8 2007/10/15 18:04:34 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rfcomm_socket.c,v 1.9 2008/04/24 11:38:37 ad Exp $");
 
 /* load symbolic names */
 #ifdef BLUETOOTH_DEBUG
@@ -119,9 +119,14 @@
 		return EOPNOTSUPP;
 
 	case PRU_ATTACH:
+		if (up->so_lock == NULL) {
+			mutex_obj_hold(bt_lock);
+			up->so_lock = bt_lock;
+			solock(up);
+		}
+		KASSERT(solocked(up));
 		if (pcb != NULL)
 			return EINVAL;
-
 		/*
 		 * Since we have nothing to add, we attach the DLC
 		 * structure directly to our PCB pointer.
--- a/sys/netbt/sco_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netbt/sco_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: sco_socket.c,v 1.9 2007/04/21 06:15:23 plunky Exp $	*/
+/*	$NetBSD: sco_socket.c,v 1.10 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006 Itronix Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sco_socket.c,v 1.9 2007/04/21 06:15:23 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sco_socket.c,v 1.10 2008/04/24 11:38:37 ad Exp $");
 
 /* load symbolic names */
 #ifdef BLUETOOTH_DEBUG
@@ -113,9 +113,14 @@
 		return EOPNOTSUPP;
 
 	case PRU_ATTACH:
+		if (up->so_lock == NULL) {
+			mutex_obj_hold(bt_lock);
+			up->so_lock = bt_lock;
+			solock(up);
+		}
+		KASSERT(solocked(up));
 		if (pcb)
 			return EINVAL;
-
 		err = soreserve(up, sco_sendspace, sco_recvspace);
 		if (err)
 			return err;
--- a/sys/netinet/if_arp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/if_arp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,7 +1,7 @@
-/*	$NetBSD: if_arp.c,v 1.133 2008/04/23 05:26:50 thorpej Exp $	*/
+/*	$NetBSD: if_arp.c,v 1.134 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
+ * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -75,7 +75,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.133 2008/04/23 05:26:50 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.134 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
@@ -100,6 +100,8 @@
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/sysctl.h>
+#include <sys/socketvar.h>
+#include <sys/percpu.h>
 
 #include <net/ethertypes.h>
 #include <net/if.h>
@@ -341,8 +343,13 @@
 	int count = 0;
 	struct mbuf *mold;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	if (arp_lock_try(0) == 0) {
 		printf("arp_drain: locked; punting\n");
+		KERNEL_UNLOCK_ONE(NULL);
+		mutex_exit(softnet_lock);
 		return;
 	}
 
@@ -359,6 +366,8 @@
 	}
 	ARP_UNLOCK();
 	ARP_STATADD(ARP_STAT_DFRDROPPED, count);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 
@@ -369,14 +378,15 @@
 static void
 arptimer(void *arg)
 {
-	int s;
 	struct llinfo_arp *la, *nla;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	if (arp_lock_try(0) == 0) {
 		/* get it later.. */
-		splx(s);
+		KERNEL_UNLOCK_ONE(NULL);
+		mutex_exit(softnet_lock);
 		return;
 	}
 
@@ -403,7 +413,8 @@
 
 	ARP_UNLOCK();
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -473,7 +484,7 @@
 			ts.tv_nsec = 0;
 			tc_setclock(&ts);
 		}
-		callout_init(&arptimer_ch, 0);
+		callout_init(&arptimer_ch, CALLOUT_MPSAFE);
 		callout_reset(&arptimer_ch, hz, arptimer, NULL);
 	}
 
@@ -822,6 +833,8 @@
 	int s;
 	int arplen;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	while (arpintrq.ifq_head) {
 		s = splnet();
 		IF_DEQUEUE(&arpintrq, m);
@@ -866,6 +879,8 @@
 		}
 		m_freem(m);
 	}
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/netinet/igmp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/igmp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: igmp.c,v 1.47 2008/04/23 05:26:50 thorpej Exp $	*/
+/*	$NetBSD: igmp.c,v 1.48 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -40,13 +40,14 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.47 2008/04/23 05:26:50 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: igmp.c,v 1.48 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_mrouting.h"
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
@@ -479,7 +480,6 @@
 {
 	struct in_multi *inm;
 	struct in_multistep step;
-	int s;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order
@@ -488,7 +488,9 @@
 	if (!igmp_timers_are_running)
 		return;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	igmp_timers_are_running = 0;
 	IN_FIRST_MULTI(step, inm);
 	while (inm != NULL) {
@@ -509,23 +511,26 @@
 		}
 		IN_NEXT_MULTI(step, inm);
 	}
-	splx(s);
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
 igmp_slowtimo(void)
 {
 	struct router_info *rti;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	LIST_FOREACH(rti, &rti_head, rti_link) {
 		if (rti->rti_type == IGMP_v1_ROUTER &&
 		    ++rti->rti_age >= IGMP_AGE_THRESHOLD) {
 			rti->rti_type = IGMP_v2_ROUTER;
 		}
 	}
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
--- a/sys/netinet/in_pcb.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/in_pcb.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in_pcb.c,v 1.122 2008/01/14 04:19:09 dyoung Exp $	*/
+/*	$NetBSD: in_pcb.c,v 1.123 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -98,7 +98,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.122 2008/01/14 04:19:09 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.123 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -494,7 +494,9 @@
 	ipsec4_delete_pcbpolicy(inp);
 #endif /*IPSEC*/
 	so->so_pcb = 0;
+	/* sofree drops the socket's lock */
 	sofree(so);
+	mutex_enter(softnet_lock);
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	rtcache_free(&inp->inp_route);
--- a/sys/netinet/in_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/in_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in_proto.c,v 1.93 2008/04/23 06:09:04 thorpej Exp $	*/
+/*	$NetBSD: in_proto.c,v 1.94 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_proto.c,v 1.93 2008/04/23 06:09:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_proto.c,v 1.94 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_mrouting.h"
 #include "opt_eon.h"			/* ISO CLNL over IP */
@@ -150,6 +150,60 @@
 
 DOMAIN_DEFINE(inetdomain);	/* forward declare and add to link set */
 
+/* Wrappers to acquire kernel_lock. */
+
+PR_WRAP_USRREQ(rip_usrreq)
+PR_WRAP_USRREQ(udp_usrreq)
+PR_WRAP_USRREQ(tcp_usrreq)
+
+#define	rip_usrreq 	rip_usrreq_wrapper
+#define	udp_usrreq 	udp_usrreq_wrapper
+#define	tcp_usrreq 	tcp_usrreq_wrapper
+
+PR_WRAP_CTLINPUT(rip_ctlinput)
+PR_WRAP_CTLINPUT(udp_ctlinput)
+PR_WRAP_CTLINPUT(tcp_ctlinput)
+
+#define	rip_ctlinput	rip_ctlinput_wrapper
+#define	udp_ctlinput	udp_ctlinput_wrapper
+#define	tcp_ctlinput	tcp_ctlinput_wrapper
+
+PR_WRAP_CTLOUTPUT(rip_ctloutput)
+PR_WRAP_CTLOUTPUT(udp_ctloutput)
+PR_WRAP_CTLOUTPUT(tcp_ctloutput)
+
+#define	rip_ctloutput	rip_ctloutput_wrapper
+#define	udp_ctloutput	udp_ctloutput_wrapper
+#define	tcp_ctloutput	tcp_ctloutput_wrapper
+
+#if defined(IPSEC) || defined(FAST_IPSEC)
+PR_WRAP_CTLINPUT(ah4_ctlinput)
+
+#define	ah4_ctlinput	ah4_ctlinput_wrapper
+#endif
+
+#if defined(IPSEC_ESP) || defined(FAST_IPSEC)
+PR_WRAP_CTLINPUT(esp4_ctlinput)
+
+#define	esp4_ctlinput	esp4_ctlinput_wrapper
+#endif
+
+#ifdef TPIP
+PR_WRAP_CTLOUTPUT(tp_ctloutput)
+
+#define	tp_ctloutput	tp_ctloutput_wrapper
+
+PR_WRAP_CTLINPUT(tpip_ctlinput)
+
+#define	tpip_ctlinput	tpip_ctlinput_wrapper
+#endif
+
+#ifdef EON
+PR_WRAP_CTLINPUT(eonctlinput)
+
+#define	eonctlinput	eonctlinput_wrapper
+#endif
+
 const struct protosw inetsw[] = {
 {	.pr_domain = &inetdomain,
 	.pr_init = ip_init,
--- a/sys/netinet/ip_encap.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/ip_encap.c	Thu Apr 24 11:38:36 2008 +0000
@@ -70,7 +70,7 @@
 #define USE_RADIX
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.31 2007/06/13 04:55:25 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.32 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_mrouting.h"
 #include "opt_inet.h"
@@ -684,7 +684,7 @@
 /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
 
 #ifdef INET6
-void
+void *
 encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
 {
 	void *d = d0;
@@ -698,16 +698,16 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 	if (cmd == PRC_HOSTDEAD)
 		d = NULL;
 	else if (cmd == PRC_MSGSIZE)
 		; /* special code is present, see below */
 	else if (inet6ctlerrmap[cmd] == 0)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -759,6 +759,7 @@
 	}
 
 	rip6_ctlinput(cmd, sa, d0);
+	return NULL;
 }
 #endif
 
--- a/sys/netinet/ip_encap.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/ip_encap.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_encap.h,v 1.11 2007/02/17 22:34:11 dyoung Exp $	*/
+/*	$NetBSD: ip_encap.h,v 1.12 2008/04/24 11:38:37 ad Exp $	*/
 /*	$KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $	*/
 
 /*
@@ -65,7 +65,7 @@
 const struct encaptab *encap_attach_func(int, int,
 	int (*)(struct mbuf *, int, int, void *),
 	const struct protosw *, void *);
-void	encap6_ctlinput(int, const struct sockaddr *, void *);
+void	*encap6_ctlinput(int, const struct sockaddr *, void *);
 int	encap_detach(const struct encaptab *);
 void	*encap_getarg(struct mbuf *);
 #endif
--- a/sys/netinet/ip_flow.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/ip_flow.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_flow.c,v 1.54 2008/04/12 05:58:22 thorpej Exp $	*/
+/*	$NetBSD: ip_flow.c,v 1.55 2008/04/24 11:38:37 ad Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.54 2008/04/12 05:58:22 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.55 2008/04/24 11:38:37 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -408,6 +408,8 @@
 	struct ipflow *ipf, *next_ipf;
 	uint64_t *ips;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
 		next_ipf = LIST_NEXT(ipf, ipf_list);
 		if (PRT_SLOW_ISEXPIRED(ipf->ipf_timer) ||
@@ -424,6 +426,8 @@
 			ipf->ipf_uses = 0;
 		}
 	}
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
--- a/sys/netinet/ip_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/ip_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_input.c,v 1.267 2008/04/23 06:09:04 thorpej Exp $	*/
+/*	$NetBSD: ip_input.c,v 1.268 2008/04/24 11:38:37 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -98,7 +98,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.267 2008/04/23 06:09:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.268 2008/04/24 11:38:37 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_gateway.h"
@@ -469,14 +469,18 @@
 	int s;
 	struct mbuf *m;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	while (!IF_IS_EMPTY(&ipintrq)) {
 		s = splnet();
 		IF_DEQUEUE(&ipintrq, m);
 		splx(s);
-		if (m == 0)
-			return;
+		if (m == NULL)
+			break;
 		ip_input(m);
 	}
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -1389,7 +1393,9 @@
 	static u_int dropscanidx = 0;
 	u_int i;
 	u_int median_ttl;
-	int s = splsoftnet();
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	IPQ_LOCK();
 
@@ -1431,7 +1437,9 @@
 		dropscanidx = i;
 	}
 	IPQ_UNLOCK();
-	splx(s);
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -1441,20 +1449,24 @@
 ip_drain(void)
 {
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	/*
 	 * We may be called from a device's interrupt context.  If
 	 * the ipq is already busy, just bail out now.
 	 */
-	if (ipq_lock_try() == 0)
-		return;
+	if (ipq_lock_try() != 0) {
+		/*
+		 * Drop half the total fragments now. If more mbufs are
+		 * needed, we will be called again soon.
+		 */
+		ip_reass_drophalf();
+		IPQ_UNLOCK();
+	}
 
-	/*
-	 * Drop half the total fragments now. If more mbufs are needed,
-	 *  we will be called again soon.
-	 */
-	ip_reass_drophalf();
-
-	IPQ_UNLOCK();
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/netinet/raw_ip.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/raw_ip.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: raw_ip.c,v 1.106 2008/04/23 06:09:04 thorpej Exp $	*/
+/*	$NetBSD: raw_ip.c,v 1.107 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.106 2008/04/23 06:09:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.107 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -528,9 +528,11 @@
 	s = splsoftnet();
 
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
 		in_purgeif((struct ifnet *)control);
 		in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
+		mutex_exit(softnet_lock);
 		splx(s);
 		return (0);
 	}
@@ -548,6 +550,7 @@
 	switch (req) {
 
 	case PRU_ATTACH:
+		sosetlock(so);
 		if (inp != 0) {
 			error = EISCONN;
 			break;
--- a/sys/netinet/tcp_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/tcp_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tcp_input.c,v 1.285 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: tcp_input.c,v 1.286 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -152,7 +152,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.285 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.286 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -3468,16 +3468,17 @@
 syn_cache_timer(void *arg)
 {
 	struct syn_cache *sc = arg;
-	int s;
-
-	s = splsoftnet();
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	callout_ack(&sc->sc_timer);
 
 	if (__predict_false(sc->sc_flags & SCF_DEAD)) {
 		TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
 		callout_destroy(&sc->sc_timer);
 		pool_put(&syn_cache_pool, sc);
-		splx(s);
+		KERNEL_UNLOCK_ONE(NULL);
+		mutex_exit(softnet_lock);
 		return;
 	}
 
@@ -3502,14 +3503,16 @@
 	sc->sc_rxtshift++;
 	SYN_CACHE_TIMER_ARM(sc);
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 	return;
 
  dropit:
 	TCP_STATINC(TCP_STAT_SC_TIMED_OUT);
 	syn_cache_rm(sc);
 	syn_cache_put(sc);	/* calls pool_put but see spl above */
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -4082,7 +4085,7 @@
 	 * options into the reply.
 	 */
 	bzero(sc, sizeof(struct syn_cache));
-	callout_init(&sc->sc_timer, 0);
+	callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
 	bcopy(src, &sc->sc_src, src->sa_len);
 	bcopy(dst, &sc->sc_dst, dst->sa_len);
 	sc->sc_flags = 0;
--- a/sys/netinet/tcp_subr.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/tcp_subr.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tcp_subr.c,v 1.227 2008/04/12 05:58:22 thorpej Exp $	*/
+/*	$NetBSD: tcp_subr.c,v 1.228 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -30,7 +30,7 @@
  */
 
 /*-
- * Copyright (c) 1997, 1998, 2000, 2001 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -98,7 +98,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.227 2008/04/12 05:58:22 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.228 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -1008,10 +1008,10 @@
 
 	/* Don't sweat this loop; hopefully the compiler will unroll it. */
 	for (i = 0; i < TCPT_NTIMERS; i++) {
-		callout_init(&tp->t_timer[i], 0);
+		callout_init(&tp->t_timer[i], CALLOUT_MPSAFE);
 		TCP_TIMER_INIT(tp, i);
 	}
-	callout_init(&tp->t_delack_ch, 0);
+	callout_init(&tp->t_delack_ch, CALLOUT_MPSAFE);
 
 	switch (family) {
 	case AF_INET:
@@ -1106,34 +1106,6 @@
 }
 
 /*
- * Return whether this tcpcb is marked as dead, indicating
- * to the calling timer function that no further action should
- * be taken, as we are about to release this tcpcb.  The release
- * of the storage will be done if this is the last timer running.
- *
- * This should be called from the callout handler function after
- * callout_ack() is done, so that the number of invoking timer
- * functions is 0.
- */
-int
-tcp_isdead(struct tcpcb *tp)
-{
-	int i, dead = (tp->t_flags & TF_DEAD);
-
-	if (__predict_false(dead)) {
-		if (tcp_timers_invoking(tp) > 0)
-				/* not quite there yet -- count separately? */
-			return dead;
-		TCP_STATINC(TCP_STAT_DELAYED_FREE);
-		for (i = 0; i < TCPT_NTIMERS; i++)
-			callout_destroy(&tp->t_timer[i]);
-		callout_destroy(&tp->t_delack_ch);
-		pool_put(&tcpcb_pool, tp);	/* splsoftnet via tcp_timer.c */
-	}
-	return dead;
-}
-
-/*
  * Close a TCP control block:
  *	discard all space held by the tcp
  *	discard internet protocol block
@@ -1256,14 +1228,14 @@
 		m_free(tp->t_template);
 		tp->t_template = NULL;
 	}
-	if (tcp_timers_invoking(tp))
-		tp->t_flags |= TF_DEAD;
-	else {
-		for (j = 0; j < TCPT_NTIMERS; j++)
-			callout_destroy(&tp->t_timer[j]);
-		callout_destroy(&tp->t_delack_ch);
-		pool_put(&tcpcb_pool, tp);
+	tp->t_flags |= TF_DEAD;
+	for (j = 0; j < TCPT_NTIMERS; j++) {
+		callout_halt(&tp->t_timer[j], softnet_lock);
+		callout_destroy(&tp->t_timer[j]);
 	}
+	callout_halt(&tp->t_delack_ch, softnet_lock);
+	callout_destroy(&tp->t_delack_ch);
+	pool_put(&tcpcb_pool, tp);
 
 	if (inp) {
 		inp->inp_ppcb = 0;
@@ -1318,6 +1290,9 @@
 	struct inpcb_hdr *inph;
 	struct tcpcb *tp;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	/*
 	 * Free the sequence queue of all TCP connections.
 	 */
@@ -1348,6 +1323,9 @@
 			TCP_REASS_UNLOCK(tp);
 		}
 	}
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -1377,7 +1355,7 @@
 		so->so_error = error;
 	else
 		tp->t_softerror = error;
-	wakeup((void *) &so->so_timeo);
+	cv_broadcast(&so->so_cv);
 	sorwakeup(so);
 	sowwakeup(so);
 }
@@ -1405,14 +1383,14 @@
 		so->so_error = error;
 	else
 		tp->t_softerror = error;
-	wakeup((void *) &so->so_timeo);
+	cv_broadcast(&so->so_cv);
 	sorwakeup(so);
 	sowwakeup(so);
 }
 #endif
 
 #ifdef INET6
-void
+void *
 tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	struct tcphdr th;
@@ -1426,15 +1404,15 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 	else if (cmd == PRC_QUENCH) {
 		/* 
 		 * Don't honor ICMP Source Quench messages meant for
 		 * TCP connections.
 		 */
-		return;
+		return NULL;
 	} else if (PRC_IS_REDIRECT(cmd))
 		notify = in6_rtchange, d = NULL;
 	else if (cmd == PRC_MSGSIZE)
@@ -1442,7 +1420,7 @@
 	else if (cmd == PRC_HOSTDEAD)
 		d = NULL;
 	else if (inet6ctlerrmap[cmd] == 0)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -1468,7 +1446,7 @@
 		if (m->m_pkthdr.len < off + sizeof(th)) {
 			if (cmd == PRC_MSGSIZE)
 				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
-			return;
+			return NULL;
 		}
 
 		bzero(&th, sizeof(th));
@@ -1501,7 +1479,7 @@
 			 * no need to call in6_pcbnotify, it should have been
 			 * called via callback if necessary
 			 */
-			return;
+			return NULL;
 		}
 
 		nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport,
@@ -1516,6 +1494,8 @@
 		(void) in6_pcbnotify(&tcbtable, sa, 0,
 		    (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
 	}
+
+	return NULL;
 }
 #endif
 
--- a/sys/netinet/tcp_timer.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/tcp_timer.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tcp_timer.c,v 1.79 2008/04/12 05:58:22 thorpej Exp $	*/
+/*	$NetBSD: tcp_timer.c,v 1.80 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -100,7 +100,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.79 2008/04/12 05:58:22 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.80 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_tcp_debug.h"
@@ -201,31 +201,12 @@
 }
 
 /*
- * Return how many timers are currently being invoked.
- */
-int
-tcp_timers_invoking(struct tcpcb *tp)
-{
-	int i;
-	int count = 0;
-
-	for (i = 0; i < TCPT_NTIMERS; i++)
-		if (callout_invoking(&tp->t_timer[i]))
-			count++;
-	if (callout_invoking(&tp->t_delack_ch))
-		count++;
-
-	return count;
-}
-
-/*
  * Callout to process delayed ACKs for a TCPCB.
  */
 void
 tcp_delack(void *arg)
 {
 	struct tcpcb *tp = arg;
-	int s;
 
 	/*
 	 * If tcp_output() wasn't able to transmit the ACK
@@ -233,16 +214,17 @@
 	 * ACK callout.
 	 */
 
-	s = splsoftnet();
-	callout_ack(&tp->t_delack_ch);
-	if (tcp_isdead(tp)) {
-		splx(s);
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0) {
+		mutex_exit(softnet_lock);
 		return;
 	}
 
 	tp->t_flags |= TF_ACKNOW;
+	KERNEL_LOCK(1, NULL);
 	(void) tcp_output(tp);
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -253,12 +235,11 @@
 void
 tcp_slowtimo(void)
 {
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
 	tcp_iss_seq += TCP_ISSINCR;			/* increment iss */
 	tcp_now++;					/* for timestamps */
-	splx(s);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -287,19 +268,18 @@
 {
 	struct tcpcb *tp = arg;
 	uint32_t rto;
-	int s;
 #ifdef TCP_DEBUG
 	struct socket *so = NULL;
 	short ostate;
 #endif
 
-	s = splsoftnet();
-	callout_ack(&tp->t_timer[TCPT_REXMT]);
-	if (tcp_isdead(tp)) {
-		splx(s);
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0) {
+		mutex_exit(softnet_lock);
 		return;
 	}
 
+	KERNEL_LOCK(1, NULL);
 	if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
 	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
 	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_ourmss))) {
@@ -321,7 +301,8 @@
 		 */
 		in_pcbnotifyall(&tcbtable, icmpsrc.sin_addr, EMSGSIZE,
 		    tcp_mtudisc);
- 		splx(s);
+		KERNEL_UNLOCK_ONE(NULL);
+		mutex_exit(softnet_lock);
  		return;
  	}
 #ifdef TCP_DEBUG
@@ -435,7 +416,8 @@
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_REXMT << 8));
 #endif
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
@@ -443,19 +425,18 @@
 {
 	struct tcpcb *tp = arg;
 	uint32_t rto;
-	int s;
 #ifdef TCP_DEBUG
 	struct socket *so = NULL;
 	short ostate;
 #endif
 
-	s = splsoftnet();
-	callout_ack(&tp->t_timer[TCPT_PERSIST]);
-	if (tcp_isdead(tp)) {
-		splx(s);
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0) {
+		mutex_exit(softnet_lock);
 		return;
 	}
 
+	KERNEL_LOCK(1, NULL);
 #ifdef TCP_DEBUG
 #ifdef INET
 	if (tp->t_inpcb)
@@ -503,7 +484,8 @@
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_PERSIST << 8));
 #endif
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
@@ -511,18 +493,18 @@
 {
 	struct tcpcb *tp = arg;
 	struct socket *so = NULL;	/* Quell compiler warning */
-	int s;
 #ifdef TCP_DEBUG
 	short ostate;
 #endif
 
-	s = splsoftnet();
-	callout_ack(&tp->t_timer[TCPT_KEEP]);
-	if (tcp_isdead(tp)) {
-		splx(s);
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0) {
+		mutex_exit(softnet_lock);
 		return;
 	}
 
+	KERNEL_LOCK(1, NULL);
+
 #ifdef TCP_DEBUG
 	ostate = tp->t_state;
 #endif /* TCP_DEBUG */
@@ -585,29 +567,29 @@
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_KEEP << 8));
 #endif
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 	return;
 
  dropit:
 	TCP_STATINC(TCP_STAT_KEEPDROPS);
 	(void) tcp_drop(tp, ETIMEDOUT);
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
 tcp_timer_2msl(void *arg)
 {
 	struct tcpcb *tp = arg;
-	int s;
 #ifdef TCP_DEBUG
 	struct socket *so = NULL;
 	short ostate;
 #endif
 
-	s = splsoftnet();
-	callout_ack(&tp->t_timer[TCPT_2MSL]);
-	if (tcp_isdead(tp)) {
-		splx(s);
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0) {
+		mutex_exit(softnet_lock);
 		return;
 	}
 
@@ -615,6 +597,7 @@
 	 * 2 MSL timeout went off, clear the SACK scoreboard, reset
 	 * the FACK estimate.
 	 */
+	KERNEL_LOCK(1, NULL);
 	tcp_free_sackholes(tp);
 	tp->snd_fack = tp->snd_una;
 
@@ -649,5 +632,6 @@
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_2MSL << 8));
 #endif
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
--- a/sys/netinet/tcp_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/tcp_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tcp_usrreq.c,v 1.143 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: tcp_usrreq.c,v 1.144 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.143 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.144 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -210,6 +210,7 @@
 	s = splsoftnet();
 
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		switch (family) {
 #ifdef INET
 		case PF_INET:
@@ -226,13 +227,18 @@
 			break;
 #endif
 		default:
+			mutex_exit(softnet_lock);
 			splx(s);
 			return (EAFNOSUPPORT);
 		}
+		mutex_exit(softnet_lock);
 		splx(s);
 		return (0);
 	}
 
+	if (req == PRU_ATTACH)
+		sosetlock(so);
+
 	switch (family) {
 #ifdef INET
 	case PF_INET:
--- a/sys/netinet/tcp_var.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/tcp_var.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tcp_var.h,v 1.155 2008/04/12 05:58:22 thorpej Exp $	*/
+/*	$NetBSD: tcp_var.h,v 1.156 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -822,12 +822,10 @@
 
 int	 tcp_attach(struct socket *);
 void	 tcp_canceltimers(struct tcpcb *);
-int	 tcp_timers_invoking(struct tcpcb*);
 struct tcpcb *
 	 tcp_close(struct tcpcb *);
-int	 tcp_isdead(struct tcpcb *);
 #ifdef INET6
-void	 tcp6_ctlinput(int, const struct sockaddr *, void *);
+void	 *tcp6_ctlinput(int, const struct sockaddr *, void *);
 #endif
 void	 *tcp_ctlinput(int, const struct sockaddr *, void *);
 int	 tcp_ctloutput(int, struct socket *, int, int, struct mbuf **);
--- a/sys/netinet/udp_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet/udp_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: udp_usrreq.c,v 1.169 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: udp_usrreq.c,v 1.170 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.169 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.170 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -1181,9 +1181,11 @@
 	s = splsoftnet();
 
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		in_pcbpurgeif0(&udbtable, (struct ifnet *)control);
 		in_purgeif((struct ifnet *)control);
 		in_pcbpurgeif(&udbtable, (struct ifnet *)control);
+		mutex_exit(softnet_lock);
 		splx(s);
 		return (0);
 	}
@@ -1193,7 +1195,9 @@
 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
 		panic("udp_usrreq: unexpected control mbuf");
 #endif
-	if (inp == 0 && req != PRU_ATTACH) {
+	if (req == PRU_ATTACH) {
+		sosetlock(so);
+	} else if (inp == 0) {
 		error = EINVAL;
 		goto release;
 	}
--- a/sys/netinet6/ah.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ah.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ah.h,v 1.24 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: ah.h,v 1.25 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ah.h,v 1.16 2001/09/04 08:43:19 itojun Exp $	*/
 
 /*
@@ -101,7 +101,7 @@
 	const struct ah_algorithm *, struct secasvar *);
 
 extern void ah6_init(void);
-extern void ah6_ctlinput(int, const struct sockaddr *, void *);
+extern void *ah6_ctlinput(int, const struct sockaddr *, void *);
 #endif /* INET6 */
 
 #endif /* _KERNEL */
--- a/sys/netinet6/ah_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ah_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ah_input.c,v 1.56 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: ah_input.c,v 1.57 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ah_input.c,v 1.64 2001/09/04 08:43:19 itojun Exp $	*/
 
 /*
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ah_input.c,v 1.56 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ah_input.c,v 1.57 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -923,7 +923,7 @@
 	return IPPROTO_DONE;
 }
 
-void
+void *
 ah6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	const struct newah *ahp;
@@ -937,9 +937,9 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -961,7 +961,7 @@
 
 		/* check if we can safely examine src and dst ports */
 		if (m->m_pkthdr.len < off + sizeof(ah))
-			return;
+			return NULL;
 
 		if (m->m_len < off + sizeof(ah)) {
 			/*
@@ -1009,5 +1009,7 @@
 	} else {
 		/* we normally notify any pcb here */
 	}
+
+	return NULL;
 }
 #endif /* INET6 */
--- a/sys/netinet6/esp.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/esp.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: esp.h,v 1.24 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: esp.h,v 1.25 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: esp.h,v 1.19 2001/09/04 08:43:19 itojun Exp $	*/
 
 /*
@@ -109,7 +109,7 @@
 extern int esp6_input __P((struct mbuf **, int *, int));
 
 extern void esp6_init(void);
-extern void esp6_ctlinput(int, const struct sockaddr *, void *);
+extern void *esp6_ctlinput(int, const struct sockaddr *, void *);
 #endif /* INET6 */
 
 extern int esp_schedule __P((const struct esp_algorithm *, struct secasvar *));
--- a/sys/netinet6/esp_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/esp_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: esp_input.c,v 1.46 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: esp_input.c,v 1.47 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: esp_input.c,v 1.60 2001/09/04 08:43:19 itojun Exp $	*/
 
 /*
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: esp_input.c,v 1.46 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: esp_input.c,v 1.47 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -897,7 +897,7 @@
 	return IPPROTO_DONE;
 }
 
-void
+void *
 esp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	const struct newesp *espp;
@@ -911,9 +911,9 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -953,7 +953,7 @@
 
 		/* check if we can safely examine src and dst ports */
 		if (m->m_pkthdr.len < off + sizeof(esp))
-			return;
+			return NULL;
 
 		if (m->m_len < off + sizeof(esp)) {
 			/*
@@ -999,5 +999,7 @@
 	} else {
 		/* we normally notify any pcb here */
 	}
+
+	return NULL;
 }
 #endif /* INET6 */
--- a/sys/netinet6/frag6.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/frag6.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: frag6.c,v 1.44 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: frag6.c,v 1.45 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: frag6.c,v 1.40 2002/05/27 21:40:31 itojun Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: frag6.c,v 1.44 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: frag6.c,v 1.45 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -40,6 +40,7 @@
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
@@ -678,7 +679,9 @@
 frag6_slowtimo(void)
 {
 	struct ip6q *q6;
-	int s = splsoftnet();
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	IP6Q_LOCK();
 	q6 = ip6q.ip6q_next;
@@ -715,7 +718,8 @@
 	rtcache_free(&ipsrcchk_rt);
 #endif
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -725,12 +729,16 @@
 frag6_drain(void)
 {
 
-	if (ip6q_lock_try() == 0)
-		return;
-	while (ip6q.ip6q_next != &ip6q) {
-		IP6_STATINC(IP6_STAT_FRAGDROPPED);
-		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
-		frag6_freef(ip6q.ip6q_next);
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+	if (ip6q_lock_try() != 0) {
+		while (ip6q.ip6q_next != &ip6q) {
+			IP6_STATINC(IP6_STAT_FRAGDROPPED);
+			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+			frag6_freef(ip6q.ip6q_next);
+		}
+		IP6Q_UNLOCK();
 	}
-	IP6Q_UNLOCK();
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
--- a/sys/netinet6/in6_gif.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/in6_gif.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6_gif.c,v 1.55 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: in6_gif.c,v 1.56 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: in6_gif.c,v 1.62 2001/07/29 04:27:25 itojun Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6_gif.c,v 1.55 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6_gif.c,v 1.56 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_iso.h"
@@ -76,15 +76,9 @@
 
 int	ip6_gif_hlim = GIF_HLIM;
 
-extern struct domain inet6domain;
-const struct ip6protosw in6_gif_protosw =
-{ SOCK_RAW,	&inet6domain,	0/* IPPROTO_IPV[46] */,	PR_ATOMIC|PR_ADDR,
-  in6_gif_input, rip6_output,	in6_gif_ctlinput, rip6_ctloutput,
-  rip6_usrreq,
-  0,            0,              0,              0,
-};
+extern LIST_HEAD(, gif_softc) gif_softc_list;
 
-extern LIST_HEAD(, gif_softc) gif_softc_list;
+extern const struct ip6protosw in6_gif_protosw;
 
 /* 
  * family - family of the packet to be encapsulate. 
@@ -406,7 +400,7 @@
 	return error;
 }
 
-void
+void *
 in6_gif_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	struct gif_softc *sc;
@@ -416,14 +410,14 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 	if (cmd == PRC_HOSTDEAD)
 		d = NULL;
 	else if (inet6ctlerrmap[cmd] == 0)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -434,7 +428,7 @@
 	}
 
 	if (!ip6)
-		return;
+		return NULL;
 
 	/*
 	 * for now we don't care which type it was, just flush the route cache.
@@ -454,4 +448,22 @@
 		else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
 			rtcache_free(&sc->gif_ro);
 	}
+
+	return NULL;
 }
+
+PR_WRAP_CTLINPUT(in6_gif_ctlinput)
+PR_WRAP_CTLOUTPUT(rip6_ctloutput)
+PR_WRAP_USRREQ(rip6_usrreq)
+
+#define	in6_gif_ctlinput	in6_gif_ctlinput_wrapper
+#define	rip6_ctloutput		rip6_ctloutput_wrapper
+#define	rip6_usrreq		rip6_usrreq_wrapper
+
+extern struct domain inet6domain;
+const struct ip6protosw in6_gif_protosw =
+{ SOCK_RAW,	&inet6domain,	0/* IPPROTO_IPV[46] */,	PR_ATOMIC|PR_ADDR,
+  in6_gif_input, rip6_output,	in6_gif_ctlinput, rip6_ctloutput,
+  rip6_usrreq,
+  0,            0,              0,              0,
+};
--- a/sys/netinet6/in6_gif.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/in6_gif.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6_gif.h,v 1.12 2007/02/17 22:34:13 dyoung Exp $	*/
+/*	$NetBSD: in6_gif.h,v 1.13 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: in6_gif.h,v 1.7 2001/07/26 06:53:16 jinmei Exp $	*/
 
 /*
@@ -45,6 +45,6 @@
 #endif
 int in6_gif_attach(struct gif_softc *);
 int in6_gif_detach(struct gif_softc *);
-void in6_gif_ctlinput(int, const struct sockaddr *, void *);
+void *in6_gif_ctlinput(int, const struct sockaddr *, void *);
 
 #endif /* !_NETINET6_IN6_GIF_H_ */
--- a/sys/netinet6/in6_ifattach.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/in6_ifattach.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6_ifattach.c,v 1.79 2007/12/06 00:28:36 dyoung Exp $	*/
+/*	$NetBSD: in6_ifattach.c,v 1.80 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: in6_ifattach.c,v 1.124 2001/07/18 08:32:51 jinmei Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6_ifattach.c,v 1.79 2007/12/06 00:28:36 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6_ifattach.c,v 1.80 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -41,6 +41,7 @@
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/md5.h>
+#include <sys/socketvar.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -961,7 +962,9 @@
 	struct nd_ifinfo *ndi;
 	u_int8_t nullbuf[8];
 	struct ifnet *ifp;
-	int s = splsoftnet();
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	callout_reset(&in6_tmpaddrtimer_ch,
 	    (ip6_temp_preferred_lifetime - ip6_desync_factor -
@@ -980,5 +983,6 @@
 		}
 	}
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
--- a/sys/netinet6/in6_pcb.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/in6_pcb.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6_pcb.c,v 1.96 2008/03/20 20:32:00 dyoung Exp $	*/
+/*	$NetBSD: in6_pcb.c,v 1.97 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: in6_pcb.c,v 1.84 2001/02/08 18:02:08 itojun Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6_pcb.c,v 1.96 2008/03/20 20:32:00 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6_pcb.c,v 1.97 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -499,7 +499,9 @@
 	ipsec6_delete_pcbpolicy(in6p);
 #endif /* IPSEC */
 	so->so_pcb = 0;
+	/* sofree drops the socket's lock */
 	sofree(so);
+	mutex_enter(softnet_lock);
 	if (in6p->in6p_options)
 		m_freem(in6p->in6p_options);
 	if (in6p->in6p_outputopts != NULL) {
--- a/sys/netinet6/in6_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/in6_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6_proto.c,v 1.81 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: in6_proto.c,v 1.82 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: in6_proto.c,v 1.66 2000/10/10 15:35:47 itojun Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6_proto.c,v 1.81 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6_proto.c,v 1.82 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -146,6 +146,44 @@
 
 DOMAIN_DEFINE(inet6domain);	/* forward declare and add to link set */
 
+/* Wrappers to acquire kernel_lock. */
+
+PR_WRAP_USRREQ(rip6_usrreq)
+PR_WRAP_USRREQ(udp6_usrreq)
+PR_WRAP_USRREQ(tcp_usrreq)
+
+#define	rip6_usrreq 	rip6_usrreq_wrapper
+#define	udp6_usrreq 	udp6_usrreq_wrapper
+#define	tcp_usrreq 	tcp_usrreq_wrapper
+
+PR_WRAP_CTLINPUT(rip6_ctlinput)
+PR_WRAP_CTLINPUT(encap6_ctlinput)
+PR_WRAP_CTLINPUT(udp6_ctlinput)
+PR_WRAP_CTLINPUT(tcp6_ctlinput)
+
+#define	rip6_ctlinput	rip6_ctlinput_wrapper
+#define	encap6_ctlinput	encap6_ctlinput_wrapper
+#define	udp6_ctlinput	udp6_ctlinput_wrapper
+#define	tcp6_ctlinput	tcp6_ctlinput_wrapper
+
+PR_WRAP_CTLOUTPUT(rip6_ctloutput)
+PR_WRAP_CTLOUTPUT(ip6_ctloutput)
+PR_WRAP_CTLOUTPUT(tcp_ctloutput)
+PR_WRAP_CTLOUTPUT(icmp6_ctloutput)
+
+#define	rip6_ctloutput	rip6_ctloutput_wrapper
+#define	ip6_ctloutput	ip6_ctloutput_wrapper
+#define	tcp_ctloutput	tcp_ctloutput_wrapper
+#define	icmp6_ctloutput	icmp6_ctloutput_wrapper
+
+#if defined(IPSEC) || defined(FAST_IPSEC)
+PR_WRAP_CTLINPUT(ah6_ctlinput)
+PR_WRAP_CTLINPUT(esp6_ctlinput)
+
+#define	ah6_ctlinput	ah6_ctlinput_wrapper
+#define	esp6_ctlinput	esp6_ctlinput_wrapper
+#endif
+
 const struct ip6protosw inet6sw[] = {
 {	.pr_domain = &inet6domain,
 	.pr_protocol = IPPROTO_IPV6,
--- a/sys/netinet6/ip6_flow.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ip6_flow.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_flow.c,v 1.15 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: ip6_flow.c,v 1.16 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -45,7 +45,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.15 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.16 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -410,6 +410,9 @@
 {
 	struct ip6flow *ip6f, *next_ip6f;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
 		next_ip6f = LIST_NEXT(ip6f, ip6f_list);
 		if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) ||
@@ -423,6 +426,9 @@
 			ip6f->ip6f_forwarded = 0;
 		}
 	}
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/netinet6/ip6_input.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ip6_input.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_input.c,v 1.117 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: ip6_input.c,v 1.118 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ip6_input.c,v 1.188 2001/03/29 05:34:31 itojun Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip6_input.c,v 1.117 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip6_input.c,v 1.118 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -206,11 +206,11 @@
 {
 
 	/* nd6_timer_init */
-	callout_init(&nd6_timer_ch, 0);
+	callout_init(&nd6_timer_ch, CALLOUT_MPSAFE);
 	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
 
 	/* timer for regeneranation of temporary addresses randomize ID */
-	callout_init(&in6_tmpaddrtimer_ch, 0);
+	callout_init(&in6_tmpaddrtimer_ch, CALLOUT_MPSAFE);
 	callout_reset(&in6_tmpaddrtimer_ch,
 		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
 		       ip6_temp_regen_advance) * hz,
@@ -226,19 +226,23 @@
 	int s;
 	struct mbuf *m;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	for (;;) {
 		s = splnet();
 		IF_DEQUEUE(&ip6intrq, m);
 		splx(s);
 		if (m == 0)
-			return;
+			break;
 		/* drop the packet if IPv6 operation is disabled on the IF */
 		if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
 			m_freem(m);
-			return;
+			break;
 		}
 		ip6_input(m);
 	}
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 extern struct	route ip6_forward_rt;
--- a/sys/netinet6/ip6_mroute.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ip6_mroute.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_mroute.c,v 1.91 2008/04/23 05:26:50 thorpej Exp $	*/
+/*	$NetBSD: ip6_mroute.c,v 1.92 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ip6_mroute.c,v 1.49 2001/07/25 09:21:18 jinmei Exp $	*/
 
 /*
@@ -117,7 +117,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip6_mroute.c,v 1.91 2008/04/23 05:26:50 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip6_mroute.c,v 1.92 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
@@ -487,7 +487,7 @@
 
 	pim6 = 0;/* used for stubbing out/in pim stuff */
 
-	callout_init(&expire_upcalls_ch, 0);
+	callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
 
@@ -1296,9 +1296,10 @@
 	struct rtdetq *rte;
 	struct mf6c *mfc, **nptr;
 	int i;
-	int s;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	for (i = 0; i < MF6CTBLSIZ; i++) {
 		if (n6expire[i] == 0)
 			continue;
@@ -1339,9 +1340,11 @@
 			}
 		}
 	}
-	splx(s);
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
--- a/sys/netinet6/ip6_var.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ip6_var.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_var.h,v 1.49 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: ip6_var.h,v 1.50 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $	*/
 
 /*
@@ -367,7 +367,7 @@
 
 void	rip6_init(void);
 int	rip6_input(struct mbuf **, int *, int);
-void	rip6_ctlinput(int, const struct sockaddr *, void *);
+void	*rip6_ctlinput(int, const struct sockaddr *, void *);
 int	rip6_ctloutput(int, struct socket *, int, int, struct mbuf **);
 int	rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *,
 			 struct mbuf *);
--- a/sys/netinet6/ip6protosw.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/ip6protosw.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6protosw.h,v 1.19 2007/07/19 20:48:57 dyoung Exp $	*/
+/*	$NetBSD: ip6protosw.h,v 1.20 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: ip6protosw.h,v 1.22 2001/02/08 18:02:08 itojun Exp $	*/
 
 /*
@@ -123,7 +123,7 @@
 	int	(*pr_output)		/* output to protocol (from above) */
 			(struct mbuf *, struct socket *, struct sockaddr_in6 *,
 			 struct mbuf *);
-	void	(*pr_ctlinput)		/* control input (from below) */
+	void	*(*pr_ctlinput)		/* control input (from below) */
 			(int, const struct sockaddr *, void *);
 	int	(*pr_ctloutput)		/* control output (from above) */
 			(int, struct socket *, int, int, struct mbuf **);
--- a/sys/netinet6/mld6.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/mld6.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: mld6.c,v 1.44 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: mld6.c,v 1.45 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
 
 /*
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.44 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.45 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 
@@ -110,6 +110,7 @@
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
@@ -221,7 +222,9 @@
 mld_timeo(void *arg)
 {
 	struct in6_multi *in6m = arg;
-	int s = splsoftnet();
+
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
 
@@ -234,7 +237,8 @@
 		break;
 	}
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 static u_long
@@ -674,7 +678,7 @@
 			return (NULL);
 		}
 
-		callout_init(&in6m->in6m_timer_ch, 0);
+		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
 		in6m->in6m_timer = timer;
 		if (in6m->in6m_timer > 0) {
--- a/sys/netinet6/nd6.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/nd6.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nd6.c,v 1.125 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: nd6.c,v 1.126 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.125 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.126 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_ipsec.h"
 
@@ -41,6 +41,7 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
@@ -148,8 +149,8 @@
 
 	nd6_init_done = 1;
 
-	callout_init(&nd6_slowtimo_ch, 0);
-	callout_init(&nd6_timer_ch, 0);
+	callout_init(&nd6_slowtimo_ch, CALLOUT_MPSAFE);
+	callout_init(&nd6_timer_ch, CALLOUT_MPSAFE);
 
 	/* start timer */
 	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
@@ -410,20 +411,21 @@
 static void
 nd6_llinfo_timer(void *arg)
 {
-	int s;
 	struct llinfo_nd6 *ln;
 	struct rtentry *rt;
 	const struct sockaddr_in6 *dst;
 	struct ifnet *ifp;
 	struct nd_ifinfo *ndi = NULL;
 
-	s = splsoftnet();
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	ln = (struct llinfo_nd6 *)arg;
 
 	if (ln->ln_ntick > 0) {
 		nd6_llinfo_settimer(ln, ln->ln_ntick);
-		splx(s);
+		KERNEL_UNLOCK_ONE(NULL);
+		mutex_exit(softnet_lock);
 		return;
 	}
 
@@ -509,7 +511,8 @@
 		break;
 	}
 
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -518,16 +521,17 @@
 void
 nd6_timer(void *ignored_arg)
 {
-	int s;
 	struct nd_defrouter *next_dr, *dr;
 	struct nd_prefix *next_pr, *pr;
 	struct in6_ifaddr *ia6, *nia6;
 	struct in6_addrlifetime *lt6;
 
-	s = splsoftnet();
 	callout_reset(&nd6_timer_ch, nd6_prune * hz,
 	    nd6_timer, NULL);
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+
 	/* expire default router list */
 	
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr != NULL; dr = next_dr) {
@@ -628,7 +632,9 @@
 			prelist_remove(pr);
 		}
 	}
-	splx(s);
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /* ia6: deprecated/invalidated temporary address */
@@ -1272,7 +1278,7 @@
 		nd6_allocated++;
 		bzero(ln, sizeof(*ln));
 		ln->ln_rt = rt;
-		callout_init(&ln->ln_timer_ch, 0);
+		callout_init(&ln->ln_timer_ch, CALLOUT_MPSAFE);
 		/* this is required for "ndp" command. - shin */
 		if (req == RTM_ADD) {
 		        /*
@@ -1870,11 +1876,12 @@
 static void
 nd6_slowtimo(void *ignored_arg)
 {
-	int s = splsoftnet();
 	struct nd_ifinfo *nd6if;
 	struct ifnet *ifp;
 
-	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, NULL);
 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
 		nd6if = ND_IFINFO(ifp);
@@ -1890,7 +1897,8 @@
 			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
 		}
 	}
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 #define senderr(e) { error = (e); goto bad;}
--- a/sys/netinet6/nd6_nbr.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/nd6_nbr.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nd6_nbr.c,v 1.85 2008/04/15 03:57:04 thorpej Exp $	*/
+/*	$NetBSD: nd6_nbr.c,v 1.86 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: nd6_nbr.c,v 1.61 2001/02/10 16:06:14 jinmei Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nd6_nbr.c,v 1.85 2008/04/15 03:57:04 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nd6_nbr.c,v 1.86 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -41,6 +41,7 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
@@ -1099,7 +1100,7 @@
 		return;
 	}
 	bzero(dp, sizeof(*dp));
-	callout_init(&dp->dad_timer_ch, 0);
+	callout_init(&dp->dad_timer_ch, CALLOUT_MPSAFE);
 	TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
 
 	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
@@ -1151,11 +1152,11 @@
 static void
 nd6_dad_timer(struct ifaddr *ifa)
 {
-	int s;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
 
-	s = splsoftnet();	/* XXX */
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 
 	/* Sanity check */
 	if (ia == NULL) {
@@ -1248,7 +1249,8 @@
 	}
 
 done:
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 void
--- a/sys/netinet6/raw_ip6.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/raw_ip6.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: raw_ip6.c,v 1.97 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: raw_ip6.c,v 1.98 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: raw_ip6.c,v 1.82 2001/07/23 18:57:56 jinmei Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: raw_ip6.c,v 1.97 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: raw_ip6.c,v 1.98 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_ipsec.h"
 
@@ -293,7 +293,7 @@
 	return IPPROTO_DONE;
 }
 
-void
+void *
 rip6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	struct ip6_hdr *ip6;
@@ -305,10 +305,10 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 	if (PRC_IS_REDIRECT(cmd))
 		notify = in6_rtchange, d = NULL;
 	else if (cmd == PRC_HOSTDEAD)
@@ -316,7 +316,7 @@
 	else if (cmd == PRC_MSGSIZE)
 		; /* special code is present, see below */
 	else if (inet6ctlerrmap[cmd] == 0)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -385,6 +385,7 @@
 
 	(void) in6_pcbnotify(&raw6cbtable, sa, 0,
 	    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
+	return NULL;
 }
 
 /*
@@ -627,14 +628,17 @@
 		    (struct ifnet *)control, l);
 
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		in6_pcbpurgeif0(&raw6cbtable, (struct ifnet *)control);
 		in6_purgeif((struct ifnet *)control);
 		in6_pcbpurgeif(&raw6cbtable, (struct ifnet *)control);
+		mutex_exit(softnet_lock);
 		return 0;
 	}
 
 	switch (req) {
 	case PRU_ATTACH:
+		sosetlock(so);
 		if (in6p != NULL)
 			panic("rip6_attach");
 		if (!priv) {
--- a/sys/netinet6/udp6_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/udp6_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: udp6_usrreq.c,v 1.83 2008/04/23 05:26:50 thorpej Exp $	*/
+/*	$NetBSD: udp6_usrreq.c,v 1.84 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: udp6_usrreq.c,v 1.86 2001/05/27 17:33:00 itojun Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: udp6_usrreq.c,v 1.83 2008/04/23 05:26:50 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: udp6_usrreq.c,v 1.84 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/malloc.h>
@@ -132,7 +132,7 @@
 	sowwakeup(in6p->in6p_socket);
 }
 
-void
+void *
 udp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 {
 	struct udphdr uh;
@@ -151,10 +151,10 @@
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
-		return;
+		return NULL;
 
 	if ((unsigned)cmd >= PRC_NCMDS)
-		return;
+		return NULL;
 	if (PRC_IS_REDIRECT(cmd))
 		notify = in6_rtchange, d = NULL;
 	else if (cmd == PRC_HOSTDEAD)
@@ -164,7 +164,7 @@
 		notify = in6_rtchange;
 	}
 	else if (inet6ctlerrmap[cmd] == 0)
-		return;
+		return NULL;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
@@ -192,7 +192,7 @@
 		if (m->m_pkthdr.len < off + sizeof(*uhp)) {
 			if (cmd == PRC_MSGSIZE)
 				icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
-			return;
+			return NULL;
 		}
 
 		bzero(&uh, sizeof(uh));
@@ -249,6 +249,7 @@
 		(void) in6_pcbnotify(&udbtable, sa, 0,
 		    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
 	}
+	return NULL;
 }
 
 extern	int udp6_sendspace;
@@ -277,15 +278,17 @@
 				   (struct ifnet *)control, l);
 
 	if (req == PRU_PURGEIF) {
-		s = splsoftnet();
+		mutex_enter(softnet_lock);
 		in6_pcbpurgeif0(&udbtable, (struct ifnet *)control);
 		in6_purgeif((struct ifnet *)control);
 		in6_pcbpurgeif(&udbtable, (struct ifnet *)control);
-		splx(s);
+		mutex_exit(softnet_lock);
 		return 0;
 	}
 
-	if (in6p == NULL && req != PRU_ATTACH) {
+	if (req == PRU_ATTACH)
+		sosetlock(so);
+	else if (in6p == NULL) {
 		error = EINVAL;
 		goto release;
 	}
--- a/sys/netinet6/udp6_var.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netinet6/udp6_var.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: udp6_var.h,v 1.22 2008/04/15 04:43:25 thorpej Exp $	*/
+/*	$NetBSD: udp6_var.h,v 1.23 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: udp6_var.h,v 1.11 2000/06/05 00:14:31 itojun Exp $	*/
 
 /*
@@ -99,7 +99,7 @@
 }
 
 #ifdef _KERNEL
-void	udp6_ctlinput(int, const struct sockaddr *, void *);
+void	*udp6_ctlinput(int, const struct sockaddr *, void *);
 void	udp6_init(void);
 int	udp6_input(struct mbuf **, int *, int);
 int	udp6_output(struct in6pcb *, struct mbuf *, struct mbuf *,
--- a/sys/netipsec/key.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netipsec/key.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: key.c,v 1.52 2008/04/23 07:29:47 thorpej Exp $	*/
+/*	$NetBSD: key.c,v 1.53 2008/04/24 11:38:38 ad Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $	*/
 /*	$KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $	*/
 	
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.52 2008/04/23 07:29:47 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.53 2008/04/24 11:38:38 ad Exp $");
 
 /*
  * This code is referd to RFC 2367
@@ -4451,7 +4451,7 @@
  * time handler.
  * scanning SPD and SAD to check status for each entries,
  * and do to remove or to expire.
- * XXX: year 2038 problem may remain.
+ * XXX2038: year 2038 problem may remain.
  */
 void
 key_timehandler(void* arg)
@@ -4461,6 +4461,7 @@
 	time_t now = time_second;
 
 	s = splsoftnet();	/*called from softclock()*/
+	mutex_enter(softnet_lock);
 
 	/* SPD */
     {
@@ -4707,6 +4708,7 @@
 	callout_reset(&key_timehandler_ch, hz, key_timehandler, NULL);
 #endif /* IPSEC_DEBUG2 */
 
+	mutex_exit(softnet_lock);
 	splx(s);
 	return;
 }
--- a/sys/netipsec/keysock.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netipsec/keysock.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: keysock.c,v 1.15 2008/04/23 07:29:47 thorpej Exp $	*/
+/*	$NetBSD: keysock.c,v 1.16 2008/04/24 11:38:38 ad Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/keysock.c,v 1.3.2.1 2003/01/24 05:11:36 sam Exp $	*/
 /*	$KAME: keysock.c,v 1.25 2001/08/13 20:07:41 itojun Exp $	*/
 
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: keysock.c,v 1.15 2008/04/23 07:29:47 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: keysock.c,v 1.16 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_ipsec.h"
 
@@ -641,6 +641,7 @@
 	s = splsoftnet();
 	if (req == PRU_ATTACH) {
 		kp = (struct keycb *)malloc(sizeof(*kp), M_PCB, M_WAITOK);
+		sosetlock(so);
 		so->so_pcb = kp;
 		if (so->so_pcb)
 			bzero(so->so_pcb, sizeof(*kp));
--- a/sys/netipsec/xform_ipip.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netipsec/xform_ipip.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: xform_ipip.c,v 1.22 2008/04/23 06:09:05 thorpej Exp $	*/
+/*	$NetBSD: xform_ipip.c,v 1.23 2008/04/24 11:38:38 ad Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/xform_ipip.c,v 1.3.2.1 2003/01/24 05:11:36 sam Exp $	*/
 /*	$OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */
 
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xform_ipip.c,v 1.22 2008/04/23 06:09:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xform_ipip.c,v 1.23 2008/04/24 11:38:38 ad Exp $");
 
 /*
  * IP-inside-IP processing
@@ -686,6 +686,11 @@
 };
 
 #ifdef INET
+PR_WRAP_CTLOUTPUT(rip_ctloutput)
+PR_WRAP_USRREQ(rip_usrreq)	/* pr_usrreq entry: needs the USRREQ wrapper */
+#define	rip_ctloutput	rip_ctloutput_wrapper
+#define	rip_usrreq	rip_usrreq_wrapper
+
 extern struct domain inetdomain;
 static struct ipprotosw ipe4_protosw = {
  .pr_type = SOCK_RAW,
@@ -704,6 +709,11 @@
 };
 #endif
 #ifdef INET6
+PR_WRAP_CTLOUTPUT(rip6_ctloutput)
+PR_WRAP_USRREQ(rip6_usrreq)	/* pr_usrreq entry: needs the USRREQ wrapper */
+#define	rip6_ctloutput	rip6_ctloutput_wrapper
+#define	rip6_usrreq	rip6_usrreq_wrapper
+
 extern struct domain inet6domain;
 static struct ip6protosw ipe4_protosw6 = {
  .pr_type = SOCK_RAW,
--- a/sys/netiso/clnp_timer.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/clnp_timer.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: clnp_timer.c,v 1.13 2005/12/11 12:25:12 christos Exp $	*/
+/*	$NetBSD: clnp_timer.c,v 1.14 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: clnp_timer.c,v 1.13 2005/12/11 12:25:12 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: clnp_timer.c,v 1.14 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -145,9 +145,11 @@
 void
 clnp_slowtimo(void)
 {
-	struct clnp_fragl *cfh = clnp_frags;
-	int s = splsoftnet();
+	struct clnp_fragl *cfh;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+	cfh = clnp_frags;
 	while (cfh != NULL) {
 		if (--cfh->cfl_ttl == 0) {
 			cfh = clnp_freefrags(cfh);
@@ -156,7 +158,8 @@
 			cfh = cfh->cfl_next;
 		}
 	}
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -174,8 +177,13 @@
 void
 clnp_drain(void)
 {
-	struct clnp_fragl *cfh = clnp_frags;
+	struct clnp_fragl *cfh;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
+	cfh = clnp_frags;
 	while (cfh != NULL)
 		cfh = clnp_freefrags(cfh);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
--- a/sys/netiso/cltp_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/cltp_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cltp_usrreq.c,v 1.32 2007/03/04 06:03:31 christos Exp $	*/
+/*	$NetBSD: cltp_usrreq.c,v 1.33 2008/04/24 11:38:38 ad Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cltp_usrreq.c,v 1.32 2007/03/04 06:03:31 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cltp_usrreq.c,v 1.33 2008/04/24 11:38:38 ad Exp $");
 
 #ifndef CLTPOVAL_SRC		/* XXX -- till files gets changed */
 #include <sys/param.h>
@@ -298,7 +298,9 @@
 		    (struct ifnet *)control, l));
 
 	if (req == PRU_PURGEIF) {
+		mutex_enter(softnet_lock);
 		iso_purgeif((struct ifnet *)control);
+		mutex_exit(softnet_lock);
 		return (0);
 	}
 
--- a/sys/netiso/iso_pcb.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/iso_pcb.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: iso_pcb.c,v 1.43 2008/04/23 09:57:59 plunky Exp $	*/
+/*	$NetBSD: iso_pcb.c,v 1.44 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: iso_pcb.c,v 1.43 2008/04/23 09:57:59 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: iso_pcb.c,v 1.44 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_iso.h"
 
@@ -488,7 +488,9 @@
 	if (so) {		/* in the x.25 domain, we sometimes have no
 				 * socket */
 		so->so_pcb = 0;
+		/* sofree drops the lock */
 		sofree(so);
+		mutex_enter(softnet_lock);
 	}
 #ifdef ARGO_DEBUG
 	if (argo_debug[D_ISO]) {
--- a/sys/netiso/iso_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/iso_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: iso_proto.c,v 1.27 2008/04/23 09:57:59 plunky Exp $	*/
+/*	$NetBSD: iso_proto.c,v 1.28 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -65,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: iso_proto.c,v 1.27 2008/04/23 09:57:59 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: iso_proto.c,v 1.28 2008/04/24 11:38:38 ad Exp $");
 
 
 #include <sys/param.h>
@@ -97,6 +97,32 @@
 
 DOMAIN_DEFINE(isodomain);	/* forward declare and add to link set */
 
+/* Wrappers to acquire kernel_lock. */
+
+PR_WRAP_USRREQ(cltp_usrreq)
+PR_WRAP_USRREQ(clnp_usrreq)
+PR_WRAP_USRREQ(idrp_usrreq)
+PR_WRAP_USRREQ(tp_usrreq)
+PR_WRAP_USRREQ(esis_usrreq)
+
+#define	cltp_usrreq	cltp_usrreq_wrapper
+#define	clnp_usrreq	clnp_usrreq_wrapper
+#define	idrp_usrreq	idrp_usrreq_wrapper
+#define	tp_usrreq	tp_usrreq_wrapper
+#define	esis_usrreq	esis_usrreq_wrapper
+
+PR_WRAP_CTLOUTPUT(rclnp_ctloutput)
+PR_WRAP_CTLOUTPUT(tp_ctloutput)
+
+#define	rclnp_ctloutput	rclnp_ctloutput_wrapper
+#define	tp_ctloutput	tp_ctloutput_wrapper
+
+PR_WRAP_CTLINPUT(esis_ctlinput)
+PR_WRAP_CTLINPUT(tpclnp_ctlinput)
+
+#define	esis_ctlinput	esis_ctlinput_wrapper
+#define	tpclnp_ctlinput	tpclnp_ctlinput_wrapper
+
 const struct protosw  isosw[] = {
 	/*
 	 *  We need a datagram entry through which net mgmt programs can get
--- a/sys/netiso/tp_timer.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/tp_timer.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tp_timer.c,v 1.18 2007/03/04 06:03:33 christos Exp $	*/
+/*	$NetBSD: tp_timer.c,v 1.19 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tp_timer.c,v 1.18 2007/03/04 06:03:33 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tp_timer.c,v 1.19 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -188,8 +188,10 @@
 	struct tp_ref *rp;
 	struct tp_pcb  *tpcb;
 	struct tp_event E;
-	int             s = splsoftnet(), t;
+	int             t;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	/* check only open reference structures */
 	IncStat(ts_Cticks);
 	/* tp_ref[0] is never used */
@@ -226,7 +228,8 @@
 			}
 		}
 	}
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 /*
@@ -281,9 +284,10 @@
 tp_fasttimo(void)
 {
 	struct tp_pcb *t;
-	int             s = splsoftnet();
 	struct tp_event E;
 
+	mutex_enter(softnet_lock);
+	KERNEL_LOCK(1, NULL);
 	E.ev_number = TM_sendack;
 	while ((t = tp_ftimeolist) != (struct tp_pcb *) & tp_ftimeolist) {
 		if (t == 0) {
@@ -300,7 +304,8 @@
 			t->tp_fasttimeo = 0;
 		}
 	}
-	splx(s);
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
 }
 
 #ifdef TP_DEBUG_TIMERS
--- a/sys/netiso/tp_usrreq.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netiso/tp_usrreq.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: tp_usrreq.c,v 1.34 2008/02/06 21:57:55 ad Exp $	*/
+/*	$NetBSD: tp_usrreq.c,v 1.35 2008/04/24 11:38:38 ad Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -65,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tp_usrreq.c,v 1.34 2008/02/06 21:57:55 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tp_usrreq.c,v 1.35 2008/04/24 11:38:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -381,7 +381,6 @@
 	struct mbuf *control, struct lwp *l)
 {
 	struct tp_pcb *tpcb;
-	int             s;
 	int             error = 0;
 	int             flags, *outflags = &flags;
 	u_long          eotsdu = 0;
@@ -404,7 +403,6 @@
 	if (req == PRU_CONTROL)
 		return (EOPNOTSUPP);
 
-	s = splsoftnet();
 	tpcb = sototpcb(so);
 	if (tpcb == 0 && req != PRU_ATTACH) {
 #ifdef TPPT
@@ -725,7 +723,6 @@
 		/*
 		 * stat: don't bother with a blocksize.
 		 */
-		splx(s);
 		return (0);
 
 	case PRU_SOCKADDR:
@@ -757,7 +754,6 @@
 	}
 #endif
 release:
-	splx(s);
 	return error;
 }
 
--- a/sys/netkey/key.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netkey/key.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: key.c,v 1.158 2008/04/23 07:29:47 thorpej Exp $	*/
+/*	$NetBSD: key.c,v 1.159 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: key.c,v 1.310 2003/09/08 02:23:44 itojun Exp $	*/
 
 /*
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.158 2008/04/23 07:29:47 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.159 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -4651,7 +4651,7 @@
  * time handler.
  * scanning SPD and SAD to check status for each entries,
  * and do to remove or to expire.
- * XXX: year 2038 problem may remain.
+ * XXX2038: year 2038 problem may remain.
  */
 void
 key_timehandler(void *arg)
@@ -4663,6 +4663,7 @@
 	getmicrotime(&tv);
 
 	s = splsoftnet();	/*called from softclock()*/
+	mutex_enter(softnet_lock);
 
 	/* SPD */
     {
@@ -4925,6 +4926,7 @@
 
 	callout_reset(&key_timehandler_ch, hz, key_timehandler, (void *)0);
 
+	mutex_exit(softnet_lock);
 	splx(s);
 	return;
 }
--- a/sys/netkey/keysock.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netkey/keysock.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: keysock.c,v 1.46 2008/04/23 07:29:47 thorpej Exp $	*/
+/*	$NetBSD: keysock.c,v 1.47 2008/04/24 11:38:38 ad Exp $	*/
 /*	$KAME: keysock.c,v 1.32 2003/08/22 05:45:08 itojun Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: keysock.c,v 1.46 2008/04/23 07:29:47 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: keysock.c,v 1.47 2008/04/24 11:38:38 ad Exp $");
 
 #include "opt_inet.h"
 
@@ -456,6 +456,10 @@
 
 DOMAIN_DEFINE(keydomain);
 
+PR_WRAP_USRREQ(key_usrreq)
+
+#define	key_usrreq	key_usrreq_wrapper
+
 const struct protosw keysw[] = {
 { .pr_type = SOCK_RAW,
   .pr_domain = &keydomain,
--- a/sys/netnatm/natm.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netnatm/natm.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: natm.c,v 1.14 2007/03/04 06:03:35 christos Exp $	*/
+/*	$NetBSD: natm.c,v 1.15 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  *
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: natm.c,v 1.14 2007/03/04 06:03:35 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: natm.c,v 1.15 2008/04/24 11:38:39 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -137,7 +137,9 @@
 
       npcb_free(npcb, NPCB_DESTROY);	/* drain */
       so->so_pcb = NULL;
+      /* sofree drops the lock */
       sofree(so);
+      mutex_enter(softnet_lock);
 
       break;
 
@@ -359,12 +361,15 @@
   struct socket *so;
   struct natmpcb *npcb;
 
+  mutex_enter(softnet_lock);
 next:
   s = splnet();
   IF_DEQUEUE(&natmintrq, m);
   splx(s);
-  if (m == NULL)
+  if (m == NULL) {
+    mutex_exit(softnet_lock);
     return;
+  }
 
 #ifdef DIAGNOSTIC
   if ((m->m_flags & M_PKTHDR) == 0)
--- a/sys/netnatm/natm_proto.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netnatm/natm_proto.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: natm_proto.c,v 1.12 2007/12/07 19:46:19 elad Exp $	*/
+/*	$NetBSD: natm_proto.c,v 1.13 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  *
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: natm_proto.c,v 1.12 2007/12/07 19:46:19 elad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: natm_proto.c,v 1.13 2008/04/24 11:38:39 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -64,6 +64,10 @@
 struct	ifqueue natmintrq;       	/* natm packet input queue */
 int	natmqmaxlen = IFQ_MAXLEN;	/* max # of packets on queue */
 
+PR_WRAP_USRREQ(natm_usrreq)
+
+#define	natm_usrreq	natm_usrreq_wrapper
+
 const struct protosw natmsw[] = {
 { .pr_type = SOCK_STREAM,
   .pr_domain = &natmdomain,
--- a/sys/netsmb/smb_trantcp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/netsmb/smb_trantcp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: smb_trantcp.c,v 1.33 2008/03/22 18:04:42 ad Exp $	*/
+/*	$NetBSD: smb_trantcp.c,v 1.34 2008/04/24 11:38:39 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: smb_trantcp.c,v 1.33 2008/03/22 18:04:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: smb_trantcp.c,v 1.34 2008/04/24 11:38:39 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -105,19 +105,8 @@
 static int nb_tcprcvbuf = NB_RCVQ;
 static const struct timeval nb_timo = { 15, 0 };	/* XXX sysctl? */
 
-#ifndef __NetBSD__
-SYSCTL_DECL(_net_smb);
-SYSCTL_INT(_net_smb, OID_AUTO, tcpsndbuf, CTLFLAG_RW, &nb_tcpsndbuf, 0, "");
-SYSCTL_INT(_net_smb, OID_AUTO, tcprcvbuf, CTLFLAG_RW, &nb_tcprcvbuf, 0, "");
-#endif
-
-#ifndef __NetBSD__
-#define nb_sosend(so,m,flags,p) (so)->so_proto->pr_usrreqs->pru_sosend( \
-				    so, NULL, 0, m, 0, flags, p)
-#else
 #define nb_sosend(so,m,flags,l) (*(so)->so_send)(so, NULL, (struct uio *)0, \
 					m, (struct mbuf *)0, flags, l)
-#endif
 
 static int  nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, int *lenp,
 	u_int8_t *rpcodep, struct lwp *l);
@@ -126,18 +115,7 @@
 static int
 nb_setsockopt_int(struct socket *so, int level, int name, int val)
 {
-#ifdef __NetBSD__
 	return sosetopt(so, level, name, NULL); /* XXX */
-#else
-	struct sockopt sopt;
-
-	bzero(&sopt, sizeof(sopt));
-	sopt.sopt_level = level;
-	sopt.sopt_name = name;
-	sopt.sopt_val = &val;
-	sopt.sopt_valsize = sizeof(val);
-	return sosetopt(so, &sopt);
-#endif
 }
 
 static int
@@ -199,55 +177,53 @@
 nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct lwp *l)
 {
 	struct socket *so;
-	int error, s;
-#ifdef __NetBSD__
+	int error;
 	struct mbuf *m;
-#endif
 
-	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, l);
+	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, l, NULL);
 	if (error)
 		return error;
+	solock(so);
 	nbp->nbp_tso = so;
 	so->so_upcallarg = (void *)nbp;
 	so->so_upcall = nb_upcall;
 	so->so_rcv.sb_flags |= SB_UPCALL;
+	so->so_rcv.sb_flags &= ~SB_NOINTR;
+	so->so_snd.sb_flags &= ~SB_NOINTR;
 	so->so_rcv.sb_timeo = NB_SNDTIMEO;
 	so->so_snd.sb_timeo = NB_RCVTIMEO;
 	error = soreserve(so, nb_tcpsndbuf, nb_tcprcvbuf);
+	sounlock(so);
 	if (error)
 		goto bad;
 	nb_setsockopt_int(so, SOL_SOCKET, SO_KEEPALIVE, 1);
 	nb_setsockopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1);
-	so->so_rcv.sb_flags &= ~SB_NOINTR;
-	so->so_snd.sb_flags &= ~SB_NOINTR;
-#ifndef __NetBSD__
-	error = soconnect(so, (struct sockaddr*)to, l);
-#else
 	m = m_get(M_WAIT, MT_SONAME);
 	*mtod(m, struct sockaddr *) = *(struct sockaddr *)to;
 	m->m_len = sizeof(struct sockaddr);
+	solock(so);
 	error = soconnect(so, m, l);
 	m_free(m);
-#endif
-	if (error)
+	if (error) {
+		sounlock(so);
 		goto bad;
-	s = splnet();
+	}
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		tsleep(&so->so_timeo, PSOCK, "smbcon", 2 * hz);
+		sowait(so, 2 * hz);
 		if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 &&
 			(error = nb_intr(nbp, l)) != 0) {
 			so->so_state &= ~SS_ISCONNECTING;
-			splx(s);
+			sounlock(so);
 			goto bad;
 		}
 	}
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
-		splx(s);
+		sounlock(so);
 		goto bad;
 	}
-	splx(s);
+	sounlock(so);
 	return 0;
 bad:
 	smb_nbst_disconnect(nbp->nbp_vc, l);
@@ -348,15 +324,7 @@
 	auio.uio_offset = 0;
 	auio.uio_resid = sizeof(len);
 	UIO_SETUP_SYSSPACE(&auio);
-#ifndef __NetBSD__
-	error = so->so_proto->pr_usrreqs->pru_soreceive
-	    (so, (struct sockaddr **)NULL, &auio,
-	    (struct mbuf **)NULL, (struct mbuf **)NULL, &flags);
-#else
-	error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
-				  (struct mbuf **)NULL,
-				  (struct mbuf **)NULL, &flags);
-#endif
+	error = (*so->so_receive)(so, NULL, &auio, NULL, NULL, &flags);
 	if (error)
 		return error;
 	if (auio.uio_resid > 0) {
@@ -441,15 +409,8 @@
 			 */
 			do {
 				rcvflg = MSG_WAITALL;
-#ifdef __NetBSD__
-				error = (*so->so_receive)(so, (struct mbuf **)0,
-					&auio, &tm, (struct mbuf **)NULL,
-					&rcvflg);
-#else
-				error = so->so_proto->pr_usrreqs->pru_soreceive
-				    (so, (struct sockaddr **)NULL,
-				    &auio, &tm, (struct mbuf **)NULL, &rcvflg);
-#endif
+				error = (*so->so_receive)(so, NULL, &auio, &tm,
+				    NULL, &rcvflg);
 			} while (error == EWOULDBLOCK || error == EINTR ||
 				 error == ERESTART);
 			if (error)
@@ -610,7 +571,9 @@
 	if ((so = nbp->nbp_tso) != NULL) {
 		nbp->nbp_flags &= ~NBF_CONNECTED;
 		nbp->nbp_tso = (struct socket *)NULL;
+		solock(so);
 		soshutdown(so, 2);
+		sounlock(so);
 		soclose(so);
 	}
 	if (nbp->nbp_state != NBST_RETARGET) {
@@ -666,11 +629,15 @@
 smb_nbst_intr(struct smb_vc *vcp)
 {
 	struct nbpcb *nbp = vcp->vc_tdata;
+	struct socket *so;
 
-	if (nbp == NULL || nbp->nbp_tso == NULL)
+	if (nbp == NULL || (so = nbp->nbp_tso) == NULL)
 		return;
-	sorwakeup(nbp->nbp_tso);
-	sowwakeup(nbp->nbp_tso);
+	
+	solock(so);
+	sorwakeup(so);
+	sowwakeup(so);
+	sounlock(so);
 }
 
 static int
@@ -733,4 +700,3 @@
 	smb_nbst_fatal,
 	{ NULL, NULL },
 };
-
--- a/sys/nfs/krpc_subr.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/nfs/krpc_subr.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: krpc_subr.c,v 1.32 2007/03/04 06:03:36 christos Exp $	*/
+/*	$NetBSD: krpc_subr.c,v 1.33 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  * Copyright (c) 1995 Gordon Ross, Adam Glass
@@ -43,7 +43,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: krpc_subr.c,v 1.32 2007/03/04 06:03:36 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: krpc_subr.c,v 1.33 2008/04/24 11:38:39 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -246,7 +246,7 @@
 	/*
 	 * Create socket and set its receive timeout.
 	 */
-	if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, l)))
+	if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, l, NULL)))
 		goto out;
 
 	if ((error = nfs_boot_setrecvtimo(so)))
--- a/sys/nfs/nfs_boot.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/nfs/nfs_boot.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nfs_boot.c,v 1.70 2008/04/05 13:49:36 cegger Exp $	*/
+/*	$NetBSD: nfs_boot.c,v 1.71 2008/04/24 11:38:39 ad Exp $	*/
 
 /*-
  * Copyright (c) 1995, 1997 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_boot.c,v 1.70 2008/04/05 13:49:36 cegger Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_boot.c,v 1.71 2008/04/24 11:38:39 ad Exp $");
 
 #include "opt_nfs.h"
 #include "opt_tftproot.h"
@@ -206,7 +206,7 @@
 	 * Get a socket to use for various things in here.
 	 * After this, use "goto out" to cleanup and return.
 	 */
-	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp);
+	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
 	if (error) {
 		printf("ifupdown: socreate, error=%d\n", error);
 		return (error);
@@ -255,7 +255,7 @@
 	 * Get a socket to use for various things in here.
 	 * After this, use "goto out" to cleanup and return.
 	 */
-	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp);
+	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
 	if (error) {
 		printf("setaddress: socreate, error=%d\n", error);
 		return (error);
@@ -315,7 +315,7 @@
 	 * Get a socket to use for various things in here.
 	 * After this, use "goto out" to cleanup and return.
 	 */
-	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp);
+	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
 	if (error) {
 		printf("deladdress: socreate, error=%d\n", error);
 		return (error);
--- a/sys/nfs/nfs_bootdhcp.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/nfs/nfs_bootdhcp.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nfs_bootdhcp.c,v 1.37 2007/12/20 16:19:38 dyoung Exp $	*/
+/*	$NetBSD: nfs_bootdhcp.c,v 1.38 2008/04/24 11:38:39 ad Exp $	*/
 
 /*-
  * Copyright (c) 1995, 1997 The NetBSD Foundation, Inc.
@@ -51,7 +51,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_bootdhcp.c,v 1.37 2007/12/20 16:19:38 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_bootdhcp.c,v 1.38 2008/04/24 11:38:39 ad Exp $");
 
 #include "opt_nfs_boot.h"
 #include "opt_tftproot.h"
@@ -459,7 +459,7 @@
 	int vcilen;
 #endif
 
-	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp);
+	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, lwp, NULL);
 	if (error) {
 		printf("bootp: socreate, error=%d\n", error);
 		return (error);
--- a/sys/nfs/nfs_socket.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/nfs/nfs_socket.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nfs_socket.c,v 1.169 2008/04/10 12:32:37 yamt Exp $	*/
+/*	$NetBSD: nfs_socket.c,v 1.170 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  * Copyright (c) 1989, 1991, 1993, 1995
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.169 2008/04/10 12:32:37 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.170 2008/04/24 11:38:39 ad Exp $");
 
 #include "fs_nfs.h"
 #include "opt_nfs.h"
@@ -192,7 +192,7 @@
 	struct lwp *l;
 {
 	struct socket *so;
-	int s, error, rcvreserve, sndreserve;
+	int error, rcvreserve, sndreserve;
 	struct sockaddr *saddr;
 	struct sockaddr_in *sin;
 #ifdef INET6
@@ -203,7 +203,7 @@
 	nmp->nm_so = (struct socket *)0;
 	saddr = mtod(nmp->nm_nam, struct sockaddr *);
 	error = socreate(saddr->sa_family, &nmp->nm_so,
-		nmp->nm_sotype, nmp->nm_soproto, l);
+		nmp->nm_sotype, nmp->nm_soproto, l, NULL);
 	if (error)
 		goto bad;
 	so = nmp->nm_so;
@@ -262,40 +262,41 @@
 	 * Protocols that do not require connections may be optionally left
 	 * unconnected for servers that reply from a port other than NFS_PORT.
 	 */
+	solock(so);
 	if (nmp->nm_flag & NFSMNT_NOCONN) {
 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
+			sounlock(so);
 			error = ENOTCONN;
 			goto bad;
 		}
 	} else {
 		error = soconnect(so, nmp->nm_nam, l);
-		if (error)
+		if (error) {
+			sounlock(so);
 			goto bad;
+		}
 
 		/*
 		 * Wait for the connection to complete. Cribbed from the
 		 * connect system call but with the wait timing out so
 		 * that interruptible mounts don't hang here for a long time.
 		 */
-		s = splsoftnet();
 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-			(void) tsleep((void *)&so->so_timeo, PSOCK,
-				"nfscn1", 2 * hz);
+			(void)sowait(so, 2 * hz);
 			if ((so->so_state & SS_ISCONNECTING) &&
 			    so->so_error == 0 && rep &&
 			    (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){
 				so->so_state &= ~SS_ISCONNECTING;
-				splx(s);
+				sounlock(so);
 				goto bad;
 			}
 		}
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
+			sounlock(so);
 			goto bad;
 		}
-		splx(s);
 	}
 	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
 		so->so_rcv.sb_timeo = (5 * hz);
@@ -317,6 +318,7 @@
 		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
 		    NFS_MAXPKTHDR) * 2;
 	} else {
+		sounlock(so);
 		if (nmp->nm_sotype != SOCK_STREAM)
 			panic("nfscon sotype");
 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
@@ -337,12 +339,16 @@
 		    sizeof (u_int32_t)) * 2;
 		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
 		    sizeof (u_int32_t)) * 2;
+		solock(so);
 	}
 	error = soreserve(so, sndreserve, rcvreserve);
-	if (error)
+	if (error) {
+		sounlock(so);
 		goto bad;
+	}
 	so->so_rcv.sb_flags |= SB_NOINTR;
 	so->so_snd.sb_flags |= SB_NOINTR;
+	sounlock(so);
 
 	/* Initialize other non-zero congestion variables */
 	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
@@ -409,7 +415,9 @@
 	if (nmp->nm_so) {
 		so = nmp->nm_so;
 		nmp->nm_so = (struct socket *)0;
+		solock(so);
 		soshutdown(so, SHUT_RDWR);
+		sounlock(so);
 		drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
 		if (drain) {
 			/*
@@ -499,9 +507,7 @@
 	else
 		flags = 0;
 
-	KERNEL_LOCK(1, curlwp);
 	error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags,  l);
-	KERNEL_UNLOCK_ONE(curlwp);
 	if (error) {
 		if (rep) {
 			if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
@@ -1674,6 +1680,7 @@
 		 *	Resend it
 		 * Set r_rtt to -1 in case we fail to send it now.
 		 */
+		solock(so);
 		rep->r_rtt = -1;
 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
@@ -1715,6 +1722,7 @@
 				rep->r_rtt = 0;
 			}
 		}
+		sounlock(so);
 	}
 	splx(s);
 
@@ -2240,9 +2248,7 @@
 		auio.uio_resid = 1000000000;
 		/* not need to setup uio_vmspace */
 		flags = MSG_DONTWAIT;
-		KERNEL_LOCK(1, curlwp);
 		error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags);
-		KERNEL_UNLOCK_ONE(curlwp);
 		if (error || mp == NULL) {
 			if (error == EWOULDBLOCK)
 				setflags |= SLP_A_NEEDQ;
@@ -2278,10 +2284,8 @@
 			auio.uio_resid = 1000000000;
 			/* not need to setup uio_vmspace */
 			flags = MSG_DONTWAIT;
-			KERNEL_LOCK(1, curlwp);
 			error = (*so->so_receive)(so, &nam, &auio, &mp, NULL,
 			    &flags);
-			KERNEL_UNLOCK_ONE(curlwp);
 			if (mp) {
 				if (nam) {
 					m = nam;
--- a/sys/nfs/nfs_syscalls.c	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/nfs/nfs_syscalls.c	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: nfs_syscalls.c,v 1.133 2008/03/23 00:46:25 rmind Exp $	*/
+/*	$NetBSD: nfs_syscalls.c,v 1.134 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.133 2008/03/23 00:46:25 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.134 2008/04/24 11:38:39 ad Exp $");
 
 #include "fs_nfs.h"
 #include "opt_nfs.h"
@@ -393,7 +393,7 @@
 	struct nfssvc_sock *slp;
 	struct socket *so;
 	struct nfssvc_sock *tslp;
-	int error, s;
+	int error;
 
 	so = (struct socket *)fp->f_data;
 	tslp = (struct nfssvc_sock *)0;
@@ -424,7 +424,9 @@
 		siz = NFS_MAXPACKET + sizeof (u_long);
 	else
 		siz = NFS_MAXPACKET;
+	solock(so);
 	error = soreserve(so, siz, siz);
+	sounlock(so);
 	if (error) {
 		m_freem(mynam);
 		return (error);
@@ -454,10 +456,12 @@
 		m->m_len = sizeof(int32_t);
 		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
 	}
+	solock(so);
 	so->so_rcv.sb_flags &= ~SB_NOINTR;
 	so->so_rcv.sb_timeo = 0;
 	so->so_snd.sb_flags &= ~SB_NOINTR;
 	so->so_snd.sb_timeo = 0;
+	sounlock(so);
 	if (tslp) {
 		slp = tslp;
 	} else {
@@ -473,13 +477,11 @@
 	slp->ns_aflags = SLP_A_NEEDQ;
 	slp->ns_gflags = 0;
 	slp->ns_sflags = 0;
-	KERNEL_LOCK(1, curlwp);
-	s = splsoftnet();
+	solock(so);
 	so->so_upcallarg = (void *)slp;
 	so->so_upcall = nfsrv_soupcall;
 	so->so_rcv.sb_flags |= SB_UPCALL;
-	splx(s);
-	KERNEL_UNLOCK_ONE(curlwp);
+	sounlock(so);
 	nfsrv_wakenfsd(slp);
 	return (0);
 }
@@ -802,7 +804,6 @@
 	struct nfsrv_descript *nwp;
 	struct socket *so;
 	struct mbuf *m;
-	int s;
 
 	if (nfsdsock_drain(slp)) {
 		return;
@@ -816,14 +817,12 @@
 
 	so = slp->ns_so;
 	KASSERT(so != NULL);
-	KERNEL_LOCK(1, curlwp);
-	s = splsoftnet();
+	solock(so);
 	so->so_upcall = NULL;
 	so->so_upcallarg = NULL;
 	so->so_rcv.sb_flags &= ~SB_UPCALL;
-	splx(s);
 	soshutdown(so, SHUT_RDWR);
-	KERNEL_UNLOCK_ONE(curlwp);
+	sounlock(so);
 
 	if (slp->ns_nam)
 		m_free(slp->ns_nam);
--- a/sys/sys/protosw.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/sys/protosw.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: protosw.h,v 1.42 2007/03/31 18:17:13 plunky Exp $	*/
+/*	$NetBSD: protosw.h,v 1.43 2008/04/24 11:38:39 ad Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1993
@@ -261,6 +261,47 @@
 struct domain *pffinddomain(int);
 void pfctlinput(int, const struct sockaddr *);
 void pfctlinput2(int, const struct sockaddr *, void *);
+
+/*
+ * Wrappers for non-MPSAFE protocols: call name() with KERNEL_LOCK held.
+ */
+#include <sys/systm.h>	/* kernel_lock */
+
+#define	PR_WRAP_USRREQ(name)				\
+static int						\
+name##_wrapper(struct socket *a, int b, struct mbuf *c,	\
+     struct mbuf *d, struct mbuf *e, struct lwp *f)	\
+{							\
+	int rv;						\
+	KERNEL_LOCK(1, NULL);				\
+	rv = name(a, b, c, d, e, f);			\
+	KERNEL_UNLOCK_ONE(NULL);			\
+	return rv;					\
+}
+
+#define	PR_WRAP_CTLOUTPUT(name)				\
+static int						\
+name##_wrapper(int a, struct socket *b, int c, int d,	\
+     struct mbuf **e)					\
+{							\
+	int rv;		/* result of name() */		\
+	KERNEL_LOCK(1, NULL);	/* held across the call */	\
+	rv = name(a, b, c, d, e);			\
+	KERNEL_UNLOCK_ONE(NULL);			\
+	return rv;					\
+}
+
+#define	PR_WRAP_CTLINPUT(name)				\
+static void *						\
+name##_wrapper(int a, const struct sockaddr *b, void *c)\
+{							\
+	void *rv;	/* result of name() */		\
+	KERNEL_LOCK(1, NULL);	/* held across the call */	\
+	rv = name(a, b, c);				\
+	KERNEL_UNLOCK_ONE(NULL);			\
+	return rv;					\
+}
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_PROTOSW_H_ */
--- a/sys/sys/socketvar.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/sys/socketvar.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,37 @@
-/*	$NetBSD: socketvar.h,v 1.104 2008/03/21 21:55:01 ad Exp $	*/
+/*	$NetBSD: socketvar.h,v 1.105 2008/04/24 11:38:39 ad Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
@@ -37,6 +70,8 @@
 #include <sys/select.h>
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/queue.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
 
 #if !defined(_KERNEL) || defined(LKM)
 struct uio;
@@ -52,6 +87,8 @@
 struct sockbuf {
 	struct selinfo sb_sel;		/* process selecting read/write */
 	struct mowner *sb_mowner;	/* who owns data for this sockbuf */
+	struct socket *sb_so;		/* back pointer to socket */
+	kcondvar_t sb_cv;		/* notifier */
 	/* When re-zeroing this struct, we zero from sb_startzero to the end */
 #define	sb_startzero	sb_cc
 	u_long	sb_cc;			/* actual chars in buffer */
@@ -73,13 +110,10 @@
 #endif
 
 #define	SB_LOCK		0x01		/* lock on data queue */
-#define	SB_WANT		0x02		/* someone is waiting to lock */
-#define	SB_WAIT		0x04		/* someone is waiting for data/space */
-#define	SB_SEL		0x08		/* someone is selecting */
+#define	SB_NOTIFY	0x04		/* someone is waiting for data/space */
 #define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
 #define	SB_UPCALL	0x20		/* someone wants an upcall */
 #define	SB_NOINTR	0x40		/* operations not interruptible */
-    	/* XXXLUKEM: 0x80 left for FreeBSD's SB_AIO */
 #define	SB_KNOTE	0x100		/* kernel note attached */
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 
@@ -90,6 +124,8 @@
  * private data and error information.
  */
 struct socket {
+	kmutex_t * volatile so_lock;	/* pointer to lock on structure */
+	kcondvar_t	so_cv;		/* notifier */
 	short		so_type;	/* generic type, see socket.h */
 	short		so_options;	/* from socket call, see socket.h */
 	u_short		so_linger;	/* time to linger while closing */
@@ -139,6 +175,7 @@
 
 #define	SB_EMPTY_FIXUP(sb)						\
 do {									\
+	KASSERT(solocked((sb)->sb_so));					\
 	if ((sb)->sb_mb == NULL) {					\
 		(sb)->sb_mbtail = NULL;					\
 		(sb)->sb_lastrecord = NULL;				\
@@ -165,103 +202,12 @@
 					 */
 #define	SS_ISAPIPE 		0x1000	/* socket is implementing a pipe */
 
-
-/*
- * Macros for sockets and socket buffering.
- */
-
-/*
- * Do we need to notify the other side when I/O is possible?
- */
-#define	sb_notify(sb)	(((sb)->sb_flags & \
-	(SB_WAIT | SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE)) != 0)
-
-/*
- * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
- * This is problematical if the fields are unsigned, as the space might
- * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
- * overflow and return 0.
- */
-#define	sbspace(sb) \
-	(lmin((sb)->sb_hiwat - (sb)->sb_cc, (sb)->sb_mbmax - (sb)->sb_mbcnt))
-
-/* do we have to send all at once on a socket? */
-#define	sosendallatonce(so) \
-	((so)->so_proto->pr_flags & PR_ATOMIC)
-
-/* can we read something from so? */
-#define	soreadable(so) \
-	((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
-	    ((so)->so_state & SS_CANTRCVMORE) || \
-	    (so)->so_qlen || (so)->so_error)
-
-/* can we write something to so? */
-#define	sowritable(so) \
-	((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
-	    (((so)->so_state&SS_ISCONNECTED) || \
-	      ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
-	 ((so)->so_state & SS_CANTSENDMORE) || \
-	 (so)->so_error)
+#ifdef _KERNEL
 
-/* adjust counters in sb reflecting allocation of m */
-#define	sballoc(sb, m)							\
-do {									\
-	(sb)->sb_cc += (m)->m_len;					\
-	(sb)->sb_mbcnt += MSIZE;					\
-	if ((m)->m_flags & M_EXT)					\
-		(sb)->sb_mbcnt += (m)->m_ext.ext_size;			\
-} while (/* CONSTCOND */ 0)
-
-/* adjust counters in sb reflecting freeing of m */
-#define	sbfree(sb, m)							\
-do {									\
-	(sb)->sb_cc -= (m)->m_len;					\
-	(sb)->sb_mbcnt -= MSIZE;					\
-	if ((m)->m_flags & M_EXT)					\
-		(sb)->sb_mbcnt -= (m)->m_ext.ext_size;			\
-} while (/* CONSTCOND */ 0)
-
-/*
- * Set lock on sockbuf sb; sleep if lock is already held.
- * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
- * Returns error without lock if sleep is interrupted.
- */
-#define	sblock(sb, wf)							\
-	((sb)->sb_flags & SB_LOCK ?					\
-	    (((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) :		\
-	    ((sb)->sb_flags |= SB_LOCK, 0))
-
-/* release lock on sockbuf sb */
-#define	sbunlock(sb)							\
-do {									\
-	(sb)->sb_flags &= ~SB_LOCK;					\
-	if ((sb)->sb_flags & SB_WANT) {					\
-		(sb)->sb_flags &= ~SB_WANT;				\
-		wakeup((void *)&(sb)->sb_flags);			\
-	}								\
-} while (/* CONSTCOND */ 0)
-
-#define	sorwakeup(so)							\
-do {									\
-	if (sb_notify(&(so)->so_rcv))					\
-		sowakeup((so), &(so)->so_rcv, POLL_IN);				\
-} while (/* CONSTCOND */ 0)
-
-#define	sowwakeup(so)							\
-do {									\
-	if (sb_notify(&(so)->so_snd))					\
-		sowakeup((so), &(so)->so_snd, POLL_OUT);		\
-} while (/* CONSTCOND */ 0)
-
-#ifdef _KERNEL
 extern u_long		sb_max;
 extern int		somaxkva;
 extern int		sock_loan_thresh;
-
-/* strings for sleep message: */
-extern const char	netio[], netcon[], netcls[];
-
-extern struct pool	socket_pool;
+extern kmutex_t		*softnet_lock;
 
 struct mbuf;
 struct sockaddr;
@@ -303,7 +249,6 @@
 void	sbrelease(struct sockbuf *, struct socket *);
 int	sbreserve(struct sockbuf *, u_long, struct socket *);
 int	sbwait(struct sockbuf *);
-int	sb_lock(struct sockbuf *);
 int	sb_max_set(u_long);
 void	soinit(void);
 int	soabort(struct socket *);
@@ -314,7 +259,8 @@
 int	soclose(struct socket *);
 int	soconnect(struct socket *, struct mbuf *, struct lwp *);
 int	soconnect2(struct socket *, struct socket *);
-int	socreate(int, struct socket **, int, int, struct lwp *);
+int	socreate(int, struct socket **, int, int, struct lwp *,
+		 struct socket *);
 int	fsocreate(int, struct socket **, int, int, struct lwp *, int *);
 int	sodisconnect(struct socket *);
 void	sofree(struct socket *);
@@ -340,12 +286,20 @@
 void	sowakeup(struct socket *, struct sockbuf *, int);
 int	sockargs(struct mbuf **, const void *, size_t, int);
 int	sopoll(struct socket *, int);
+struct	socket *soget(bool);
+void	soput(struct socket *);
+bool	solocked(struct socket *);
+bool	solocked2(struct socket *, struct socket *);
+int	sblock(struct sockbuf *, int);
+void	sbunlock(struct sockbuf *);
+int	sowait(struct socket *, int);
+void	solockretry(struct socket *, kmutex_t *);
+void	sosetlock(struct socket *);
 
 int	copyout_sockname(struct sockaddr *, unsigned int *, int, struct mbuf *);
 int	copyout_msg_control(struct lwp *, struct msghdr *, struct mbuf *);
 void	free_control_mbuf(struct lwp *, struct mbuf *, struct mbuf *);
 
-
 int	do_sys_getsockname(struct lwp *, int, int, struct mbuf **);
 int	do_sys_sendmsg(struct lwp *, int, struct msghdr *, int, register_t *);
 int	do_sys_recvmsg(struct lwp *, int, struct msghdr *, struct mbuf **,
@@ -355,6 +309,137 @@
 int	do_sys_connect(struct lwp *, int, struct mbuf *);
 int	do_sys_accept(struct lwp *, int, struct mbuf **, register_t *);
 
+/*
+ * Inline functions for sockets and socket buffering.
+ */
+
+#include <sys/protosw.h>
+#include <sys/mbuf.h>
+
+/*
+ * Is anyone (waiter, async I/O, upcall, knote) to be notified about sb?
+ */
+static inline int
+sb_notify(struct sockbuf *sb)
+{
+
+	KASSERT(solocked(sb->sb_so));	/* owning socket must be locked */
+
+	return sb->sb_flags & (SB_NOTIFY | SB_ASYNC | SB_UPCALL | SB_KNOTE);
+}
+
+/*
+ * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
+ * Returns 0 if cc >= hiwat or mbcnt >= mbmax, so u_long never wraps.
+ */
+static inline u_long
+sbspace(struct sockbuf *sb)
+{
+
+	KASSERT(solocked(sb->sb_so));
+
+	if (sb->sb_hiwat <= sb->sb_cc || sb->sb_mbmax <= sb->sb_mbcnt)
+		return 0;
+	return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
+}
+
+/* Nonzero if so's protocol has PR_ATOMIC set: sends must go all at once. */
+static inline int
+sosendallatonce(struct socket *so)
+{
+
+	return so->so_proto->pr_flags & PR_ATOMIC;
+}
+
+/* can we read something from so?  (the socket must be locked) */
+static inline int
+soreadable(struct socket *so)
+{
+
+	KASSERT(solocked(so));
+	/* Data at/above the low-water mark, CANTRCVMORE, so_qlen or an error. */
+	return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
+	    (so->so_state & SS_CANTRCVMORE) != 0 ||
+	    so->so_qlen != 0 || so->so_error != 0;
+}
+
+/* can we write something to so?  (the socket must be locked) */
+static inline int
+sowritable(struct socket *so)
+{
+
+	KASSERT(solocked(so));
+	/* Room to send (connected, or no connection needed), CANTSENDMORE, error. */
+	return (sbspace(&so->so_snd) >= so->so_snd.sb_lowat &&
+	    ((so->so_state & SS_ISCONNECTED) != 0 ||
+	    (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
+	    (so->so_state & SS_CANTSENDMORE) != 0 ||
+	    so->so_error != 0;
+}
+
+/* Account for mbuf m being added to sb: bump sb_cc and sb_mbcnt. */
+static inline void
+sballoc(struct sockbuf *sb, struct mbuf *m)
+{
+
+	KASSERT(solocked(sb->sb_so));
+
+	sb->sb_cc += m->m_len;
+	sb->sb_mbcnt += MSIZE;
+	if (m->m_flags & M_EXT)		/* external storage is charged too */
+		sb->sb_mbcnt += m->m_ext.ext_size;
+}
+
+/* Account for mbuf m being removed from sb: the inverse of sballoc(). */
+static inline void
+sbfree(struct sockbuf *sb, struct mbuf *m)
+{
+
+	KASSERT(solocked(sb->sb_so));
+
+	sb->sb_cc -= m->m_len;
+	sb->sb_mbcnt -= MSIZE;
+	if (m->m_flags & M_EXT)		/* give back external storage too */
+		sb->sb_mbcnt -= m->m_ext.ext_size;
+}
+
+static inline void
+sorwakeup(struct socket *so)
+{
+
+	KASSERT(solocked(so));
+	/* Receive side: call sowakeup() only if sb_notify() reports interest. */
+	if (sb_notify(&so->so_rcv))
+		sowakeup(so, &so->so_rcv, POLL_IN);
+}
+
+static inline void
+sowwakeup(struct socket *so)
+{
+
+	KASSERT(solocked(so));
+	/* Send side: call sowakeup() only if sb_notify() reports interest. */
+	if (sb_notify(&so->so_snd))
+		sowakeup(so, &so->so_snd, POLL_OUT);
+}
+
+static inline void
+solock(struct socket *so)
+{
+	kmutex_t *lock;
+
+	lock = so->so_lock;	/* snapshot: so_lock is a volatile pointer */
+	mutex_enter(lock);
+	if (__predict_false(lock != so->so_lock))	/* pointer changed? */
+		solockretry(so, lock);	/* presumably re-locks; see its definition */
+}
+	
+static inline void
+sounlock(struct socket *so)
+{
+
+	mutex_exit(so->so_lock);	/* exit the mutex so_lock points at now */
+}
 
 #ifdef SOCKBUF_DEBUG
 /*
@@ -369,9 +454,11 @@
 
 void	sblastmbufchk(struct sockbuf *, const char *);
 #define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
+#define	SBCHECK(sb)			sbcheck(sb)
 #else
 #define	SBLASTRECORDCHK(sb, where)	/* nothing */
 #define	SBLASTMBUFCHK(sb, where)	/* nothing */
+#define	SBCHECK(sb)			/* nothing */
 #endif /* SOCKBUF_DEBUG */
 
 /* sosend loan */
--- a/sys/sys/un.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/sys/un.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: un.h,v 1.42 2008/03/24 12:24:37 yamt Exp $	*/
+/*	$NetBSD: un.h,v 1.43 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -77,26 +77,29 @@
 int	uipc_usrreq(struct socket *, int, struct mbuf *,
 	    struct mbuf *, struct mbuf *, struct lwp *);
 int	uipc_ctloutput(int, struct socket *, int, int, struct mbuf **);
+void	uipc_init (void);
+kmutex_t *uipc_dgramlock (void);
+kmutex_t *uipc_streamlock (void);
+kmutex_t *uipc_rawlock (void);
 
 int	unp_attach (struct socket *);
-int	unp_bind (struct unpcb *, struct mbuf *, struct lwp *);
+int	unp_bind (struct socket *, struct mbuf *, struct lwp *);
 int	unp_connect (struct socket *, struct mbuf *, struct lwp *);
 int	unp_connect2 (struct socket *, struct socket *, int);
 void	unp_detach (struct unpcb *);
 void	unp_discard (struct file *);
 void	unp_disconnect (struct unpcb *);
-void	unp_drop (struct unpcb *, int);
+bool	unp_drop (struct unpcb *, int);
 void	unp_gc (void);
 void	unp_mark (struct file *);
 void	unp_scan (struct mbuf *, void (*)(struct file *), int);
 void	unp_shutdown (struct unpcb *);
 int 	unp_externalize (struct mbuf *, struct lwp *);
-int	unp_internalize (struct mbuf **, struct lwp *);
+int	unp_internalize (struct mbuf **);
 void 	unp_dispose (struct mbuf *);
 int	unp_output (struct mbuf *, struct mbuf *, struct unpcb *,
 	    struct lwp *);
-void	unp_setsockaddr (struct unpcb *, struct mbuf *);
-void	unp_setpeeraddr (struct unpcb *, struct mbuf *);
+void	unp_setaddr (struct socket *, struct mbuf *, bool);
 #else /* !_KERNEL */
 
 /* actual length of an initialized sockaddr_un */
--- a/sys/sys/unpcb.h	Thu Apr 24 11:36:51 2008 +0000
+++ b/sys/sys/unpcb.h	Thu Apr 24 11:38:36 2008 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: unpcb.h,v 1.16 2008/03/28 12:14:22 ad Exp $	*/
+/*	$NetBSD: unpcb.h,v 1.17 2008/04/24 11:38:39 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -35,6 +35,7 @@
 #define _SYS_UNPCB_H_
 
 #include <sys/un.h>
+#include <sys/mutex.h>
 
 /*
  * Protocol control block for an active
@@ -75,6 +76,7 @@
 	struct	unpcb *unp_refs;	/* referencing socket linked list */
 	struct 	unpcb *unp_nextref;	/* link in unp_refs list */
 	struct	sockaddr_un *unp_addr;	/* bound address of socket */
+	kmutex_t *unp_streamlock;	/* lock for est. stream connections */
 	size_t	unp_addrlen;		/* size of socket address */
 	int	unp_cc;			/* copy of rcv.sb_cc */
 	int	unp_mbcnt;		/* copy of rcv.sb_mbcnt */