- use O->A loan to serve read(2).  based on a patch from Chuck Silvers
- associated O->A loan fixes.

author     yamt <yamt@NetBSD.org>
date       Mon, 26 Dec 2011 16:03:10 +0000
branch     yamt-pagecache
changeset  280333 a787b13760f0
parent     280332 d3b9869d9aa7
child      280334 1310804b6e11

files:
sys/kern/kern_mutex_obj.c
sys/sys/mutex.h
sys/uvm/uvm.h
sys/uvm/uvm_amap.c
sys/uvm/uvm_amap.h
sys/uvm/uvm_anon.c
sys/uvm/uvm_extern.h
sys/uvm/uvm_fault.c
sys/uvm/uvm_loan.c
sys/uvm/uvm_loan.h
sys/uvm/uvm_map.c
sys/uvm/uvm_meter.c
sys/uvm/uvm_page.c
sys/uvm/uvm_pdaemon.c
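
Background (a sketch, not part of this changeset): an O->A loan lends a
uvm_object's pages read-only to anons installed in the calling process's
amap, so read(2) can map file pages copy-on-write into the user buffer
instead of copying them.  A minimal sketch of a caller in a vnode read
path, with "uobj", "todo", "advice" and "flags" assumed to come from the
surrounding read routine, falling back to the ordinary copy when loaning
is refused:

	error = uvm_loanobj(uobj, uio);
	if (error == 0)
		return 0;	/* pages were loaned; nothing to copy */
	/* ENOSYS, EINVAL etc.: fall back to the copying path */
	error = ubc_uiomove(uobj, uio, todo, advice, flags);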
--- a/sys/kern/kern_mutex_obj.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/kern/kern_mutex_obj.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_mutex_obj.c,v 1.5.2.1 2011/11/18 00:57:33 yamt Exp $	*/
+/*	$NetBSD: kern_mutex_obj.c,v 1.5.2.2 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_mutex_obj.c,v 1.5.2.1 2011/11/18 00:57:33 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_mutex_obj.c,v 1.5.2.2 2011/12/26 16:03:10 yamt Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -147,6 +147,38 @@
 }
 
 /*
+ * mutex_obj_free_if_last:
+ *
+ *	Drop our reference to a lock object only if it is the last one;
+ *	in that case, free the object and return true.  Otherwise leave
+ *	the object and its reference count untouched and return false.
+ */
+bool
+mutex_obj_free_if_last(kmutex_t *lock)
+{
+	struct kmutexobj *mo = (struct kmutexobj *)lock;
+	bool ret;
+
+	KASSERTMSG(mo->mo_magic == MUTEX_OBJ_MAGIC,
+	    "%s: lock %p: mo->mo_magic (%#x) != MUTEX_OBJ_MAGIC (%#x)",
+	     __func__, mo, mo->mo_magic, MUTEX_OBJ_MAGIC);
+	KASSERTMSG(mo->mo_refcnt > 0,
+	    "%s: lock %p: mo->mo_refcnt (%#x) == 0",
+	     __func__, mo, mo->mo_refcnt);
+
+	/*
+	 * if mo_refcnt is 1, no one except us has a reference to it and
+	 * thus it's stable.
+	 */
+	if (mo->mo_refcnt != 1) {
+		return false;
+	}
+	ret = mutex_obj_free(lock);
+	KASSERT(ret);
+	return true;
+}
+
+/*
  * mutex_obj_pause:
  *
  *	Pause until lock1 is available.
@@ -162,6 +194,10 @@
 	KASSERT(mutex_owned(lock2));
 	mutex_obj_hold(lock1);
 	mutex_exit(lock2);
+	/*
+	 * acquire and release lock1.
+	 * this can involve priority lending.
+	 */
 	mutex_enter(lock1);
 	mutex_exit(lock1);
 	mutex_obj_free(lock1);
--- a/sys/sys/mutex.h	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/sys/mutex.h	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: mutex.h,v 1.20.10.1 2011/11/18 00:57:33 yamt Exp $	*/
+/*	$NetBSD: mutex.h,v 1.20.10.2 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -210,6 +210,7 @@
 kmutex_t *mutex_obj_alloc(kmutex_type_t, int);
 void	mutex_obj_hold(kmutex_t *);
 bool	mutex_obj_free(kmutex_t *);
+bool	mutex_obj_free_if_last(kmutex_t *);
 void	mutex_obj_pause(kmutex_t *, kmutex_t *);
 kmutex_t *mutex_obj_alloc_kernel_obj_lock(kmutex_type_t, int);
 
--- a/sys/uvm/uvm.h	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm.h	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm.h,v 1.62.4.3 2011/11/20 10:52:33 yamt Exp $	*/
+/*	$NetBSD: uvm.h,v 1.62.4.4 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -91,16 +91,35 @@
 
 	int64_t loan_obj;	/* O->K loan */
 	int64_t unloan_obj;	/* O->K unloan */
-	int64_t loanbreak_obj;	/* O->K loan resolved on write */
-	int64_t loanfree_obj;	/* O->K loan resolved on free */
+	int64_t loanbreak_obj;	/* O->K loan resolved on write to O */
+	int64_t loanfree_obj;	/* O->K loan resolved on free of O */
 
 	int64_t loan_anon;	/* A->K loan */
 	int64_t unloan_anon;	/* A->K unloan */
-	int64_t loanbreak_anon;	/* A->K loan resolved on write */
-	int64_t loanfree_anon;	/* A->K loan resolved on free */
+	int64_t loanbreak_anon;	/* A->K loan resolved on write to A */
+	int64_t loanfree_anon;	/* A->K loan resolved on free of A */
+
+	int64_t loan_oa;	/* O->A->K loan */
+	int64_t unloan_oa;	/* O->A->K unloan */
 
 	int64_t loan_zero;	/* O->K loan (zero) */
 	int64_t unloan_zero;	/* O->K unloan (zero) */
+
+	int64_t loanbreak_orphaned; /* O->A->K loan turned into A->K loan due to
+					write to O */
+	int64_t loanfree_orphaned; /* O->A->K loan turned into A->K loan due to
+					free of O */
+	int64_t loanbreak_orphaned_anon; /* O->A->K loan turned into O->K loan
+					due to write to A */
+	int64_t loanfree_orphaned_anon; /* O->A->K loan turned into O->K loan
+					due to free of A */
+
+	int64_t loanbreak_oa_obj; /* O->A loan resolved on write to O */
+	int64_t loanfree_oa_obj; /* O->A loan resolved on free of O */
+	int64_t loanbreak_oa_anon; /* O->A loan resolved on write to A */
+	int64_t loanfree_oa_anon; /* O->A loan resolved on free of A */
+	int64_t loan_resolve_orphan; /* O->A loaned page taken over by anon */
+	int64_t loan_obj_read;	/* O->A loan for read(2) */
 };
 
 /*
--- a/sys/uvm/uvm_amap.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_amap.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_amap.c,v 1.104 2011/10/11 23:57:50 yamt Exp $	*/
+/*	$NetBSD: uvm_amap.c,v 1.104.2.1 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.104 2011/10/11 23:57:50 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.104.2.1 2011/12/26 16:03:10 yamt Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -178,6 +178,7 @@
 	}
 	totalslots = amap_roundup_slots(slots + padslots);
 	amap->am_lock = NULL;
+	amap->am_obj_lock = NULL;
 	amap->am_ref = 1;
 	amap->am_flags = 0;
 #ifdef UVM_AMAP_PPREF
@@ -288,6 +289,9 @@
 		KASSERT(!mutex_owned(amap->am_lock));
 		mutex_obj_free(amap->am_lock);
 	}
+	if (amap->am_obj_lock != NULL) {
+		mutex_obj_free(amap->am_obj_lock);
+	}
 	slots = amap->am_maxslot;
 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
@@ -767,6 +771,7 @@
 	struct vm_anon *tofree;
 	u_int slots, lcv;
 	vsize_t len;
+	bool have_obj_page;
 
 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
@@ -881,13 +886,22 @@
 	 */
 
 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
+	have_obj_page = false;
 	for (lcv = 0 ; lcv < slots; lcv++) {
-		amap->am_anon[lcv] =
+		struct vm_anon * const anon =
 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
-		if (amap->am_anon[lcv] == NULL)
+
+		amap->am_anon[lcv] = anon;
+		if (anon == NULL)
 			continue;
-		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
-		KASSERT(amap->am_anon[lcv]->an_ref > 0);
+		if (anon->an_page != NULL && anon->an_page->uobject != NULL) {
+			KASSERT(anon->an_page->loan_count > 0);
+			KASSERT(srcamap->am_obj_lock ==
+			    anon->an_page->uobject->vmobjlock);
+			have_obj_page = true;
+		}
+		KASSERT(anon->an_lock == srcamap->am_lock);
+		KASSERT(anon->an_ref > 0);
 		amap->am_anon[lcv]->an_ref++;
 		amap->am_bckptr[lcv] = amap->am_nused;
 		amap->am_slots[amap->am_nused] = lcv;
@@ -925,6 +939,10 @@
 	if (amap->am_nused != 0) {
 		amap->am_lock = srcamap->am_lock;
 		mutex_obj_hold(amap->am_lock);
+		if (have_obj_page) {
+			amap->am_obj_lock = srcamap->am_obj_lock;
+			mutex_obj_hold(amap->am_obj_lock);
+		}
 	}
 	uvm_anon_freelst(srcamap, tofree);
 
@@ -1618,3 +1636,45 @@
 
 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
 }
+
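+/*
+ * amap_lock: lock an amap and, if any of its pages are loaned from a
+ * uvm_object (O->A), that object's lock as well.
+ *
+ * am_obj_lock is always taken after am_lock.  if ours turns out to be
+ * the last reference to am_obj_lock, no object shares the lock with us
+ * any longer, so it is released instead of being locked.
+ */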
+void
+amap_lock(struct vm_amap *amap)
+{
+
+	mutex_enter(amap->am_lock);
+	if (amap->am_obj_lock != NULL) {
+		if (mutex_obj_free_if_last(amap->am_obj_lock)) {
+			amap->am_obj_lock = NULL;
+		} else {
+			mutex_enter(amap->am_obj_lock);
+		}
+	}
+}
+
+int
+amap_lock_try(struct vm_amap *amap)
+{
+
+	if (!mutex_tryenter(amap->am_lock)) {
+		return 0;
+	}
+	if (amap->am_obj_lock != NULL) {
+		if (mutex_obj_free_if_last(amap->am_obj_lock)) {
+			amap->am_obj_lock = NULL;
+		} else if (!mutex_tryenter(amap->am_obj_lock)) {
+			mutex_exit(amap->am_lock);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void
+amap_unlock(struct vm_amap *amap)
+{
+
+	if (amap->am_obj_lock != NULL) {
+		mutex_exit(amap->am_obj_lock);
+	}
+	mutex_exit(amap->am_lock);
+}
--- a/sys/uvm/uvm_amap.h	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_amap.h	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_amap.h,v 1.37 2011/06/12 03:36:02 rmind Exp $	*/
+/*	$NetBSD: uvm_amap.h,v 1.37.2.1 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -82,6 +82,8 @@
 			(struct vm_amap *);
 void		amap_lock	/* lock amap */
 			(struct vm_amap *);
+int		amap_lock_try	/* trylock amap */
+			(struct vm_amap *);
 struct vm_anon	*amap_lookup	/* lookup an anon @ offset in amap */
 			(struct vm_aref *, vaddr_t);
 void		amap_lookups	/* lookup multiple anons */
@@ -152,6 +154,7 @@
 
 struct vm_amap {
 	kmutex_t *am_lock;	/* lock [locks all vm_amap fields] */
+	kmutex_t *am_obj_lock;	/* lock of the uobj which might lend us pages */
 	int am_ref;		/* reference count */
 	int am_flags;		/* flags */
 	int am_maxslot;		/* max # of slots allocated */
@@ -251,10 +254,7 @@
  */
 
 #define amap_flags(AMAP)	((AMAP)->am_flags)
-#define amap_lock(AMAP)		mutex_enter((AMAP)->am_lock)
-#define amap_lock_try(AMAP)	mutex_tryenter((AMAP)->am_lock)
 #define amap_refs(AMAP)		((AMAP)->am_ref)
-#define amap_unlock(AMAP)	mutex_exit((AMAP)->am_lock)
 
 /*
  * if we enable PPREF, then we have a couple of extra functions that
--- a/sys/uvm/uvm_anon.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_anon.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_anon.c,v 1.62.2.1 2011/11/02 21:54:00 yamt Exp $	*/
+/*	$NetBSD: uvm_anon.c,v 1.62.2.2 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.62.2.1 2011/11/02 21:54:00 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.62.2.2 2011/12/26 16:03:10 yamt Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -134,12 +134,18 @@
 		 */
 
 		if (pg->uobject) {
+			struct uvm_cpu *ucpu;
+
 			mutex_enter(&uvm_pageqlock);
 			KASSERT(pg->loan_count > 0);
 			pg->loan_count--;
 			pg->uanon = NULL;
+			anon->an_page = NULL;
 			mutex_exit(&uvm_pageqlock);
-			mutex_exit(pg->uobject->vmobjlock);
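+			/*
+			 * the page stays with the object; any remaining
+			 * O->A->K loans on it become plain O->K loans.
+			 */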
+			ucpu = uvm_cpu_get();
+			ucpu->loanfree_orphaned_anon += pg->loan_count;
+			ucpu->loanfree_oa_anon++;
+			uvm_cpu_put(ucpu);
 		} else {
 
 			/*
@@ -261,69 +267,19 @@
 uvm_anon_lockloanpg(struct vm_anon *anon)
 {
 	struct vm_page *pg;
-	bool locked = false;
 
 	KASSERT(mutex_owned(anon->an_lock));
-
-	/*
-	 * loop while we have a resident page that has a non-zero loan count.
-	 * if we successfully get our lock, we will "break" the loop.
-	 * note that the test for pg->loan_count is not protected -- this
-	 * may produce false positive results.   note that a false positive
-	 * result may cause us to do more work than we need to, but it will
-	 * not produce an incorrect result.
-	 */
-
-	while (((pg = anon->an_page) != NULL) && pg->loan_count != 0) {
-
+	pg = anon->an_page;
+	if (pg == NULL) {
+		return NULL;
+	}
+	if (pg->uobject) {
 		/*
-		 * quickly check to see if the page has an object before
-		 * bothering to lock the page queues.   this may also produce
-		 * a false positive result, but that's ok because we do a real
-		 * check after that.
+		 * locked via amap->am_obj_lock
 		 */
-
-		if (pg->uobject) {
-			mutex_enter(&uvm_pageqlock);
-			if (pg->uobject) {
-				locked =
-				    mutex_tryenter(pg->uobject->vmobjlock);
-			} else {
-				/* object disowned before we got PQ lock */
-				locked = true;
-			}
-			mutex_exit(&uvm_pageqlock);
-
-			/*
-			 * if we didn't get a lock (try lock failed), then we
-			 * toggle our anon lock and try again
-			 */
-
-			if (!locked) {
-				/*
-				 * someone locking the object has a chance to
-				 * lock us right now
-				 * 
-				 * XXX Better than yielding but inadequate.
-				 */
-				kpause("livelock", false, 1, anon->an_lock);
-				continue;
-			}
-		}
-
-		/*
-		 * If page is un-owned i.e. the object dropped its ownership,
-		 * then we have to take the ownership.
-		 */
-
-		if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) {
-			mutex_enter(&uvm_pageqlock);
-			pg->pqflags |= PQ_ANON;
-			pg->loan_count--;
-			mutex_exit(&uvm_pageqlock);
-		}
-		break;
+		KASSERT(mutex_owned(pg->uobject->vmobjlock));
 	}
+	uvm_loan_resolve_orphan(pg, false);
 	return pg;
 }
 
--- a/sys/uvm/uvm_extern.h	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_extern.h	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_extern.h,v 1.176.2.5 2011/12/20 13:46:17 yamt Exp $	*/
+/*	$NetBSD: uvm_extern.h,v 1.176.2.6 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -468,16 +468,35 @@
 
 	int64_t loan_obj;	/* O->K loan */
 	int64_t unloan_obj;	/* O->K unloan */
-	int64_t loanbreak_obj;	/* O->K loan resolved on write */
-	int64_t loanfree_obj;	/* O->K loan resolved on free */
+	int64_t loanbreak_obj;	/* O->K loan resolved on write to O */
+	int64_t loanfree_obj;	/* O->K loan resolved on free of O */
 
 	int64_t loan_anon;	/* A->K loan */
 	int64_t unloan_anon;	/* A->K unloan */
-	int64_t loanbreak_anon;	/* A->K loan resolved on write */
-	int64_t loanfree_anon;	/* A->K loan resolved on free */
+	int64_t loanbreak_anon;	/* A->K loan resolved on write to A */
+	int64_t loanfree_anon;	/* A->K loan resolved on free of A */
+
+	int64_t loan_oa;	/* O->A->K loan */
+	int64_t unloan_oa;	/* O->A->K unloan */
 
 	int64_t loan_zero;	/* O->K loan (zero) */
 	int64_t unloan_zero;	/* O->K unloan (zero) */
+
+	int64_t loanbreak_orphaned; /* O->A->K loan turned into A->K loan due to
+					write to O */
+	int64_t loanfree_orphaned; /* O->A->K loan turned into A->K loan due to
+					free of O */
+	int64_t loanbreak_orphaned_anon; /* O->A->K loan turned into O->K loan
+					due to write to A */
+	int64_t loanfree_orphaned_anon; /* O->A->K loan turned into O->K loan
+					due to free of A */
+
+	int64_t loanbreak_oa_obj; /* O->A loan resolved on write to O */
+	int64_t loanfree_oa_obj; /* O->A loan resolved on free of O */
+	int64_t loanbreak_oa_anon; /* O->A loan resolved on write to A */
+	int64_t loanfree_oa_anon; /* O->A loan resolved on free of A */
+	int64_t loan_resolve_orphan; /* O->A loaned page taken over by anon */
+	int64_t loan_obj_read;	/* O->A loan for read(2) */
 };
 
 #ifdef _KERNEL
--- a/sys/uvm/uvm_fault.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_fault.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_fault.c,v 1.190.2.2 2011/11/14 14:23:16 yamt Exp $	*/
+/*	$NetBSD: uvm_fault.c,v 1.190.2.3 2011/12/26 16:03:10 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.190.2.2 2011/11/14 14:23:16 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.190.2.3 2011/12/26 16:03:10 yamt Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -332,9 +332,12 @@
 			 * on the owner of page.
 			 */
 
+			uvmfault_unlockall(ufi, NULL, NULL);
 			if (pg->uobject) {
 				/* Owner of page is UVM object. */
-				uvmfault_unlockall(ufi, amap, NULL);
+				KASSERT(amap->am_obj_lock ==
+				    pg->uobject->vmobjlock);
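+				/*
+				 * drop am_lock by hand: amap_unlock() would
+				 * also drop am_obj_lock, which must stay
+				 * held for UVM_UNLOCK_AND_WAIT below.
+				 */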
+				mutex_exit(amap->am_lock); /* XXX */
 				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
 				    0,0,0);
 				UVM_UNLOCK_AND_WAIT(pg,
@@ -342,7 +345,9 @@
 				    false, "anonget1", 0);
 			} else {
 				/* Owner of page is anon. */
-				uvmfault_unlockall(ufi, NULL, NULL);
+				if (amap->am_obj_lock != NULL) {
+					mutex_exit(amap->am_obj_lock); /* XXX */
+				}
 				UVMHIST_LOG(maphist, " unlock+wait on anon",0,
 				    0,0,0);
 				UVM_UNLOCK_AND_WAIT(pg, anon->an_lock,
@@ -398,7 +403,9 @@
 		 */
 
 		locked = uvmfault_relock(ufi);
-		if (locked || we_own) {
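+		/*
+		 * on success, retake the amap lock (which also takes
+		 * am_obj_lock); if the map went stale but we own the page,
+		 * the anon lock alone is enough to dispose of it.
+		 */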
+		if (locked) {
+			amap_lock(amap);
+		} else if (we_own) {
 			mutex_enter(anon->an_lock);
 		}
 
@@ -448,6 +455,10 @@
 
 				if (locked) {
 					uvmfault_unlockall(ufi, NULL, NULL);
+					if (amap->am_obj_lock != NULL) {
+						/* XXX */
+						mutex_exit(amap->am_obj_lock);
+					}
 				}
 				mutex_exit(anon->an_lock);
 				UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
@@ -464,6 +475,10 @@
 
 				if (locked) {
 					uvmfault_unlockall(ufi, NULL, NULL);
+					if (amap->am_obj_lock != NULL) {
+						/* XXX */
+						mutex_exit(amap->am_obj_lock);
+					}
 				}
 				uvm_anon_release(anon);
 
@@ -720,17 +735,17 @@
 			    vaddr_t, struct vm_page *, bool);
 static inline int	uvm_fault_upper_loan(
 			    struct uvm_faultinfo *, struct uvm_faultctx *,
-			    struct vm_anon *, struct uvm_object **);
+			    struct vm_anon *);
 static inline int	uvm_fault_upper_promote(
 			    struct uvm_faultinfo *, struct uvm_faultctx *,
-			    struct uvm_object *, struct vm_anon *);
+			    struct vm_anon *);
 static inline int	uvm_fault_upper_direct(
 			    struct uvm_faultinfo *, struct uvm_faultctx *,
-			    struct uvm_object *, struct vm_anon *);
+			    struct vm_anon *);
 static int		uvm_fault_upper_enter(
 			    struct uvm_faultinfo *, const struct uvm_faultctx *,
-			    struct uvm_object *, struct vm_anon *,
-			    struct vm_page *, struct vm_anon *);
+			    struct vm_anon *, struct vm_page *,
+			    struct vm_anon *);
 static inline void	uvm_fault_upper_done(
 			    struct uvm_faultinfo *, const struct uvm_faultctx *,
 			    struct vm_anon *, struct vm_page *);
@@ -1033,6 +1048,7 @@
 
 	if (amap) {
 		amap_lock(amap);
+		KASSERT(uobj == NULL || amap->am_obj_lock == NULL);
 		amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages);
 	} else {
 		*ranons = NULL;	/* to be safe */
@@ -1282,6 +1298,7 @@
 	/* locked: maps(read), amap, anon, uobj(if one) */
 	KASSERT(mutex_owned(amap->am_lock));
 	KASSERT(anon->an_lock == amap->am_lock);
+	KASSERT(uobj == NULL || amap->am_obj_lock == uobj->vmobjlock);
 	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));
 
 	/*
@@ -1289,7 +1306,7 @@
 	 */
 
 	if (anon->an_page->loan_count) {
-		error = uvm_fault_upper_loan(ufi, flt, anon, &uobj);
+		error = uvm_fault_upper_loan(ufi, flt, anon);
 		if (error != 0)
 			return error;
 	}
@@ -1309,9 +1326,9 @@
 
 	if (flt->cow_now && anon->an_ref > 1) {
 		flt->promote = true;
-		error = uvm_fault_upper_promote(ufi, flt, uobj, anon);
+		error = uvm_fault_upper_promote(ufi, flt, anon);
 	} else {
-		error = uvm_fault_upper_direct(ufi, flt, uobj, anon);
+		error = uvm_fault_upper_direct(ufi, flt, anon);
 	}
 	return error;
 }
@@ -1326,7 +1343,7 @@
 static int
 uvm_fault_upper_loan(
 	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
-	struct vm_anon *anon, struct uvm_object **ruobj)
+	struct vm_anon *anon)
 {
 	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
 	int error = 0;
@@ -1357,15 +1374,19 @@
 
 		/* >1 case is already ok */
 		if (anon->an_ref == 1) {
-			error = uvm_loanbreak_anon(anon, *ruobj);
+			struct uvm_object *uobj = anon->an_page->uobject;
+
+			KASSERT(uobj == NULL ||
+			    uobj->vmobjlock == amap->am_obj_lock);
+			KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));
+			error = uvm_loanbreak_anon(anon);
 			if (error != 0) {
-				uvmfault_unlockall(ufi, amap, *ruobj);
+				uvmfault_unlockall(ufi, amap, NULL);
 				uvm_wait("flt_noram2");
 				return ERESTART;
 			}
 			/* if we were a loan receiver, uobj is gone */
-			if (*ruobj)
-				*ruobj = NULL;
+			KASSERT(anon->an_page->uobject == NULL);
 		}
 	}
 	return error;
@@ -1383,7 +1404,7 @@
 static int
 uvm_fault_upper_promote(
 	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
-	struct uvm_object *uobj, struct vm_anon *anon)
+	struct vm_anon *anon)
 {
 	struct vm_anon * const oanon = anon;
 	struct vm_page *pg;
@@ -1423,7 +1444,7 @@
 	 * oanon != anon, we'll have to unlock anon, too.
 	 */
 
-	return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
+	return uvm_fault_upper_enter(ufi, flt, anon, pg, oanon);
 }
 
 /*
@@ -1433,7 +1454,7 @@
 static int
 uvm_fault_upper_direct(
 	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
-	struct uvm_object *uobj, struct vm_anon *anon)
+	struct vm_anon *anon)
 {
 	struct vm_anon * const oanon = anon;
 	struct vm_page *pg;
@@ -1444,7 +1465,7 @@
 	if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
 		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
 
-	return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
+	return uvm_fault_upper_enter(ufi, flt, anon, pg, oanon);
 }
 
 /*
@@ -1454,16 +1475,17 @@
 static int
 uvm_fault_upper_enter(
 	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
-	struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg,
-	struct vm_anon *oanon)
+	struct vm_anon *anon, struct vm_page *pg, struct vm_anon *oanon)
 {
 	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
+	struct uvm_object *uobj __unused = pg->uobject;
 	UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist);
 
 	/* locked: maps(read), amap, oanon, anon(if different from oanon) */
 	KASSERT(mutex_owned(amap->am_lock));
 	KASSERT(anon->an_lock == amap->am_lock);
 	KASSERT(oanon->an_lock == amap->am_lock);
+	KASSERT(uobj == NULL || amap->am_obj_lock == uobj->vmobjlock);
 	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));
 	KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
 
@@ -1487,7 +1509,7 @@
 		 * as the map may change while we're asleep.
 		 */
 
-		uvmfault_unlockall(ufi, amap, uobj);
+		uvmfault_unlockall(ufi, amap, NULL);
 		if (!uvm_reclaimable()) {
 			UVMHIST_LOG(maphist,
 			    "<- failed.  out of VM",0,0,0,0);
@@ -1506,7 +1528,7 @@
 	 */
 
 	pmap_update(ufi->orig_map->pmap);
-	uvmfault_unlockall(ufi, amap, uobj);
+	uvmfault_unlockall(ufi, amap, NULL);
 	return 0;
 }
 
--- a/sys/uvm/uvm_loan.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_loan.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_loan.c,v 1.81.2.4 2011/11/20 10:52:33 yamt Exp $	*/
+/*	$NetBSD: uvm_loan.c,v 1.81.2.5 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.81.2.4 2011/11/20 10:52:33 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.81.2.5 2011/12/26 16:03:11 yamt Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -41,6 +41,8 @@
 
 #include <uvm/uvm.h>
 
+bool doloanobj = true;
+
 /*
  * "loaned" pages are pages which are (read-only, copy-on-write) loaned
  * from the VM system to other parts of the kernel.   this allows page
@@ -106,6 +108,8 @@
 static void	uvm_unloananon(struct vm_anon **, int);
 static void	uvm_unloanpage(struct vm_page **, int);
 static int	uvm_loanpage(struct vm_page **, int);
+static int	uvm_loanobj_read(struct vm_map *, vaddr_t, size_t,
+			struct uvm_object *, off_t);
 
 
 /*
@@ -328,7 +332,7 @@
 /*
  * uvm_loananon: loan a page from an anon out
  *
- * => called with map, amap, uobj locked
+ * => called with map, amap, anon locked
  * => return value:
  *	-1 = fatal error, everything is unlocked, abort.
  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
@@ -427,11 +431,15 @@
 	(*output)++;
 
 	/* unlock and return success */
-	if (pg->uobject)
+	if (pg->uobject != NULL)
 		mutex_exit(pg->uobject->vmobjlock);
 
 	ucpu = uvm_cpu_get();
-	ucpu->loan_anon++;
+	if (pg->uobject != NULL) {
+		ucpu->loan_oa++;
+	} else {
+		ucpu->loan_anon++;
+	}
 	uvm_cpu_put(ucpu);
 
 	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
@@ -1007,38 +1015,36 @@
 			slock = NULL;
 		}
 
-		/*
-		 * drop our loan.  if page is owned by an anon but
-		 * PQ_ANON is not set, the page was loaned to the anon
-		 * from an object which dropped ownership, so resolve
-		 * this by turning the anon's loan into real ownership
-		 * (ie. decrement loan_count again and set PQ_ANON).
-		 * after all this, if there are no loans left, put the
-		 * page back a paging queue (if the page is owned by
-		 * an anon) or free it (if the page is now unowned).
-		 */
-
 		obj = pg->uobject;
 		anon = pg->uanon;
+		/*
+		 * drop our loan. (->K)
+		 */
 		KASSERT(pg->loan_count > 0);
 		pg->loan_count--;
-		if (obj == NULL && anon != NULL &&
-		    (pg->pqflags & PQ_ANON) == 0) {
-			KASSERT(pg->loan_count > 0);
-			pg->loan_count--;
-			pg->pqflags |= PQ_ANON;
-		}
-		if (pg->loan_count == 0 && obj == NULL && anon == NULL) {
-			KASSERT((pg->flags & PG_BUSY) == 0);
-			uvm_pagefree(pg);
+		/*
+		 * if there are no loans left, put the page back on a paging
+		 * queue (if the page is owned by an anon) or free it (if the
+		 * page is now unowned).
+		 */
+		uvm_loan_resolve_orphan(pg, true);
+		if (pg->loan_count == 0) {
+			if (obj == NULL && anon == NULL) {
+				KASSERT((pg->flags & PG_BUSY) == 0);
+				uvm_pagefree(pg);
+			}
+			if (anon != NULL) {
+				uvm_pageactivate(pg);
+			}
 		}
 		if (slock != NULL) {
 			mutex_exit(slock);
 		}
 		ucpu = uvm_cpu_get();
 		if (obj != NULL) {
-			KASSERT(anon == NULL); /* XXX no O->A loan */
-			if (obj == &uvm_loanzero_object) {
+			if (anon != NULL) {
+				ucpu->unloan_oa++;
+			} else if (obj == &uvm_loanzero_object) {
 				ucpu->unloan_zero++;
 			} else {
 				ucpu->unloan_obj++;
@@ -1140,6 +1146,7 @@
 #ifdef DIAGNOSTIC
 	struct uvm_object *uobj = uobjpage->uobject;
 #endif
+	struct vm_anon * const anon = uobjpage->uanon;
 	const unsigned int count = uobjpage->loan_count;
 
 	KASSERT(uobj != NULL);
@@ -1170,6 +1177,15 @@
 	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
 	UVM_PAGE_OWN(uobjpage, NULL);
 
+	mutex_enter(&uvm_pageqlock);
+
+	/*
+	 * if the page is no longer referenced by an anon (i.e. we are breaking
+	 * O->K loans), then remove it from any pageq's.
+	 */
+	if (anon == NULL)
+		uvm_pagedequeue(uobjpage);
+
 	/*
 	 * replace uobjpage with new page.
 	 *
@@ -1178,20 +1194,8 @@
 
 	uvm_pagereplace(uobjpage, pg);
 
-	mutex_enter(&uvm_pageqlock);
-	KASSERT(uobjpage->uanon == NULL); /* XXX no O->A loan */
-
 	/*
-	 * if the page is no longer referenced by
-	 * an anon (i.e. we are breaking an O->K
-	 * loan), then remove it from any pageq's.
-	 */
-	if (uobjpage->uanon == NULL)
-		uvm_pagedequeue(uobjpage);
-
-	/*
-	 * at this point we have absolutely no
-	 * control over uobjpage
+	 * at this point we have absolutely no control over uobjpage
 	 */
 
 	/* install new page */
@@ -1199,22 +1203,35 @@
 	mutex_exit(&uvm_pageqlock);
 
 	/*
-	 * done!  loan is broken and "pg" is
-	 * PG_BUSY.   it can now replace uobjpage.
+	 * update statistics.
 	 */
+	ucpu = uvm_cpu_get();
+	if (anon != NULL) {
+		ucpu->loanbreak_oa_obj++;
+		ucpu->loanbreak_orphaned += count - 1;
+	} else {
+		ucpu->loanbreak_obj += count;
+	}
+	uvm_cpu_put(ucpu);
 
-	ucpu = uvm_cpu_get();
-	ucpu->loanbreak_obj += count;
-	uvm_cpu_put(ucpu);
+	/*
+	 * done!  loan is broken and "pg" is PG_BUSY.
+	 * it can now replace uobjpage.
+	 */
 	return pg;
 }
 
+/*
+ * uvm_loanbreak_anon: break loans on an anon's page by installing a
+ * freshly allocated copy of the page in the anon.
+ */
+
 int
-uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
+uvm_loanbreak_anon(struct vm_anon *anon)
 {
 	struct uvm_cpu *ucpu;
 	struct vm_page *pg;
 	unsigned int oldstatus;
+	struct uvm_object * const uobj = anon->an_page->uobject;
 	const unsigned int count = anon->an_page->loan_count;
 
 	KASSERT(mutex_owned(anon->an_lock));
@@ -1237,25 +1254,24 @@
 	mutex_enter(&uvm_pageqlock);	  /* KILL loan */
 
 	anon->an_page->uanon = NULL;
-	/* in case we owned */
-	anon->an_page->pqflags &= ~PQ_ANON;
-
-	KASSERT(uobj == NULL); /* XXX O->A loan is currently broken */
-	if (uobj) {
-		/* if we were receiver of loan */
+	if (uobj != NULL) {
+		/*
+		 * if we were receiver of loan (O->A)
+		 */
+		KASSERT((anon->an_page->pqflags & PQ_ANON) == 0);
 		anon->an_page->loan_count--;
 	} else {
 		/*
 		 * we were the lender (A->K); need to remove the page from
 		 * pageq's.
+		 *
+		 * PQ_ANON is updated by the caller.
 		 */
+		KASSERT((anon->an_page->pqflags & PQ_ANON) != 0);
+		anon->an_page->pqflags &= ~PQ_ANON;
 		uvm_pagedequeue(anon->an_page);
 	}
 
-	if (uobj) {
-		mutex_exit(uobj->vmobjlock);
-	}
-
 	/* install new page in anon */
 	anon->an_page = pg;
 	pg->uanon = anon;
@@ -1268,12 +1284,482 @@
 	UVM_PAGE_OWN(pg, NULL);
 
 	/* done! */
-	if (uobj == NULL) {
-		ucpu = uvm_cpu_get();
+	ucpu = uvm_cpu_get();
+	if (uobj != NULL) {
+		ucpu->loanbreak_oa_anon++;
+		ucpu->loanbreak_orphaned_anon += count - 1;
+		atomic_inc_uint(&uvmexp.anonpages);
+	} else {
 		ucpu->loanbreak_anon += count;
 		ucpu->pagestate[1][oldstatus]--;
-		ucpu->pagestate[1][UVM_PAGE_STATUS_DIRTY]++;
+	}
+	ucpu->pagestate[1][UVM_PAGE_STATUS_DIRTY]++;
+	uvm_cpu_put(ucpu);
+	return 0;
+}
+
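+/*
+ * uvm_loanobj: loan pages from a uvm_object into the user address space
+ * described by a uio, as an alternative to copying them.
+ *
+ * => only page-aligned read(2)-style transfers into non-shared,
+ *    copy-on-write user memory are supported.
+ * => returns ENOSYS or EINVAL for requests which cannot be served by
+ *    loaning; the caller is expected to fall back to copying.
+ */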
+int
+uvm_loanobj(struct uvm_object *uobj, struct uio *uio)
+{
+	struct iovec *iov;
+	struct vm_map *map;
+	vaddr_t va;
+	size_t len;
+	int i, error = 0;
+
+	if (!doloanobj) {
+		return ENOSYS;
+	}
+
+	/*
+	 * This interface is only for loaning to user space.
+	 * Loans to the kernel should be done with the kernel-specific
+	 * loaning interfaces.
+	 */
+
+	if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
+		return ENOSYS;
+	}
+
+	if (uio->uio_rw != UIO_READ) {
+		return ENOSYS;
+	}
+
+	/*
+	 * Check that the uio is aligned properly for loaning.
+	 */
+
+	if (uio->uio_offset & PAGE_MASK || uio->uio_resid & PAGE_MASK) {
+		return EINVAL;
+	}
+	for (i = 0; i < uio->uio_iovcnt; i++) {
+		if (((vaddr_t)uio->uio_iov[i].iov_base & PAGE_MASK) ||
+		    (uio->uio_iov[i].iov_len & PAGE_MASK)) {
+			return EINVAL;
+		}
+	}
+
+	/*
+	 * Process the uio.
+	 */
+
+	map = &uio->uio_vmspace->vm_map;
+	while (uio->uio_resid) {
+		iov = uio->uio_iov;
+		while (iov->iov_len) {
+			va = (vaddr_t)iov->iov_base;
+			len = MIN(iov->iov_len, MAXPHYS);
+			error = uvm_loanobj_read(map, va, len, uobj,
+						 uio->uio_offset);
+			if (error) {
+				goto out;
+			}
+			iov->iov_base = (char *)iov->iov_base + len;
+			iov->iov_len -= len;
+			uio->uio_offset += len;
+			uio->uio_resid -= len;
+		}
+		uio->uio_iov++;
+		uio->uio_iovcnt--;
+	}
+
+out:
+	pmap_update(map->pmap);
+	return error;
+}
+
+/*
+ * Loan object pages to a user process.
+ */
+
+/* XXX an arbitrary number no smaller than MAXPHYS/PAGE_SIZE */
+#define	MAXPAGES	16
+
+static int
+uvm_loanobj_read(struct vm_map *map, vaddr_t va, size_t len,
+    struct uvm_object *uobj, off_t off)
+{
+	unsigned int npages = len >> PAGE_SHIFT;
+	struct vm_page *pgs[MAXPAGES];
+	struct vm_amap *amap;
+	struct vm_anon *anon, *oanons[MAXPAGES], *nanons[MAXPAGES];
+	struct vm_map_entry *entry;
+	struct vm_anon *anon_tofree;
+	unsigned int maptime;
+	unsigned int i, refs, aoff, pgoff;
+	unsigned int loaned; /* # of newly created O->A loans */
+	int error;
+	UVMHIST_FUNC("uvm_loanobj_read"); UVMHIST_CALLED(ubchist);
+
+	UVMHIST_LOG(ubchist, "map %p va 0x%x npages %d", map, va, npages, 0);
+	UVMHIST_LOG(ubchist, "uobj %p off 0x%x", uobj, off, 0, 0);
+
+	if (npages > MAXPAGES) {
+		return EINVAL;
+	}
+retry:
+	vm_map_lock_read(map);
+	if (!uvm_map_lookup_entry(map, va, &entry)) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "no entry", 0,0,0,0);
+		return EINVAL;
+	}
+	if ((entry->protection & VM_PROT_WRITE) == 0) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "no write access", 0,0,0,0);
+		return EACCES;
+	}
+	if (VM_MAPENT_ISWIRED(entry)) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "entry is wired", 0,0,0,0);
+		return EBUSY;
+	}
+	if (!UVM_ET_ISCOPYONWRITE(entry)) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "entry is not COW", 0,0,0,0);
+		return EINVAL;
+	}
+	if (UVM_ET_ISOBJ(entry)) {
+		/*
+		 * avoid locking order difficulty between
+		 * am_obj_lock and backing object's lock.
+		 */
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "entry is obj backed", 0,0,0,0);
+		return EINVAL;
+	}
+	if (entry->end < va + len) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "chunk longer than entry", 0,0,0,0);
+		return EINVAL;
+	}
+	amap = entry->aref.ar_amap;
+	if (amap != NULL && (amap->am_flags & AMAP_SHARED) != 0) {
+		vm_map_unlock_read(map);
+		UVMHIST_LOG(ubchist, "amap is shared", 0,0,0,0);
+		return EINVAL;
+	}
+
+	/*
+	 * None of the trivial reasons why we might not be able to do the loan
+	 * are true.  If we need to COW the amap, try to do it now.
+	 */
+
+	KASSERT(amap || UVM_ET_ISNEEDSCOPY(entry));
+	if (UVM_ET_ISNEEDSCOPY(entry)) {
+		maptime = map->timestamp;
+		vm_map_unlock_read(map);
+		vm_map_lock(map);
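+		/*
+		 * vm_map_lock() bumps the timestamp; anything beyond our
+		 * own increment means the map changed while the lock was
+		 * dropped, so start over.
+		 */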
+		if (maptime + 1 != map->timestamp) {
+			vm_map_unlock(map);
+			goto retry;
+		}
+		amap_copy(map, entry, 0, va, va + len);
+		if (UVM_ET_ISNEEDSCOPY(entry)) {
+			vm_map_unlock(map);
+			UVMHIST_LOG(ubchist, "amap COW failed", 0,0,0,0);
+			return ENOMEM;
+		}
+		UVMHIST_LOG(ubchist, "amap has been COWed", 0,0,0,0);
+		aoff = va - entry->start;
+		maptime = map->timestamp;
+		vm_map_unlock(map);
+	} else {
+		aoff = va - entry->start;
+		maptime = map->timestamp;
+		vm_map_unlock_read(map);
+	}
+
+	/*
+	 * The map is all ready for us, now fetch the obj pages.
+	 * If the map changes out from under us, start over.
+	 *
+	 * XXX worth trying PGO_LOCKED?
+	 */
+
+	memset(pgs, 0, sizeof(pgs));
+	mutex_enter(uobj->vmobjlock);
+	error = (*uobj->pgops->pgo_get)(uobj, off, pgs, &npages, 0,
+	    VM_PROT_READ, 0, PGO_SYNCIO);
+	if (error) {
+		UVMHIST_LOG(ubchist, "getpages -> %d", error,0,0,0);
+		return error;
+	}
+	vm_map_lock_read(map);
+	if (map->timestamp != maptime) {
+		vm_map_unlock_read(map);
+		mutex_enter(uobj->vmobjlock);
+		mutex_enter(&uvm_pageqlock);
+		for (i = 0; i < npages; i++) {
+			uvm_pageactivate(pgs[i]);
+		}
+		uvm_page_unbusy(pgs, npages);
+		mutex_exit(&uvm_pageqlock);
+		mutex_exit(uobj->vmobjlock);
+		goto retry;
+	}
+	amap = entry->aref.ar_amap;
+	KASSERT(amap != NULL);
+
+	/*
+	 * Prepare each object page for loaning.  Allocate an anon for each page
+	 * that doesn't already have one.  If any of the pages are wired,
+	 * undo everything and fail.
+	 */
+
+	memset(nanons, 0, sizeof(nanons));
+	amap_lock(amap);
+	if (amap->am_obj_lock != NULL) {
+		if (amap->am_obj_lock != uobj->vmobjlock) {
+			/*
+			 * the amap might already have pages loaned from
+			 * another object.  give up.
+			 *
+			 * XXX worth clipping amap?
+			 */
+			error = EBUSY;
+			amap_unlock(amap);
+			amap = NULL;
+			mutex_enter(uobj->vmobjlock);
+			goto fail_amap_unlocked;
+		}
+	} else {
+		mutex_enter(uobj->vmobjlock);
+	}
+	KASSERT(mutex_owned(amap->am_lock));
+	KASSERT(mutex_owned(uobj->vmobjlock));
+	loaned = 0;
+	for (i = 0; i < npages; i++) {
+		struct vm_page * const pg = pgs[i];
+		KASSERT(uvm_page_locked_p(pg));
+		if (pg->wire_count) {
+			error = EBUSY;
+			goto fail;
+		}
+		pmap_page_protect(pg, VM_PROT_READ);
+		mutex_enter(&uvm_pageqlock);
+		uvm_pageactivate(pg);
+		mutex_exit(&uvm_pageqlock);
+		if (pg->uanon != NULL) {
+			KASSERTMSG(pg->loan_count > 0, "pg %p loan_count %u",
+			    pg, (unsigned int)pg->loan_count);
+			anon = pg->uanon;
+			if (anon->an_lock != amap->am_lock) {
+				/*
+				 * the page is already loaned to another amap
+				 * whose lock is incompatible with ours.
+				 * give up.
+				 */
+				error = EBUSY;
+				goto fail;
+			}
+			anon->an_ref++;
+		} else {
+			anon = uvm_analloc();
+			if (anon == NULL) {
+				error = ENOMEM;
+				goto fail;
+			}
+			mutex_enter(&uvm_pageqlock);
+			anon->an_page = pg;
+			pg->uanon = anon;
+			pg->loan_count++;
+			mutex_exit(&uvm_pageqlock);
+			loaned++;
+		}
+		nanons[i] = anon;
+	}
+
+	/*
+	 * Look for any existing anons in the amap.  These will be replaced
+	 * by the new loan anons we just set up.  If any of these anon pages
+	 * are wired then we can't replace them.
+	 */
+
+	memset(oanons, 0, sizeof(oanons));
+	for (i = 0; i < npages; i++) {
+		UVMHIST_LOG(ubchist, "pgs[%d] %p", i, pgs[i], 0,0);
+		anon = amap_lookup(&entry->aref, aoff + (i << PAGE_SHIFT));
+		oanons[i] = anon;
+		if (anon && anon->an_page && anon->an_page->wire_count) {
+			error = EBUSY;
+			goto fail;
+		}
+	}
+
+	/*
+	 * Everything is good to go.  Remove any existing anons and insert
+	 * the loaned object anons.
+	 */
+
+	anon_tofree = NULL;
+	for (i = 0; i < npages; i++) {
+		pgoff = i << PAGE_SHIFT;
+		anon = oanons[i];
+		if (anon != NULL) {
+			amap_unadd(&entry->aref, aoff + pgoff);
+			refs = --anon->an_ref;
+			if (refs == 0) {
+				anon->an_link = anon_tofree;
+				anon_tofree = anon;
+			}
+		}
+		anon = nanons[i];
+		if (anon->an_lock == NULL) {
+			anon->an_lock = amap->am_lock;
+		}
+		amap_add(&entry->aref, aoff + pgoff, anon, false);
+	}
+
+	/*
+	 * The map has all the new information now.
+	 * Enter the pages into the pmap to save likely faults later.
+	 */
+
+	for (i = 0; i < npages; i++) {
+		error = pmap_enter(map->pmap, va + (i << PAGE_SHIFT),
+		    VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ, PMAP_CANFAIL);
+		if (error != 0) {
+			/*
+			 * a pmap_enter failure here is not critical, but we
+			 * must not leave the old mapping of the oanon page.
+			 */
+			pmap_remove(map->pmap, va + (i << PAGE_SHIFT),
+			    va + (i << PAGE_SHIFT) + PAGE_SIZE);
+		}
+	}
+
+	/*
+	 * At this point we're done with the pages, unlock them now.
+	 */
+
+	mutex_enter(&uvm_pageqlock);
+	uvm_page_unbusy(pgs, npages);
+	mutex_exit(&uvm_pageqlock);
+	if (amap->am_obj_lock == NULL) {
+		mutex_obj_hold(uobj->vmobjlock);
+		amap->am_obj_lock = uobj->vmobjlock;
+	} else {
+		KASSERT(amap->am_obj_lock == uobj->vmobjlock);
+	}
+	uvm_anon_freelst(amap, anon_tofree);
+	vm_map_unlock_read(map);
+
+	/*
+	 * update statistics
+	 */
+	if (loaned) {
+		struct uvm_cpu *ucpu;
+
+		ucpu = uvm_cpu_get();
+		ucpu->loan_obj_read += loaned;
 		uvm_cpu_put(ucpu);
 	}
 	return 0;
+
+	/*
+	 * We couldn't complete the loan for some reason.
+	 * Undo any work we did so far.
+	 */
+
+fail:
+	KASSERT(mutex_owned(amap->am_lock));
+fail_amap_unlocked:
+	KASSERT(mutex_owned(uobj->vmobjlock));
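+	/*
+	 * undo the per-page state: anons we allocated here (an_lock still
+	 * NULL) are freed along with their loan, while anons which were
+	 * already loaned to this amap just lose the reference we added.
+	 */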
+	for (i = 0; i < npages; i++) {
+		anon = nanons[i];
+		if (anon != NULL) {
+			KASSERT(amap != NULL);
+			KASSERT(uvm_page_locked_p(anon->an_page));
+			if (anon->an_lock == NULL) {
+				struct vm_page * const pg = anon->an_page;
+
+				KASSERT(anon->an_ref == 1);
+				KASSERT(pg != NULL);
+				KASSERT(pg->loan_count > 0);
+				KASSERT(pg->uanon == anon);
+				mutex_enter(&uvm_pageqlock);
+				pg->loan_count--;
+				pg->uanon = NULL;
+				anon->an_page = NULL;
+				mutex_exit(&uvm_pageqlock);
+				anon->an_ref--;
+				uvm_anon_free(anon);
+			} else {
+				KASSERT(anon->an_lock == amap->am_lock);
+				KASSERT(anon->an_page->loan_count > 0);
+				KASSERT(anon->an_ref > 1);
+				anon->an_ref--;
+			}
+		} else {
+			mutex_enter(&uvm_pageqlock);
+			uvm_pageenqueue(pgs[i]);
+			mutex_exit(&uvm_pageqlock);
+		}
+	}
+	mutex_enter(&uvm_pageqlock);
+	uvm_page_unbusy(pgs, npages);
+	mutex_exit(&uvm_pageqlock);
+	if (amap != NULL) {
+		if (amap->am_obj_lock == NULL) {
+			mutex_exit(uobj->vmobjlock);
+		}
+		amap_unlock(amap);
+	} else {
+		mutex_exit(uobj->vmobjlock);
+	}
+	vm_map_unlock_read(map);
+	return error;
 }
+
+/*
+ * uvm_loan_resolve_orphan: update the state of the page after a possible
+ * ownership change
+ *
+ * if page is owned by an anon but PQ_ANON is not set, the page was loaned
+ * to the anon from an object which dropped ownership, so resolve this by
+ * turning the anon's loan into real ownership (i.e. decrement loan_count
+ * and set PQ_ANON).
+ */
+
+void
+uvm_loan_resolve_orphan(struct vm_page *pg, bool pageqlocked)
+{
+	struct uvm_object * const uobj = pg->uobject;
+	struct vm_anon * const anon = pg->uanon;
+	struct uvm_cpu *ucpu;
+
+	KASSERT(!pageqlocked || mutex_owned(&uvm_pageqlock));
+	KASSERT(uvm_page_locked_p(pg));
+	if (uobj != NULL) {
+		return;
+	}
+	if (anon == NULL) {
+		return;
+	}
+	if ((pg->pqflags & PQ_ANON) != 0) {
+		return;
+	}
+	KASSERT(pg->loan_count > 0);
+	if (!pageqlocked) {
+		mutex_enter(&uvm_pageqlock);
+	}
+	pg->loan_count--;
+	pg->pqflags |= PQ_ANON;
+	if (!pageqlocked) {
+		mutex_exit(&uvm_pageqlock);
+	}
+
+	/*
+	 * adjust statistics after the owner change.
+	 *
+	 * the pagestate should have been decremented when uobj dropped the
+	 * ownership.
+	 */
+	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
+	ucpu = uvm_cpu_get();
+	ucpu->loan_resolve_orphan++;
+	ucpu->pagestate[1][UVM_PAGE_STATUS_DIRTY]++;
+	uvm_cpu_put(ucpu);
+	atomic_inc_uint(&uvmexp.anonpages);
+}
--- a/sys/uvm/uvm_loan.h	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_loan.h	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_loan.h,v 1.17 2011/02/02 15:13:34 chuck Exp $	*/
+/*	$NetBSD: uvm_loan.h,v 1.17.4.1 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -49,7 +49,9 @@
 int uvm_loanuobjpages(struct uvm_object *, voff_t, int,
     struct vm_page **);
 struct vm_page *uvm_loanbreak(struct vm_page *);
-int uvm_loanbreak_anon(struct vm_anon *, struct uvm_object *);
+int uvm_loanbreak_anon(struct vm_anon *);
+int uvm_loanobj(struct uvm_object *, struct uio *);
+void uvm_loan_resolve_orphan(struct vm_page *, bool);
 
 #endif /* _KERNEL */
 
--- a/sys/uvm/uvm_map.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_map.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.305 2011/09/27 01:02:39 jym Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.305.2.1 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.305 2011/09/27 01:02:39 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.305.2.1 2011/12/26 16:03:11 yamt Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvmhist.h"
@@ -4948,11 +4948,13 @@
 	struct uvm_object *uobj;
 	struct vm_map_entry *next;
 	struct vm_map_entry *prev;
+	struct vm_amap *amap; /* neighbour's amap */
 	vsize_t size;
 	int merged = 0;
 	bool copying;
 	int newetype;
 
+	KASSERT(vm_map_locked_p(map));
 	if (VM_MAP_USE_KMAPENT(map)) {
 		return 0;
 	}
@@ -4969,11 +4971,12 @@
 	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;
 
 	next = entry->next;
+	amap = next->aref.ar_amap;
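+	/* an amap with pages loaned from a uobj is never merged */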
 	if (next != &map->header &&
 	    next->start == entry->end &&
-	    ((copying && next->aref.ar_amap != NULL &&
-	    amap_refs(next->aref.ar_amap) == 1) ||
-	    (!copying && next->aref.ar_amap == NULL)) &&
+	    ((copying && amap != NULL && amap_refs(amap) == 1 &&
+	      amap->am_obj_lock == NULL) ||
+	    (!copying && amap == NULL)) &&
 	    UVM_ET_ISCOMPATIBLE(next, newetype,
 	    uobj, entry->flags, entry->protection,
 	    entry->max_protection, entry->inheritance, entry->advice,
@@ -5008,10 +5011,11 @@
 	}
 
 	prev = entry->prev;
+	amap = prev->aref.ar_amap;
 	if (prev != &map->header &&
 	    prev->end == entry->start &&
-	    ((copying && !merged && prev->aref.ar_amap != NULL &&
-	    amap_refs(prev->aref.ar_amap) == 1) ||
+	    ((copying && !merged && amap != NULL && amap_refs(amap) == 1 &&
+	      amap->am_obj_lock == NULL) ||
 	    (!copying && prev->aref.ar_amap == NULL)) &&
 	    UVM_ET_ISCOMPATIBLE(prev, newetype,
 	    uobj, entry->flags, entry->protection,
@@ -5186,6 +5190,8 @@
 {
 
 	if (entry->aref.ar_amap != NULL) {
+		KASSERT(entry->aref.ar_amap->am_obj_lock == NULL ||
+		    !UVM_ET_ISOBJ(entry));
 		amap_lock(entry->aref.ar_amap);
 	}
 	if (UVM_ET_ISOBJ(entry)) {
--- a/sys/uvm/uvm_meter.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_meter.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_meter.c,v 1.56.4.5 2011/11/20 10:52:34 yamt Exp $	*/
+/*	$NetBSD: uvm_meter.c,v 1.56.4.6 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.56.4.5 2011/11/20 10:52:34 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.56.4.6 2011/12/26 16:03:11 yamt Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -197,6 +197,9 @@
 		u.loanbreak_obj += ucpu->loanbreak_obj;
 		u.loanfree_obj += ucpu->loanfree_obj;
 
+		u.loan_oa += ucpu->loan_oa;
+		u.unloan_oa += ucpu->unloan_oa;
+
 		u.loan_anon += ucpu->loan_anon;
 		u.unloan_anon += ucpu->unloan_anon;
 		u.loanbreak_anon += ucpu->loanbreak_anon;
@@ -204,6 +207,18 @@
 
 		u.loan_zero += ucpu->loan_zero;
 		u.unloan_zero += ucpu->unloan_zero;
+
+		u.loanbreak_orphaned += ucpu->loanbreak_orphaned;
+		u.loanfree_orphaned += ucpu->loanfree_orphaned;
+		u.loanbreak_orphaned_anon += ucpu->loanbreak_orphaned_anon;
+		u.loanfree_orphaned_anon += ucpu->loanfree_orphaned_anon;
+
+		u.loanbreak_oa_obj += ucpu->loanbreak_oa_obj;
+		u.loanfree_oa_obj += ucpu->loanfree_oa_obj;
+		u.loanbreak_oa_anon += ucpu->loanbreak_oa_anon;
+		u.loanfree_oa_anon += ucpu->loanfree_oa_anon;
+		u.loan_resolve_orphan += ucpu->loan_resolve_orphan;
+		u.loan_obj_read += ucpu->loan_obj_read;
 	}
 	node = *rnode;
 	node.sysctl_data = &u;
--- a/sys/uvm/uvm_page.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_page.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_page.c,v 1.178.2.8 2011/11/30 14:33:46 yamt Exp $	*/
+/*	$NetBSD: uvm_page.c,v 1.178.2.9 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.178.2.8 2011/11/30 14:33:46 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.178.2.9 2011/12/26 16:03:11 yamt Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvmhist.h"
@@ -1537,8 +1537,9 @@
 		 */
 
 		if (obj != NULL) {
+			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
 			uvm_pageremove(obj, pg);
-			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
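+			/*
+			 * the page is leaving the object: clear the
+			 * object-ownership pqflags so the PQ_STAT check
+			 * below holds.
+			 */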
+			pg->pqflags &= ~(PQ_FILE|PQ_AOBJ);
 		} else if (pg->uanon != NULL) {
 			if ((pg->pqflags & PQ_ANON) == 0) {
 				pg->loan_count--;
@@ -1561,6 +1562,7 @@
 #ifdef UVM_PAGE_TRKOWN
 		pg->owner_tag = NULL;
 #endif
+		KASSERT((pg->pqflags & PQ_STAT) == 0);
 		if (pg->loan_count) {
 			KASSERT(pg->uobject == NULL);
 			if (pg->uanon == NULL) {
@@ -1568,7 +1570,13 @@
 			}
 			ucpu = uvm_cpu_get();
 			if (obj != NULL) {
-				ucpu->loanfree_obj += pg->loan_count;
+				if (pg->uanon != NULL) {
+					ucpu->loanfree_oa_obj++;
+					ucpu->loanfree_orphaned +=
+					    pg->loan_count - 1;
+				} else {
+					ucpu->loanfree_obj += pg->loan_count;
+				}
 			} else {
 				ucpu->loanfree_anon += pg->loan_count;
 			}
--- a/sys/uvm/uvm_pdaemon.c	Tue Dec 20 13:46:17 2011 +0000
+++ b/sys/uvm/uvm_pdaemon.c	Mon Dec 26 16:03:10 2011 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdaemon.c,v 1.103.2.2 2011/11/18 00:57:34 yamt Exp $	*/
+/*	$NetBSD: uvm_pdaemon.c,v 1.103.2.3 2011/12/26 16:03:11 yamt Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.103.2.2 2011/11/18 00:57:34 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.103.2.3 2011/12/26 16:03:11 yamt Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_readahead.h"
@@ -415,7 +415,6 @@
 kmutex_t *
 uvmpd_trylockowner(struct vm_page *pg)
 {
-	struct uvm_object *uobj = pg->uobject;
 	kmutex_t *lock;
 
 	KASSERT(mutex_owned(&uvm_pageqlock));
@@ -424,20 +423,7 @@
 	if (!mutex_tryenter(lock)) {
 		return NULL;
 	}
-	if (uobj == NULL) {
-
-		/*
-		 * set PQ_ANON if it isn't set already.
-		 */
-
-		if ((pg->pqflags & PQ_ANON) == 0) {
-			KASSERT(pg->loan_count > 0);
-			pg->loan_count--;
-			pg->pqflags |= PQ_ANON;
-			/* anon now owns it */
-		}
-	}
-
+	uvm_loan_resolve_orphan(pg, true);
 	return lock;
 }