stop using alternate pde mapping in xen pmap

author:     cherry <cherry@NetBSD.org>
date:       Sat, 28 Jan 2012 07:19:17 +0000
branch:     trunk
changeset:  208555:3d3108921393
parent:     208554:375b6ffe6b75
child:      208556:965ff98c3c9d

files:
	sys/arch/x86/include/cpu.h
	sys/arch/x86/x86/pmap.c
	sys/arch/xen/x86/cpu.c
	sys/arch/xen/x86/xen_pmap.c
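
The change replaces the old Xen "alternate PDE" (APDP/APTE) machinery with a
per-cpu copy of normal_pdes: on amd64/Xen each cpu_info now carries
ci_normal_pdes[], pmap_map_ptes() points the top-level slot at the target
pmap's pm_pdir, and pmap_unmap_ptes() restores L4_BASE. The toy program below
is only a sketch of that pointer-swap idea; the helper names
(cpu_init_normal_pdes, map_ptes, unmap_ptes) and the user-space stand-ins for
the recursive windows are assumptions for illustration, not kernel code.

	/*
	 * Toy model of the per-cpu normal_pdes redirection (illustration only;
	 * helper names and user-space stand-ins are assumptions, not kernel code).
	 */
	#include <assert.h>
	#include <stdio.h>

	#define PTP_LEVELS 4			/* page table levels on amd64 */

	typedef unsigned long pd_entry_t;

	/* stand-ins for the global recursive windows (L2_BASE, L3_BASE, L4_BASE) */
	static pd_entry_t l2_window[1], l3_window[1], l4_window[1];
	static pd_entry_t * const normal_pdes[] = { l2_window, l3_window, l4_window };
	#define L4_BASE l4_window

	struct pmap {
		pd_entry_t *pm_pdir;		/* the pmap's own page directory */
	};

	struct cpu_info {
		/* per-cpu copy of normal_pdes; the top slot may be redirected */
		pd_entry_t *ci_normal_pdes[PTP_LEVELS - 1];
	};

	static void
	cpu_init_normal_pdes(struct cpu_info *ci)
	{
		/* mirrors the per-cpu setup loop added in sys/arch/xen/x86/cpu.c */
		for (int i = 0; i < PTP_LEVELS - 1; i++)
			ci->ci_normal_pdes[i] = normal_pdes[i];
	}

	/* analogue of the XEN branch added to pmap_map_ptes() */
	static void
	map_ptes(struct cpu_info *ci, struct pmap *pmap, pd_entry_t * const **pdeppp)
	{
		assert(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
		ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
		*pdeppp = ci->ci_normal_pdes;
	}

	/* analogue of the reset added to pmap_unmap_ptes() */
	static void
	unmap_ptes(struct cpu_info *ci)
	{
		assert(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
		ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
	}

	int
	main(void)
	{
		static pd_entry_t user_pdir[1];
		struct pmap target = { .pm_pdir = user_pdir };
		struct cpu_info ci;
		pd_entry_t * const *pdes;

		cpu_init_normal_pdes(&ci);
		map_ptes(&ci, &target, &pdes);
		printf("top-level window now points at the target pmap: %p\n",
		    (void *)pdes[PTP_LEVELS - 2]);
		unmap_ptes(&ci);
		return 0;
	}

Because each CPU redirects only its own copy rather than a shared recursive
slot, the cross-CPU pmap_apte_flush() shootdowns in the removed code are no
longer needed, which is presumably the point of the per-cpu array.
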
--- a/sys/arch/x86/include/cpu.h	Sat Jan 28 05:32:49 2012 +0000
+++ b/sys/arch/x86/include/cpu.h	Sat Jan 28 07:19:17 2012 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.45 2011/12/30 17:57:49 cherry Exp $	*/
+/*	$NetBSD: cpu.h,v 1.46 2012/01/28 07:19:17 cherry Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -184,13 +184,14 @@
 #if defined(XEN) && (defined(PAE) || defined(__x86_64__))
 	/* Currently active user PGD (can't use rcr3() with Xen) */
 	pd_entry_t *	ci_kpm_pdir;	/* per-cpu PMD (va) */
-	paddr_t		ci_kpm_pdirpa; /* per-cpu PMD (pa) */
+	paddr_t		ci_kpm_pdirpa;  /* per-cpu PMD (pa) */
 #if defined(__x86_64__)
+	/* per-cpu version of normal_pdes */
+	pd_entry_t *	ci_normal_pdes[3]; /* Ok to hardcode. only for x86_64 && XEN */
 	paddr_t		ci_xen_current_user_pgd;
 #endif /* __x86_64__ */
 #endif /* XEN et.al */
 
-
 	char *ci_doubleflt_stack;
 	char *ci_ddbipi_stack;
 
--- a/sys/arch/x86/x86/pmap.c	Sat Jan 28 05:32:49 2012 +0000
+++ b/sys/arch/x86/x86/pmap.c	Sat Jan 28 07:19:17 2012 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.155 2012/01/27 19:48:39 para Exp $	*/
+/*	$NetBSD: pmap.c,v 1.156 2012/01/28 07:19:17 cherry Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.155 2012/01/27 19:48:39 para Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.156 2012/01/28 07:19:17 cherry Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -712,8 +712,6 @@
 	atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
 }
 
-#ifndef XEN
-
 /*
  * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
  *
@@ -797,7 +795,13 @@
 	pmap->pm_ncsw = l->l_ncsw;
 	*pmap2 = curpmap;
 	*ptepp = PTE_BASE;
+#ifdef XEN
+	KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
+	ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
+	*pdeppp = ci->ci_normal_pdes;
+#else /* XEN */
 	*pdeppp = normal_pdes;
+#endif /* XEN */
 }
 
 /*
@@ -817,6 +821,12 @@
 		return;
 	}
 
+	ci = curcpu();
+#if defined(XEN) && defined(__x86_64__)
+	/* Reset per-cpu normal_pdes */
+	KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
+	ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
+#endif /* XEN && __x86_64__ */
 	/*
 	 * We cannot tolerate context switches while mapped in.
 	 * If it is our own pmap all we have to do is unlock.
@@ -832,7 +842,6 @@
 	 * Mark whatever's on the CPU now as lazy and unlock.
 	 * If the pmap was already installed, we are done.
 	 */
-	ci = curcpu();
 	ci->ci_tlbstate = TLBSTATE_LAZY;
 	ci->ci_want_pmapload = (mypmap != pmap_kernel());
 	mutex_exit(pmap->pm_lock);
@@ -848,7 +857,6 @@
 	pmap_destroy(pmap2);
 }
 
-#endif
 
 inline static void
 pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
@@ -2329,19 +2337,6 @@
 	/*
 	 * reference count is zero, free pmap resources and then free pmap.
 	 */
-#ifdef XEN
-	/*
-	 * Xen lazy APDP handling:
-	 * clear APDP_PDE if pmap is the currently mapped
-	 */
-	if (xpmap_ptom_masked(pmap_pdirpa(pmap, 0)) == (*APDP_PDE & PG_FRAME)) {
-		kpreempt_disable();
-		pmap_unmap_apdp();
-		pmap_pte_flush();
-	        pmap_apte_flush(pmap_kernel());
-	        kpreempt_enable();
-	}
-#endif
 
 	/*
 	 * remove it from global list of pmaps
@@ -2760,17 +2755,6 @@
 #endif
 
 #ifdef i386
-#ifdef XEN
-	/*
-	 * clear APDP slot, in case it points to a page table that has 
-	 * been freed
-	 */
-	if (*APDP_PDE) {
-		pmap_unmap_apdp();
-	}
-	/* lldt() does pmap_pte_flush() */
-#endif /* XEN */
-
 #ifndef XEN
 	ci->ci_tss.tss_ldt = pmap->pm_ldt_sel;
 	ci->ci_tss.tss_cr3 = pcb->pcb_cr3;
@@ -3933,8 +3917,8 @@
 	KASSERT(pmap_initialized);
 	KASSERT(curlwp->l_md.md_gc_pmap != pmap);
 	KASSERT(va < VM_MAX_KERNEL_ADDRESS);
-	KASSERTMSG(va != (vaddr_t)PDP_BASE && va != (vaddr_t)APDP_BASE,
-	    "pmap_enter: trying to map over PDP/APDP!");
+	KASSERTMSG(va != (vaddr_t)PDP_BASE,
+	    "pmap_enter: trying to map over PDP!");
 	KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS ||
 	    pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]),
 	    "pmap_enter: missing kernel PTP for VA %lx!", va);
--- a/sys/arch/xen/x86/cpu.c	Sat Jan 28 05:32:49 2012 +0000
+++ b/sys/arch/xen/x86/cpu.c	Sat Jan 28 07:19:17 2012 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.77 2012/01/09 04:39:14 cherry Exp $	*/
+/*	$NetBSD: cpu.c,v 1.78 2012/01/28 07:19:17 cherry Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.77 2012/01/09 04:39:14 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.78 2012/01/28 07:19:17 cherry Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -460,7 +460,7 @@
 		cpu_intr_init(ci);
 		cpu_get_tsc_freq(ci);
 		cpu_init(ci);
-		pmap_cpu_init_late(ci); /* XXX: cosmetic */
+		pmap_cpu_init_late(ci);
 
 		/* Every processor needs to init it's own ipi h/w (similar to lapic) */
 		xen_ipi_init();
@@ -1265,6 +1265,15 @@
 	 * MD startup.
 	 */
 
+#if defined(__x86_64__)
+	/* Setup per-cpu normal_pdes */
+	int i;
+	extern pd_entry_t * const normal_pdes[];
+	for (i = 0;i < PTP_LEVELS - 1;i++) {
+		ci->ci_normal_pdes[i] = normal_pdes[i];
+	}
+#endif /* __x86_64__ */
+
 	if (ci == &cpu_info_primary)
 		return;
 
@@ -1326,7 +1335,7 @@
 
 #elif defined(__x86_64__)	
 	xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
-#endif /* PAE */
+#endif /* PAE , __x86_64__ */
 #endif /* defined(PAE) || defined(__x86_64__) */
 }
 
--- a/sys/arch/xen/x86/xen_pmap.c	Sat Jan 28 05:32:49 2012 +0000
+++ b/sys/arch/xen/x86/xen_pmap.c	Sat Jan 28 07:19:17 2012 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_pmap.c,v 1.15 2012/01/22 18:16:34 cherry Exp $	*/
+/*	$NetBSD: xen_pmap.c,v 1.16 2012/01/28 07:19:17 cherry Exp $	*/
 
 /*
  * Copyright (c) 2007 Manuel Bouyer.
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.15 2012/01/22 18:16:34 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.16 2012/01/28 07:19:17 cherry Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -143,207 +143,11 @@
 
 #define COUNT(x)	/* nothing */
 
-static pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER;
 extern pd_entry_t * const normal_pdes[];
 
 extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
 extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */
 
-void
-pmap_apte_flush(struct pmap *pmap)
-{
-
-	KASSERT(kpreempt_disabled());
-
-	/*
-	 * Flush the APTE mapping from all other CPUs that
-	 * are using the pmap we are using (who's APTE space
-	 * is the one we've just modified).
-	 *
-	 * XXXthorpej -- find a way to defer the IPI.
-	 */
-	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_APTE);
-	pmap_tlb_shootnow();
-}
-
-/*
- * Unmap the content of APDP PDEs
- */
-void
-pmap_unmap_apdp(void)
-{
-	int i;
-
-	for (i = 0; i < PDP_SIZE; i++) {
-		pmap_pte_set(APDP_PDE+i, 0);
-#if defined (PAE)
-		/*
-		 * For PAE, there are two places where alternative recursive
-		 * mappings could be found with Xen:
-		 * - in the L2 shadow pages
-		 * - the "real" L2 kernel page (pmap_kl2pd), which is unique
-		 * and static.
-		 * We first clear the APDP for the current pmap. As L2 kernel
-		 * page is unique, we only need to do it once for all pmaps.
-		 */
-		pmap_pte_set(APDP_PDE_SHADOW+i, 0);
-#endif
-	}
-}
-
-/*
- * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
- *
- * => we lock enough pmaps to keep things locked in
- * => must be undone with pmap_unmap_ptes before returning
- */
-
-void
-pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
-	      pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
-{
-	pd_entry_t opde, npde;
-	struct pmap *ourpmap;
-	struct cpu_info *ci;
-	struct lwp *l;
-	bool iscurrent;
-	uint64_t ncsw;
-	int s;
-
-	/* the kernel's pmap is always accessible */
-	if (pmap == pmap_kernel()) {
-		*pmap2 = NULL;
-		*ptepp = PTE_BASE;
-		*pdeppp = normal_pdes;
-		return;
-	}
-	KASSERT(kpreempt_disabled());
-
- retry:
-	l = curlwp;
-	ncsw = l->l_ncsw;
- 	ourpmap = NULL;
-	ci = curcpu();
-#if defined(__x86_64__)
-	/*
-	 * curmap can only be pmap_kernel so at this point
-	 * pmap_is_curpmap is always false
-	 */
-	iscurrent = 0;
-	ourpmap = pmap_kernel();
-#else /* __x86_64__*/
-	if (ci->ci_want_pmapload &&
-	    vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
-		pmap_load();
-		if (l->l_ncsw != ncsw)
-			goto retry;
-	}
-	iscurrent = pmap_is_curpmap(pmap);
-	/* if curpmap then we are always mapped */
-	if (iscurrent) {
-		mutex_enter(pmap->pm_lock);
-		*pmap2 = NULL;
-		*ptepp = PTE_BASE;
-		*pdeppp = normal_pdes;
-		goto out;
-	}
-	ourpmap = ci->ci_pmap;
-#endif /* __x86_64__ */
-
-	/* need to lock both curpmap and pmap: use ordered locking */
-	pmap_reference(ourpmap);
-	if ((uintptr_t) pmap < (uintptr_t) ourpmap) {
-		mutex_enter(pmap->pm_lock);
-		mutex_enter(ourpmap->pm_lock);
-	} else {
-		mutex_enter(ourpmap->pm_lock);
-		mutex_enter(pmap->pm_lock);
-	}
-
-	if (l->l_ncsw != ncsw)
-		goto unlock_and_retry;
-
-	/* need to load a new alternate pt space into curpmap? */
-	COUNT(apdp_pde_map);
-	opde = *APDP_PDE;
-	if (!pmap_valid_entry(opde) ||
-	    pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) {
-		int i;
-		s = splvm();
-		/* Make recursive entry usable in user PGD */
-		for (i = 0; i < PDP_SIZE; i++) {
-			npde = pmap_pa2pte(
-			    pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V;
-			xpq_queue_pte_update(
-			    xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)),
-			    npde);
-			xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
-			    npde);
-#ifdef PAE
-			/* update shadow entry too */
-			xpq_queue_pte_update(
-			    xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde);
-#endif /* PAE */
-			xpq_queue_invlpg(
-			    (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]);
-		}
-		if (pmap_valid_entry(opde))
-			pmap_apte_flush(ourpmap);
-		splx(s);
-	}
-	*pmap2 = ourpmap;
-	*ptepp = APTE_BASE;
-	*pdeppp = alternate_pdes;
-	KASSERT(l->l_ncsw == ncsw);
-#if !defined(__x86_64__)
- out:
-#endif
- 	/*
- 	 * might have blocked, need to retry?
- 	 */
-	if (l->l_ncsw != ncsw) {
- unlock_and_retry:
-	    	if (ourpmap != NULL) {
-			mutex_exit(ourpmap->pm_lock);
-			pmap_destroy(ourpmap);
-		}
-		mutex_exit(pmap->pm_lock);
-		goto retry;
-	}
-}
-
-/*
- * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
- */
-
-void
-pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
-{
-
-	if (pmap == pmap_kernel()) {
-		return;
-	}
-	KASSERT(kpreempt_disabled());
-	if (pmap2 == NULL) {
-		mutex_exit(pmap->pm_lock);
-	} else {
-#if defined(__x86_64__)
-		KASSERT(pmap2 == pmap_kernel());
-#else
-		KASSERT(curcpu()->ci_pmap == pmap2);
-#endif
-#if defined(MULTIPROCESSOR)
-		pmap_unmap_apdp();
-		pmap_pte_flush();
-		pmap_apte_flush(pmap2);
-#endif /* MULTIPROCESSOR */
-		COUNT(apdp_pde_unmap);
-		mutex_exit(pmap->pm_lock);
-		mutex_exit(pmap2->pm_lock);
-		pmap_destroy(pmap2);
-	}
-}
-
 int
 pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
 {
@@ -439,33 +243,10 @@
 void
 pmap_xen_suspend(void)
 {
-	int i;
 	int s;
-	struct pmap *pm;
 
 	s = splvm();
-
-	pmap_unmap_apdp();
-
-	mutex_enter(&pmaps_lock);
-	/*
-	 * Set APDP entries to 0 in all pmaps.
-	 * Note that for PAE kernels, this only clears the APDP entries
-	 * found in the L2 shadow pages, as pmap_pdirpa() is used to obtain
-	 * the PA of the pmap->pm_pdir[] pages (forming the 4 contiguous
-	 * pages of PAE PD: 3 for user space, 1 for the L2 kernel shadow page)
-	 */
-	LIST_FOREACH(pm, &pmaps, pm_list) {
-		for (i = 0; i < PDP_SIZE; i++) {
-			xpq_queue_pte_update(
-			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_APTE + i)),
-			    0);
-		}
-	}
-	mutex_exit(&pmaps_lock);
-
 	xpq_flush_queue();
-
 	splx(s);
 
 #ifdef PAE