Use the MI "pcu" framework for bookkeeping of npx/fpu states on x86. trunk
author drochner <drochner@NetBSD.org>
Wed, 23 Oct 2013 20:18:50 +0000
branch trunk
changeset 221931 11c12b4986f5
parent 221930 d4245aed28e2
child 221932 31d8ee858e99
Use the MI "pcu" framework for bookkeeping of npx/fpu states on x86. This reduces the amount of MD code enormously, and makes it easier to implement support for newer CPU features which require more fpu state, or for fpu usage by the kernel. For access to FPU state across CPUs, an xcall kthread is now used rather than a dedicated IPI. No user-visible changes intended.
sys/arch/amd64/amd64/fpu.c
sys/arch/amd64/amd64/genassym.cf
sys/arch/amd64/amd64/locore.S
sys/arch/amd64/amd64/machdep.c
sys/arch/amd64/amd64/netbsd32_machdep.c
sys/arch/amd64/amd64/process_machdep.c
sys/arch/amd64/include/fpu.h
sys/arch/amd64/include/pcb.h
sys/arch/amd64/include/proc.h
sys/arch/amd64/include/types.h
sys/arch/i386/i386/compat_16_machdep.c
sys/arch/i386/i386/genassym.cf
sys/arch/i386/i386/locore.S
sys/arch/i386/i386/machdep.c
sys/arch/i386/i386/process_machdep.c
sys/arch/i386/include/pcb.h
sys/arch/i386/include/proc.h
sys/arch/i386/include/types.h
sys/arch/i386/isa/npx.c
sys/arch/x86/acpi/acpi_wakeup.c
sys/arch/x86/include/cpu.h
sys/arch/x86/include/intrdefs.h
sys/arch/x86/x86/cpu.c
sys/arch/x86/x86/ipi.c
sys/arch/x86/x86/vm_machdep.c
sys/arch/xen/x86/cpu.c
sys/arch/xen/x86/xen_ipi.c
sys/compat/linux/arch/amd64/linux_machdep.c
sys/compat/linux/arch/i386/linux_machdep.c
sys/compat/linux32/arch/amd64/linux32_machdep.c
--- a/sys/arch/amd64/amd64/fpu.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/fpu.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.40 2012/12/31 16:20:17 dsl Exp $	*/
+/*	$NetBSD: fpu.c,v 1.41 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
@@ -100,7 +100,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.40 2012/12/31 16:20:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.41 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -178,7 +178,7 @@
 void
 fputrap(struct trapframe *frame)
 {
-	register struct lwp *l = curcpu()->ci_fpcurlwp;
+	register struct lwp *l = curlwp;
 	struct pcb *pcb = lwp_getpcb(l);
 	struct savefpu *sfp = &pcb->pcb_savefpu;
 	uint32_t mxcsr, statbits;
@@ -188,11 +188,6 @@
 	KPREEMPT_DISABLE(l);
 	x86_enable_intr();
 
-	/*
-	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
-	 * should be set, and we should have gotten a DNA exception.
-	 */
-	KASSERT(l == curlwp);
 	fxsave(sfp);
 	pcb->pcb_savefpu_i387.fp_ex_tw = sfp->fp_fxsave.fx_ftw;
 	pcb->pcb_savefpu_i387.fp_ex_sw = sfp->fp_fxsave.fx_fsw;
@@ -249,70 +244,35 @@
  * Otherwise, we save the previous state, if necessary, and restore
  * our last saved state.
  */
+
+extern const pcu_ops_t fpu_ops;
+
 void
 fpudna(struct cpu_info *ci)
 {
+
+	pcu_load(&fpu_ops);
+}
+
+
+static void
+fpu_state_load(struct lwp *l, u_int flags)
+{
 	uint16_t cw;
 	uint32_t mxcsr;
-	struct lwp *l, *fl;
-	struct pcb *pcb;
-	int s;
-
-	if (ci->ci_fpsaving) {
-		/* Recursive trap. */
-		x86_enable_intr();
-		return;
-	}
-
-	/* Lock out IPIs and disable preemption. */
-	s = splhigh();
-	x86_enable_intr();
+	struct pcb * const pcb = lwp_getpcb(l);
 
-	/* Save state on current CPU. */
-	l = ci->ci_curlwp;
-	pcb = lwp_getpcb(l);
-	fl = ci->ci_fpcurlwp;
-	if (fl != NULL) {
-		/*
-		 * It seems we can get here on Xen even if we didn't
-		 * switch lwp.  In this case do nothing
-		 */
-		if (fl == l) {
-			KASSERT(pcb->pcb_fpcpu == ci);
-			clts();
-			splx(s);
-			return;
-		}
-		KASSERT(fl != l);
-		fpusave_cpu(true);
-		KASSERT(ci->ci_fpcurlwp == NULL);
-	}
+	clts();
+	pcb->pcb_cr0 &= ~CR0_TS;
+	if (!(flags & PCU_RELOAD))
+		return;
 
-	/* Save our state if on a remote CPU. */
-	if (pcb->pcb_fpcpu != NULL) {
-		/* Explicitly disable preemption before dropping spl. */
-		KPREEMPT_DISABLE(l);
-		splx(s);
-		fpusave_lwp(l, true);
-		KASSERT(pcb->pcb_fpcpu == NULL);
-		s = splhigh();
-		KPREEMPT_ENABLE(l);
-	}
-
-	/*
-	 * Restore state on this CPU, or initialize.  Ensure that
-	 * the entire update is atomic with respect to FPU-sync IPIs.
-	 */
-	clts();
-	ci->ci_fpcurlwp = l;
-	pcb->pcb_fpcpu = ci;
-	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
+	if (!(flags & PCU_LOADED)) {
 		fninit();
 		cw = pcb->pcb_savefpu.fp_fxsave.fx_fcw;
 		fldcw(&cw);
 		mxcsr = pcb->pcb_savefpu.fp_fxsave.fx_mxcsr;
 		x86_ldmxcsr(&mxcsr);
-		l->l_md.md_flags |= MDL_USEDFPU;
 	} else {
 		/*
 		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
@@ -336,97 +296,43 @@
 		fldummy(&zero);
 		fxrstor(&pcb->pcb_savefpu);
 	}
-
-	KASSERT(ci == curcpu());
-	splx(s);
-}
-
-/*
- * Save current CPU's FPU state.  Must be called at IPL_HIGH.
- */
-void
-fpusave_cpu(bool save)
-{
-	struct cpu_info *ci;
-	struct pcb *pcb;
-	struct lwp *l;
-
-	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
-
-	ci = curcpu();
-	l = ci->ci_fpcurlwp;
-	if (l == NULL) {
-		return;
-	}
-	pcb = lwp_getpcb(l);
-
-	if (save) {
-		 /*
-		  * Set ci->ci_fpsaving, so that any pending exception will
-		  * be thrown away.  It will be caught again if/when the
-		  * FPU state is restored.
-		  */
-		KASSERT(ci->ci_fpsaving == 0);
-		clts();
-		ci->ci_fpsaving = 1;
-		fxsave(&pcb->pcb_savefpu);
-		ci->ci_fpsaving = 0;
-	}
-
-	stts();
-	pcb->pcb_fpcpu = NULL;
-	ci->ci_fpcurlwp = NULL;
 }
 
-/*
- * Save l's FPU state, which may be on this processor or another processor.
- * It may take some time, so we avoid disabling preemption where possible.
- * Caller must know that the target LWP is stopped, otherwise this routine
- * may race against it.
- */
-void
-fpusave_lwp(struct lwp *l, bool save)
+static void
+fpu_state_save(struct lwp *l, u_int flags)
 {
-	struct cpu_info *oci;
-	struct pcb *pcb;
-	int s, spins, ticks;
+	struct cpu_info *ci;
+	struct pcb * const pcb = lwp_getpcb(l);
+
+	ci = curcpu();
+	/*
+	 * Set ci->ci_fpsaving, so that any pending exception will
+	 * be thrown away.  It will be caught again if/when the
+	 * FPU state is restored.
+	 */
+	KASSERT(ci->ci_fpsaving == 0);
+	clts();
+	ci->ci_fpsaving = 1;
+	fxsave(&pcb->pcb_savefpu);
+	ci->ci_fpsaving = 0;
+}
 
-	spins = 0;
-	ticks = hardclock_ticks;
-	for (;;) {
-		s = splhigh();
-		pcb = lwp_getpcb(l);
-		oci = pcb->pcb_fpcpu;
-		if (oci == NULL) {
-			splx(s);
-			break;
-		}
-		if (oci == curcpu()) {
-			KASSERT(oci->ci_fpcurlwp == l);
-			fpusave_cpu(save);
-			splx(s);
-			break;
-		}
-		splx(s);
-#ifdef XEN
-		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
-			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
-			    cpu_name(oci));
-		}
-#else /* XEN */
-		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
-#endif
-		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
-			x86_pause();
-			spins++;
-		}
-		if (spins > 100000000) {
-			panic("fpusave_lwp: did not");
-		}
-	}
+static void
+fpu_state_release(struct lwp *l, u_int flags)
+{
+	struct pcb * const pcb = lwp_getpcb(l);
+
+	stts();
+	pcb->pcb_cr0 |= CR0_TS;
+}
 
-	if (!save) {
-		/* Ensure we restart with a clean slate. */
-	 	l->l_md.md_flags &= ~MDL_USEDFPU;
-	}
-}
+const pcu_ops_t fpu_ops = {
+	.pcu_id = PCU_FPU,
+	.pcu_state_load = fpu_state_load,
+	.pcu_state_save = fpu_state_save,
+	.pcu_state_release = fpu_state_release,
+};
+
+const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
+	[PCU_FPU] = &fpu_ops,
+};
--- a/sys/arch/amd64/amd64/genassym.cf	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.52 2012/07/15 15:17:56 dsl Exp $
+#	$NetBSD: genassym.cf,v 1.53 2013/10/23 20:18:50 drochner Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -185,7 +185,6 @@
 define	PCB_RSP0		offsetof(struct pcb, pcb_rsp0)
 define	PCB_CR0			offsetof(struct pcb, pcb_cr0)
 define	PCB_ONFAULT		offsetof(struct pcb, pcb_onfault)
-define	PCB_FPCPU		offsetof(struct pcb, pcb_fpcpu)
 define	PCB_FLAGS		offsetof(struct pcb, pcb_flags)
 define	PCB_COMPAT32		PCB_COMPAT32
 define	PCB_FS			offsetof(struct pcb, pcb_fs)
@@ -238,7 +237,6 @@
 define	CPU_INFO_NTRAP		offsetof(struct cpu_info, ci_data.cpu_ntrap)
 define	CPU_INFO_NINTR		offsetof(struct cpu_info, ci_data.cpu_nintr)
 define	CPU_INFO_CURPRIORITY	offsetof(struct cpu_info, ci_schedstate.spc_curpriority)
-define	CPU_INFO_FPCURLWP	offsetof(struct cpu_info, ci_fpcurlwp)
 
 define	CPU_INFO_GDT		offsetof(struct cpu_info, ci_gdt)
 define	CPU_INFO_IPENDING	offsetof(struct cpu_info, ci_ipending)
--- a/sys/arch/amd64/amd64/locore.S	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/locore.S	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.73 2013/06/23 09:00:37 uebayasi Exp $	*/
+/*	$NetBSD: locore.S,v 1.74 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1023,20 +1023,11 @@
 	 */
 2:
 #ifndef XEN
-	movl	$IPL_HIGH,CPUVAR(ILEVEL)
-	movl	PCB_CR0(%r14),%ecx	/* has CR0_TS clear */
+	movl	PCB_CR0(%r14),%ecx
 	movq	%cr0,%rdx
 
-	/*
-	 * If our floating point registers are on a different CPU,
-	 * set CR0_TS so we'll trap rather than reuse bogus state.
-	 */
-	cmpq	CPUVAR(FPCURLWP),%r12
-	je	3f
-	orq	$CR0_TS,%rcx
-
 	/* Reloading CR0 is very expensive - avoid if possible. */
-3:	cmpq	%rdx,%rcx
+	cmpq	%rdx,%rcx
 	je	6f
 	movq	%rcx,%cr0
 
--- a/sys/arch/amd64/amd64/machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.196 2013/10/17 20:57:58 christos Exp $	*/
+/*	$NetBSD: machdep.c,v 1.197 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.196 2013/10/17 20:57:58 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.197 2013/10/23 20:18:50 drochner Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -330,6 +330,8 @@
 
 static int valid_user_selector(struct lwp *, uint64_t);
 
+extern const pcu_ops_t fpu_ops;
+
 /*
  * Machine-dependent startup code
  */
@@ -422,23 +424,17 @@
 void
 x86_64_tls_switch(struct lwp *l)
 {
-	struct cpu_info *ci = curcpu();
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf = l->l_md.md_regs;
 
 	/*
-	 * Raise the IPL to IPL_HIGH.
-	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
-	 * is deferred until mi_switch(), when cpu_switchto() returns.
-	 */
-	(void)splhigh();
-	/*
 	 * If our floating point registers are on a different CPU,
 	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
-	if (l != ci->ci_fpcurlwp) {
+	if (pcb->pcb_cr0 & CR0_TS)
 		HYPERVISOR_fpu_taskswitch(1);
-	}
+	else
+		HYPERVISOR_fpu_taskswitch(0);
 
 	/* Update TLS segment pointers */
 	if (pcb->pcb_flags & PCB_COMPAT32) {
@@ -471,7 +467,7 @@
 	pcb->pcb_iopl = SEL_KPL;
 
 	pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
-	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
+	pcb->pcb_cr0 = rcr0();
 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1;
 
 #if !defined(XEN)
@@ -548,8 +544,7 @@
 	tf->tf_rsp = (uint64_t)f;
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
 
-	/* Ensure FP state is reset, if FP is used. */
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard_all(l);
 }
 
 void
@@ -596,7 +591,7 @@
 	/*
 	 * Don't bother copying out FP state if there is none.
 	 */
-	if (l->l_md.md_flags & MDL_USEDFPU)
+	if (pcu_used_p(&fpu_ops))
 		tocopy = sizeof (struct sigframe_siginfo);
 	else
 		tocopy = sizeof (struct sigframe_siginfo) -
@@ -1322,16 +1317,11 @@
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL) {
-		fpusave_lwp(l, false);
-	}
-
 #ifdef USER_LDT
 	pmap_ldt_cleanup(l);
 #endif
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard(&fpu_ops, false);
 	pcb->pcb_flags = 0;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
 	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@@ -1923,12 +1913,10 @@
 	mcp->_mc_tlsbase = (uintptr_t)l->l_private;;
 	*flags |= _UC_TLSBASE;
 
-	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
+	if (pcu_used_p(&fpu_ops)) {
 		struct pcb *pcb = lwp_getpcb(l);
 
-		if (pcb->pcb_fpcpu) {
-			fpusave_lwp(l, true);
-		}
+		pcu_save(&fpu_ops);
 		memcpy(mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
 		    sizeof (mcp->__fpregs));
 		*flags |= _UC_FPU;
@@ -1981,14 +1969,12 @@
 		l->l_md.md_flags |= MDL_IRET;
 	}
 
-	if (pcb->pcb_fpcpu != NULL)
-		fpusave_lwp(l, false);
-
 	if ((flags & _UC_FPU) != 0) {
+		pcu_discard(&fpu_ops, true);
 		memcpy(&pcb->pcb_savefpu.fp_fxsave, mcp->__fpregs,
 		    sizeof (mcp->__fpregs));
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
+	} else
+		pcu_discard(&fpu_ops, false);
 
 	if ((flags & _UC_TLSBASE) != 0)
 		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.81 2013/06/25 21:08:07 dsl Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.82 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.81 2013/06/25 21:08:07 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.82 2013/10/23 20:18:50 drochner Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -60,6 +60,7 @@
 #include <sys/ras.h>
 #include <sys/ptrace.h>
 #include <sys/kauth.h>
+#include <sys/pcu.h>
 
 #include <machine/fpu.h>
 #include <machine/frame.h>
@@ -93,6 +94,8 @@
 
 static int check_sigcontext32(struct lwp *, const struct netbsd32_sigcontext *);
 
+extern const pcu_ops_t fpu_ops;
+
 #ifdef EXEC_AOUT
 /*
  * There is no native a.out -- this function is required
@@ -128,18 +131,14 @@
 
 	pcb = lwp_getpcb(l);
 
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL) {
-		fpusave_lwp(l, false);
-	}
-
 #if defined(USER_LDT) && 0
 	pmap_ldt_cleanup(p);
 #endif
 
+	pcu_discard(&fpu_ops, false);
+
 	netbsd32_adjust_limits(p);
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Force iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
         pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
@@ -261,7 +260,7 @@
 	tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard_all(l); /* XXX no FP context saved, we'll lose */
 
 	tf->tf_rip = (uint64_t)catcher;
 	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL);
@@ -364,7 +363,7 @@
 	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard_all(l);
 
 	/* Remember that we're now on the signal stack. */
 	if (onstack)
@@ -888,14 +887,11 @@
 		/*
 		 * If we were using the FPU, forget that we were.
 		 */
-		if (pcb->pcb_fpcpu != NULL) {
-			fpusave_lwp(l, false);
-		}
+		pcu_discard(&fpu_ops, true);
 		memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs,
 		    sizeof (pcb->pcb_savefpu.fp_fxsave));
-		/* If not set already. */
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
+	} else
+		pcu_discard(&fpu_ops, false);
 
 	mutex_enter(p->p_lock);
 	if (flags & _UC_SETSTACK)
@@ -945,12 +941,10 @@
 	*flags |= _UC_TLSBASE;
 
 	/* Save floating point register context, if any. */
-	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
+	if (pcu_used_p(&fpu_ops)) {
 		struct pcb *pcb = lwp_getpcb(l);
 
-		if (pcb->pcb_fpcpu) {
-			fpusave_lwp(l, true);
-		}
+		pcu_save(&fpu_ops);
 		memcpy(&mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
 		    sizeof (pcb->pcb_savefpu.fp_fxsave));
 		*flags |= _UC_FPU;
--- a/sys/arch/amd64/amd64/process_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/amd64/process_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.21 2012/07/08 20:14:11 dsl Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.22 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -53,7 +53,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.21 2012/07/08 20:14:11 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.22 2013/10/23 20:18:50 drochner Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -62,6 +62,7 @@
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
+#include <sys/pcu.h>
 
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -75,6 +76,8 @@
 static inline int verr_ldt(struct pmap *, int sel);
 #endif
 
+extern const pcu_ops_t fpu_ops;
+
 static inline struct trapframe *
 process_frame(struct lwp *l)
 {
@@ -107,8 +110,8 @@
 {
 	struct fxsave64 *frame = process_fpframe(l);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
-		fpusave_lwp(l, true);
+	if (pcu_used_p(&fpu_ops)) {
+		pcu_save(&fpu_ops);
 	} else {
 		uint16_t cw;
 		uint32_t mxcsr, mxcsr_mask;
@@ -127,7 +130,6 @@
 		frame->fx_ftw = 0x00;	/* abridged tag; all empty */
 		frame->fx_mxcsr = mxcsr;
 		frame->fx_mxcsr_mask = mxcsr_mask;
-		l->l_md.md_flags |= MDL_USEDFPU;
 	}
 
 	memcpy(&regs->fxstate, frame, sizeof(*regs));
@@ -162,12 +164,7 @@
 {
 	struct fxsave64 *frame = process_fpframe(l);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
-		fpusave_lwp(l, false);
-	} else {
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
-
+	pcu_discard(&fpu_ops, true);
 	memcpy(frame, &regs->fxstate, sizeof(*regs));
 	return (0);
 }
--- a/sys/arch/amd64/include/fpu.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/include/fpu.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.h,v 1.7 2012/12/31 16:20:17 dsl Exp $	*/
+/*	$NetBSD: fpu.h,v 1.8 2013/10/23 20:18:50 drochner Exp $	*/
 
 #ifndef	_AMD64_FPU_H_
 #define	_AMD64_FPU_H_
@@ -77,12 +77,7 @@
 struct cpu_info;
 
 void fpuinit(struct cpu_info *);
-void fpudrop(void);
-void fpusave(struct lwp *);
-void fpudiscard(struct lwp *);
 void fputrap(struct trapframe *);
-void fpusave_lwp(struct lwp *, bool);
-void fpusave_cpu(bool);
 
 #endif
 
--- a/sys/arch/amd64/include/pcb.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/include/pcb.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.18 2012/12/31 16:20:17 dsl Exp $	*/
+/*	$NetBSD: pcb.h,v 1.19 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -97,7 +97,6 @@
 	struct	savefpu pcb_savefpu __aligned(16); /* floating point state */
 	uint32_t pcb_unused_1[4];	/* unused */
 	void     *pcb_onfault;		/* copyin/out fault recovery */
-	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	uint64_t  pcb_fs;
 	uint64_t  pcb_gs;
 	int pcb_iopl;
--- a/sys/arch/amd64/include/proc.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/include/proc.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.15 2012/07/15 15:17:56 dsl Exp $	*/
+/*	$NetBSD: proc.h,v 1.16 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*
  * Copyright (c) 1991 Regents of the University of California.
@@ -53,7 +53,6 @@
 	volatile int md_astpending;
 };
 
-#define	MDL_USEDFPU	0x0001	/* has used the FPU */
 #define	MDL_COMPAT32	0x0008	/* i386, always return via iret */
 #define	MDL_IRET	0x0010	/* force return via iret, not sysret */
 
--- a/sys/arch/amd64/include/types.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/amd64/include/types.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: types.h,v 1.41 2012/01/21 16:48:56 chs Exp $	*/
+/*	$NetBSD: types.h,v 1.42 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -79,7 +79,6 @@
 #define	__HAVE_NEW_STYLE_BUS_H
 #define	__HAVE_CPU_COUNTER
 #define	__HAVE_CPU_DATA_FIRST
-#define	__HAVE_MD_CPU_OFFLINE
 #define	__HAVE_SYSCALL_INTERN
 #define	__HAVE_MINIMAL_EMUL
 #define	__HAVE_ATOMIC64_OPS
@@ -103,6 +102,11 @@
 #endif
 #endif
 
+#if defined(_KERNEL) || defined(_KMEMUSER)
+#define	PCU_FPU		0	/* FPU */
+#define	PCU_UNIT_COUNT	1
+#endif
+
 #else	/*	!__x86_64__	*/
 
 #include <i386/types.h>
--- a/sys/arch/i386/i386/compat_16_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/i386/compat_16_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: compat_16_machdep.c,v 1.22 2012/04/22 20:36:52 christos Exp $	*/
+/*	$NetBSD: compat_16_machdep.c,v 1.23 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: compat_16_machdep.c,v 1.22 2012/04/22 20:36:52 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: compat_16_machdep.c,v 1.23 2013/10/23 20:18:50 drochner Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_vm86.h"
@@ -260,9 +260,7 @@
 		/* NOTREACHED */
 	}
 
-	int svufpu = l->l_md.md_flags & MDL_USEDFPU;
 	buildcontext(l, sel, catcher, fp);
-	l->l_md.md_flags |= svufpu;
 
 	/* Remember that we're now on the signal stack. */
 	if (onstack)
--- a/sys/arch/i386/i386/genassym.cf	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/i386/genassym.cf	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.94 2012/09/30 20:54:52 dsl Exp $
+#	$NetBSD: genassym.cf,v 1.95 2013/10/23 20:18:50 drochner Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -204,7 +204,6 @@
 define	PCB_ESP0		offsetof(struct pcb, pcb_esp0)
 define	PCB_CR0			offsetof(struct pcb, pcb_cr0)
 define	PCB_ONFAULT		offsetof(struct pcb, pcb_onfault)
-define	PCB_FPCPU		offsetof(struct pcb, pcb_fpcpu)
 define	PCB_FSD			offsetof(struct pcb, pcb_fsd)
 define	PCB_GSD			offsetof(struct pcb, pcb_gsd)
 define	PCB_IOMAP		offsetof(struct pcb, pcb_iomap)
@@ -263,7 +262,6 @@
 define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
 define	TLBSTATE_VALID		TLBSTATE_VALID
 define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
-define	CPU_INFO_FPCURLWP	offsetof(struct cpu_info, ci_fpcurlwp)
 define	CPU_INFO_CURLDT		offsetof(struct cpu_info, ci_curldt)
 define	CPU_INFO_IDLELWP	offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define	CPU_INFO_PMAP		offsetof(struct cpu_info, ci_pmap)
--- a/sys/arch/i386/i386/locore.S	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/i386/locore.S	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.106 2013/06/23 09:00:36 uebayasi Exp $	*/
+/*	$NetBSD: locore.S,v 1.107 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -129,7 +129,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.106 2013/06/23 09:00:36 uebayasi Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.107 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_compat_oldboot.h"
 #include "opt_ddb.h"
@@ -1077,20 +1077,11 @@
 	call	_C_LABEL(i386_tls_switch)
 	addl	$4,%esp
 #else /* !XEN */
-	movl	$IPL_HIGH,CPUVAR(ILEVEL)
-	movl	PCB_CR0(%ebx),%ecx		/* has CR0_TS clear */
+	movl	PCB_CR0(%ebx),%ecx
 	movl	%cr0,%edx
 
-	/*
-	 * If our floating point registers are on a different CPU,
-	 * set CR0_TS so we'll trap rather than reuse bogus state.
-	 */
-	cmpl	CPUVAR(FPCURLWP),%edi
-	je	3f
-	orl	$CR0_TS,%ecx
-
 	/* Reloading CR0 is very expensive - avoid if possible. */
-3:	cmpl	%edx,%ecx
+	cmpl	%edx,%ecx
 	je	4f
 	movl	%ecx,%cr0
 #endif /* !XEN */
--- a/sys/arch/i386/i386/machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/i386/machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.734 2013/04/12 16:59:38 christos Exp $	*/
+/*	$NetBSD: machdep.c,v 1.735 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.734 2013/04/12 16:59:38 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.735 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_beep.h"
 #include "opt_compat_ibcs2.h"
@@ -266,6 +266,7 @@
 void (*initclock_func)(void) = i8254_initclocks;
 #endif
 
+extern const pcu_ops_t fpu_ops;
 
 /*
  * Size of memory segments, before any memory is stolen.
@@ -498,7 +499,7 @@
 	struct pcb *pcb = lwp_getpcb(l);
 
 	pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
-	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
+	pcb->pcb_cr0 = rcr0();
 	pcb->pcb_esp0 = uvm_lwp_getuarea(l) + KSTACK_SIZE - 16;
 	pcb->pcb_iopl = SEL_KPL;
 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
@@ -548,21 +549,16 @@
 {
 	struct cpu_info *ci = curcpu();
 	struct pcb *pcb = lwp_getpcb(l);
-	/*
-         * Raise the IPL to IPL_HIGH.
-	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
-	 * is deferred until mi_switch(), when cpu_switchto() returns.
-	 */
-	(void)splhigh();
 
         /*
 	 * If our floating point registers are on a different CPU,
 	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
 
-	if (l != ci->ci_fpcurlwp) {
+	if (pcb->pcb_cr0 & CR0_TS) {
 		HYPERVISOR_fpu_taskswitch(1);
-	}
+	} else
+		HYPERVISOR_fpu_taskswitch(0);
 
 	/* Update TLS segment pointers */
 	update_descriptor(&ci->ci_gdt[GUFS_SEL],
@@ -672,7 +668,7 @@
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard_all(l);
 }
 
 void
@@ -867,18 +863,11 @@
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 
-#if NNPX > 0
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL) {
-		npxsave_lwp(l, false);
-	}
-#endif
-
 #ifdef USER_LDT
 	pmap_ldt_cleanup(l);
 #endif
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard(&fpu_ops, false);
 	if (i386_use_fxsave) {
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__;
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
@@ -1633,7 +1622,7 @@
 	*flags |= _UC_TLSBASE;
 
 	/* Save floating point register context, if any. */
-	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
+	if (pcu_used_p(&fpu_ops)) {
 		struct pcb *pcb = lwp_getpcb(l);
 #if NNPX > 0
 
@@ -1641,9 +1630,7 @@
 		 * If this process is the current FP owner, dump its
 		 * context to the PCB first.
 		 */
-		if (pcb->pcb_fpcpu) {
-			npxsave_lwp(l, true);
-		}
+		pcu_save(&fpu_ops);
 #endif
 		if (i386_use_fxsave) {
 			memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
@@ -1736,17 +1723,9 @@
 	if ((flags & _UC_TLSBASE) != 0)
 		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
-#if NNPX > 0
-	/*
-	 * If we were using the FPU, forget that we were.
-	 */
-	if (pcb->pcb_fpcpu != NULL) {
-		npxsave_lwp(l, false);
-	}
-#endif
-
 	/* Restore floating point register context, if any. */
 	if ((flags & _UC_FPU) != 0) {
+		pcu_discard(&fpu_ops, true);
 		if (flags & _UC_FXSAVE) {
 			if (i386_use_fxsave) {
 				memcpy(
@@ -1770,8 +1749,8 @@
 				    sizeof (pcb->pcb_savefpu.sv_87));
 			}
 		}
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
+	} else
+		pcu_discard(&fpu_ops, false);
 	mutex_enter(p->p_lock);
 	if (flags & _UC_SETSTACK)
 		l->l_sigstk.ss_flags |= SS_ONSTACK;
--- a/sys/arch/i386/i386/process_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/i386/process_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.72 2009/11/21 03:11:00 rmind Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.73 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.72 2009/11/21 03:11:00 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.73 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_vm86.h"
 #include "opt_ptrace.h"
@@ -76,6 +76,8 @@
 #include <machine/vm86.h>
 #endif
 
+extern const pcu_ops_t fpu_ops;
+
 static inline struct trapframe *
 process_frame(struct lwp *l)
 {
@@ -246,9 +248,9 @@
 {
 	union savefpu *frame = process_fpframe(l);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
+	if (pcu_used_p(&fpu_ops)) {
 #if NNPX > 0
-		npxsave_lwp(l, true);
+		pcu_save(&fpu_ops);
 #endif
 	} else {
 		/*
@@ -274,7 +276,6 @@
 			frame->sv_87.sv_env.en_sw = 0x0000;
 			frame->sv_87.sv_env.en_tw = 0xffff;
 		}
-		l->l_md.md_flags |= MDL_USEDFPU;
 	}
 
 	if (i386_use_fxsave) {
@@ -349,13 +350,7 @@
 {
 	union savefpu *frame = process_fpframe(l);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
-#if NNPX > 0
-		npxsave_lwp(l, false);
-#endif
-	} else {
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
+	pcu_discard(&fpu_ops, true);
 
 	if (i386_use_fxsave) {
 		struct save87 s87;
@@ -400,13 +395,9 @@
 	if (i386_use_fxsave == 0)
 		return (EINVAL);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
+	if (pcu_used_p(&fpu_ops)) {
 #if NNPX > 0
-		struct pcb *pcb = lwp_getpcb(l);
-
-		if (pcb->pcb_fpcpu != NULL) {
-			npxsave_lwp(l, true);
-		}
+		pcu_save(&fpu_ops);
 #endif
 	} else {
 		/*
@@ -423,8 +414,6 @@
 		frame->sv_xmm.sv_env.en_mxcsr = mxcsr;
 		frame->sv_xmm.sv_env.en_sw = 0x0000;
 		frame->sv_xmm.sv_env.en_tw = 0x00;
-
-		l->l_md.md_flags |= MDL_USEDFPU;  
 	}
 
 	memcpy(regs, &frame->sv_xmm, sizeof(*regs));
@@ -439,18 +428,8 @@
 	if (i386_use_fxsave == 0)
 		return (EINVAL);
 
-	if (l->l_md.md_flags & MDL_USEDFPU) {
-#if NNPX > 0
-		struct pcb *pcb = lwp_getpcb(l);
-
-		/* If we were using the FPU, drop it. */
-		if (pcb->pcb_fpcpu != NULL) {
-			npxsave_lwp(l, false);
-		}
-#endif
-	} else {
-		l->l_md.md_flags |= MDL_USEDFPU;
-	}
+	pcu_save(&fpu_ops); /* keep i387 regs */
+	pcu_discard(&fpu_ops, true);
 
 	memcpy(&frame->sv_xmm, regs, sizeof(*regs));
 	return (0);
--- a/sys/arch/i386/include/pcb.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/include/pcb.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.48 2010/04/23 16:07:33 joerg Exp $	*/
+/*	$NetBSD: pcb.h,v 1.49 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2009 The NetBSD Foundation, Inc.
@@ -100,7 +100,6 @@
 	int	vm86_eflags;		/* virtual eflags for vm86 mode */
 	int	vm86_flagmask;		/* flag mask for vm86 mode */
 	void	*vm86_userp;		/* XXX performance hack */
-	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	char	*pcb_iomap;		/* I/O permission bitmap */
 };
 
--- a/sys/arch/i386/include/proc.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/include/proc.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.38 2011/01/14 02:06:26 rmind Exp $	*/
+/*	$NetBSD: proc.h,v 1.39 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*
  * Copyright (c) 1991 Regents of the University of California.
@@ -52,7 +52,6 @@
 };
 
 /* md_flags */
-#define	MDL_USEDFPU	0x0001	/* has used the FPU */
 #define	MDL_IOPL	0x0002	/* XEN: i/o privilege */
 
 struct mdproc {
--- a/sys/arch/i386/include/types.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/include/types.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: types.h,v 1.74 2011/07/06 18:46:04 dyoung Exp $	*/
+/*	$NetBSD: types.h,v 1.75 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -104,7 +104,6 @@
 #define	__HAVE_NEW_STYLE_BUS_H
 #define	__HAVE_CPU_DATA_FIRST
 #define	__HAVE_CPU_COUNTER
-#define	__HAVE_MD_CPU_OFFLINE
 #define	__HAVE_SYSCALL_INTERN
 #define	__HAVE_MINIMAL_EMUL
 #define	__HAVE_OLD_DISKLABEL
@@ -121,4 +120,9 @@
 #define	__HAVE_RAS
 #endif
 
+#if defined(_KERNEL) || defined(_KMEMUSER)
+#define PCU_FPU		0	/* FPU */
+#define PCU_UNIT_COUNT	1
+#endif
+
 #endif	/* _I386_MACHTYPES_H_ */
--- a/sys/arch/i386/isa/npx.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/i386/isa/npx.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npx.c,v 1.143 2011/08/11 18:36:13 cherry Exp $	*/
+/*	$NetBSD: npx.c,v 1.144 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -96,7 +96,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.143 2011/08/11 18:36:13 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.144 2013/10/23 20:18:50 drochner Exp $");
 
 #if 0
 #define IPRINTF(x)	printf x
@@ -374,7 +374,7 @@
 npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
-	struct lwp *l = ci->ci_fpcurlwp;
+	struct lwp *l = curlwp;
 	union savefpu *addr;
 	struct npx_softc *sc;
 	struct pcb *pcb;
@@ -541,73 +541,33 @@
 	return(0);
 }
 
+extern const pcu_ops_t fpu_ops;
+
 /*
  * Implement device not available (DNA) exception
- *
- * If we were the last lwp to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore
- * our last saved state.
  */
 static int
 npxdna(struct cpu_info *ci)
 {
-	struct lwp *l, *fl;
-	struct pcb *pcb;
-	int s;
 
-	if (ci->ci_fpsaving) {
-		/* Recursive trap. */
-		return 1;
-	}
-
-	/* Lock out IPIs and disable preemption. */
-	s = splhigh();
 #ifndef XEN
 	x86_enable_intr();
 #endif
-	/* Save state on current CPU. */
-	l = ci->ci_curlwp;
-	pcb = lwp_getpcb(l);
-
-	fl = ci->ci_fpcurlwp;
-	if (fl != NULL) {
-		/*
-		 * It seems we can get here on Xen even if we didn't
-		 * switch lwp.  In this case do nothing
-		 */
-		if (fl == l) {
-			KASSERT(pcb->pcb_fpcpu == ci);
-			ci->ci_fpused = 1;
-			clts();
-			splx(s);
-			return 1;
-		}
-		KASSERT(fl != l);
-		npxsave_cpu(true);
-		KASSERT(ci->ci_fpcurlwp == NULL);
-	}
+	pcu_load(&fpu_ops);
+	return 1;
+}
 
-	/* Save our state if on a remote CPU. */
-	if (pcb->pcb_fpcpu != NULL) {
-		/* Explicitly disable preemption before dropping spl. */
-		KPREEMPT_DISABLE(l);
-		splx(s);
-		npxsave_lwp(l, true);
-		KASSERT(pcb->pcb_fpcpu == NULL);
-		s = splhigh();
-		KPREEMPT_ENABLE(l);
-	}
+static void
+npx_state_load(struct lwp *l, u_int flags)
+{
+	struct pcb * const pcb = lwp_getpcb(l);
 
-	/*
-	 * Restore state on this CPU, or initialize.  Ensure that
-	 * the entire update is atomic with respect to FPU-sync IPIs.
-	 */
 	clts();
-	ci->ci_fpcurlwp = l;
-	pcb->pcb_fpcpu = ci;
-	ci->ci_fpused = 1;
+	pcb->pcb_cr0 &= ~CR0_TS;
+	if (!(flags & PCU_RELOAD))
+		return;
 
-	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
+	if (!(flags & PCU_LOADED)) {
 		fninit();
 		if (i386_use_fxsave) {
 			fldcw(&pcb->pcb_savefpu.
@@ -616,7 +576,6 @@
 			fldcw(&pcb->pcb_savefpu.
 			    sv_87.sv_env.en_cw);
 		}
-		l->l_md.md_flags |= MDL_USEDFPU;
 	} else if (i386_use_fxsave) {
 		/*
 		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
@@ -642,108 +601,52 @@
 	} else {
 		frstor(&pcb->pcb_savefpu.sv_87);
 	}
-
-	KASSERT(ci == curcpu());
-	splx(s);
-	return 1;
-}
-
-/*
- * Save current CPU's FPU state.  Must be called at IPL_HIGH.
- */
-void
-npxsave_cpu(bool save)
-{
-	struct cpu_info *ci;
-	struct lwp *l;
-	struct pcb *pcb;
-
-	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
-
-	ci = curcpu();
-	l = ci->ci_fpcurlwp;
-	if (l == NULL)
-		return;
-
-	pcb = lwp_getpcb(l);
-
-	if (save) {
-		 /*
-		  * Set ci->ci_fpsaving, so that any pending exception will
-		  * be thrown away.  It will be caught again if/when the
-		  * FPU state is restored.
-		  */
-		KASSERT(ci->ci_fpsaving == 0);
-		clts();
-		ci->ci_fpsaving = 1;
-		if (i386_use_fxsave) {
-			fxsave(&pcb->pcb_savefpu.sv_xmm);
-		} else {
-			fnsave(&pcb->pcb_savefpu.sv_87);
-		}
-		ci->ci_fpsaving = 0;
-	}
-
-	stts();
-	pcb->pcb_fpcpu = NULL;
-	ci->ci_fpcurlwp = NULL;
-	ci->ci_fpused = 1;
 }
 
-/*
- * Save l's FPU state, which may be on this processor or another processor.
- * It may take some time, so we avoid disabling preemption where possible.
- * Caller must know that the target LWP is stopped, otherwise this routine
- * may race against it.
- */
-void
-npxsave_lwp(struct lwp *l, bool save)
+static void
+npx_state_save(struct lwp *l, u_int flags)
 {
-	struct cpu_info *oci;
-	struct pcb *pcb;
-	int s, spins, ticks;
+	struct cpu_info *ci;
+	struct pcb * const pcb = lwp_getpcb(l);
+
+	ci = curcpu();
 
-	spins = 0;
-	ticks = hardclock_ticks;
-	for (;;) {
-		s = splhigh();
-		pcb = lwp_getpcb(l);
-		oci = pcb->pcb_fpcpu;
-		if (oci == NULL) {
-			splx(s);
-			break;
-		}
-		if (oci == curcpu()) {
-			KASSERT(oci->ci_fpcurlwp == l);
-			npxsave_cpu(save);
-			splx(s);
-			break;
-		}
-		splx(s);
-#ifdef XEN
-		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
-			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
-			    cpu_name(oci));
-		}
-#else /* XEN */
-		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
-#endif
-		while (pcb->pcb_fpcpu == oci &&
-		    ticks == hardclock_ticks) {
-			x86_pause();
-			spins++;
-		}
-		if (spins > 100000000) {
-			panic("npxsave_lwp: did not");
-		}
+	/*
+	 * Set ci->ci_fpsaving, so that any pending exception will
+	 * be thrown away.  It will be caught again if/when the
+	 * FPU state is restored.
+	 */
+	KASSERT(ci->ci_fpsaving == 0);
+	clts();
+	ci->ci_fpsaving = 1;
+	if (i386_use_fxsave) {
+		fxsave(&pcb->pcb_savefpu.sv_xmm);
+	} else {
+		fnsave(&pcb->pcb_savefpu.sv_87);
 	}
+	ci->ci_fpsaving = 0;
+}
 
-	if (!save) {
-		/* Ensure we restart with a clean slate. */
-	 	l->l_md.md_flags &= ~MDL_USEDFPU;
-	}
+static void	/* FPU "release" hook, installed as fpu_ops.pcu_state_release */
+npx_state_release(struct lwp *l, u_int flags)	/* flags is not examined here */
+{
+	struct pcb * const pcb = lwp_getpcb(l);
+	
+	stts();	/* presumably sets CR0.TS on the running CPU -- confirm against stts() */
+	pcb->pcb_cr0 |= CR0_TS;	/* mirror the TS bit in the lwp's saved pcb_cr0 */
 }
 
+const pcu_ops_t fpu_ops = {	/* FPU hooks passed to the MI pcu code, e.g. pcu_load(&fpu_ops) */
+	.pcu_id = PCU_FPU,
+	.pcu_state_load = npx_state_load,	/* clears CR0_TS, then initializes or restores FPU state */
+	.pcu_state_save = npx_state_save,	/* fxsave/fnsave into the lwp's pcb_savefpu */
+	.pcu_state_release = npx_state_release,	/* sets CR0_TS again (stts + pcb_cr0) */
+};
+
+const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {	/* ops table indexed by PCU unit id */
+	[PCU_FPU] = &fpu_ops,
+};
+
 /* 
  * The following mechanism is used to ensure that the FPE_... value
  * that is passed as a trapcode to the signal handler of the user
@@ -945,16 +848,20 @@
 npxtrap(struct lwp *l)
 {
 	u_short control, status;
+#if 0
 	struct cpu_info *ci = curcpu();
 	struct lwp *fl = ci->ci_fpcurlwp;
+#endif
+	struct pcb *pcb = lwp_getpcb(l);
 
 	if (!i386_fpu_present) {
-		printf("%s: fpcurthread = %p, curthread = %p, npx_type = %d\n",
-		    __func__, fl, l, npx_type);
+		printf("%s: curthread = %p, npx_type = %d\n",
+		    __func__, l, npx_type);
 		panic("npxtrap from nowhere");
 	}
 	kpreempt_disable();
 
+#if 0
 	/*
 	 * Interrupt handling (for another interrupt) may have pushed the
 	 * state to memory.  Fetch the relevant parts of the state from
@@ -971,6 +878,15 @@
 
 	if (fl == l)
 		fnclex();
+#else
+	if (i386_use_fxsave) {
+		fxsave(&pcb->pcb_savefpu.sv_xmm);
+	} else {
+		fnsave(&pcb->pcb_savefpu.sv_87);
+	}
+	control = GET_FPU_CW(pcb);
+	status = GET_FPU_SW(pcb);
+#endif
 	kpreempt_enable();
 	return fpetable[status & ((~control & 0x3f) | 0x40)];
 }
--- a/sys/arch/x86/acpi/acpi_wakeup.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/acpi/acpi_wakeup.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_wakeup.c,v 1.32 2012/08/26 01:04:03 jakllsch Exp $	*/
+/*	$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.32 2012/08/26 01:04:03 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $");
 
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.32 2012/08/26 01:04:03 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -309,11 +309,7 @@
 	AcpiSetFirmwareWakingVector(acpi_wakeup_paddr);
 
 	s = splhigh();
-#ifdef __i386__
-	npxsave_cpu(true);
-#else
-	fpusave_cpu(true);
-#endif
+	pcu_save_all_on_cpu();
 	x86_disable_intr();
 
 #ifdef MULTIPROCESSOR
--- a/sys/arch/x86/include/cpu.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/include/cpu.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.54 2013/10/17 20:59:16 christos Exp $	*/
+/*	$NetBSD: cpu.h,v 1.55 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -100,9 +100,7 @@
 	 */
 	struct cpu_info *ci_next;	/* next cpu */
 	struct lwp *ci_curlwp;		/* current owner of the processor */
-	struct lwp *ci_fpcurlwp;	/* current owner of the FPU */
 	int	ci_fpsaving;		/* save in progress */
-	int	ci_fpused;		/* XEN: FPU was used by curlwp */
 	cpuid_t ci_cpuid;		/* our CPU ID */
 	int	_unused;
 	uint32_t ci_acpiid;		/* our ACPI/MADT ID */
@@ -425,10 +423,6 @@
 
 void	cpu_probe_features(struct cpu_info *);
 
-/* npx.c */
-void	npxsave_lwp(struct lwp *, bool);
-void	npxsave_cpu(bool);
-
 /* vm_machdep.c */
 paddr_t	kvtop(void *);
 
--- a/sys/arch/x86/include/intrdefs.h	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/include/intrdefs.h	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: intrdefs.h,v 1.17 2011/11/06 11:40:47 cherry Exp $	*/
+/*	$NetBSD: intrdefs.h,v 1.18 2013/10/23 20:18:50 drochner Exp $	*/
 
 #ifndef _X86_INTRDEFS_H_
 #define _X86_INTRDEFS_H_
@@ -61,7 +61,7 @@
 #define X86_IPI_HALT			0x00000001
 #define X86_IPI_MICROSET		0x00000002
 #define X86_IPI__UNUSED1		0x00000004
-#define X86_IPI_SYNCH_FPU		0x00000008
+#define X86_IPI__UNUSED2		0x00000008
 #define X86_IPI_MTRR			0x00000010
 #define X86_IPI_GDT			0x00000020
 #define X86_IPI_XCALL			0x00000040
--- a/sys/arch/x86/x86/cpu.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/x86/cpu.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.102 2012/12/12 22:43:35 pgoyette Exp $	*/
+/*	$NetBSD: cpu.c,v 1.103 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.102 2012/12/12 22:43:35 pgoyette Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.103 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -893,15 +893,14 @@
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 
-	db_printf("addr		dev	id	flags	ipis	curlwp 		fpcurlwp\n");
+	db_printf("addr		dev	id	flags	ipis	curlwp\n");
 	for (CPU_INFO_FOREACH(cii, ci)) {
-		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
+		db_printf("%p	%s	%ld	%x	%x	%10p\n",
 		    ci,
 		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
 		    (long)ci->ci_cpuid,
 		    ci->ci_flags, ci->ci_ipis,
-		    ci->ci_curlwp,
-		    ci->ci_fpcurlwp);
+		    ci->ci_curlwp);
 	}
 }
 #endif
@@ -1117,22 +1116,6 @@
 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
 }
 
-void
-cpu_offline_md(void)
-{
-	int s;
-
-	s = splhigh();
-#ifdef i386
-#if NNPX > 0
-	npxsave_cpu(true);
-#endif
-#else
-	fpusave_cpu(true);
-#endif
-	splx(s);
-}
-
 /* XXX joerg restructure and restart CPUs individually */
 static bool
 cpu_stop(device_t dv)
--- a/sys/arch/x86/x86/ipi.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/x86/ipi.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ipi.c,v 1.18 2010/06/22 18:29:03 rmind Exp $	*/
+/*	$NetBSD: ipi.c,v 1.19 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2008, 2009 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.18 2010/06/22 18:29:03 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.19 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_mtrr.h"
 
@@ -56,20 +56,6 @@
 
 #include "acpica.h"
 
-#ifdef __x86_64__
-#include <machine/fpu.h>
-static void	x86_ipi_synch_fpu(struct cpu_info *);
-#else
-/* XXXfpu */
-#include "npx.h"
-#if NNPX > 0
-static void	x86_ipi_synch_fpu(struct cpu_info *);
-#define		fpusave_cpu(x)		npxsave_cpu(x)
-#else
-#define		x86_ipi_synch_fpu	NULL
-#endif
-#endif
-
 static void	x86_ipi_halt(struct cpu_info *);
 static void	x86_ipi_kpreempt(struct cpu_info *);
 static void	x86_ipi_xcall(struct cpu_info *);
@@ -91,7 +77,7 @@
 	x86_ipi_halt,
 	NULL,
 	NULL,
-	x86_ipi_synch_fpu,
+	NULL,
 	x86_ipi_reload_mtrr,
 	gdt_reload_cpu,
 	x86_ipi_xcall,
@@ -180,15 +166,6 @@
 	}
 }
 
-#if defined(__x86_64__) || NNPX > 0	/* XXXfpu */
-static void
-x86_ipi_synch_fpu(struct cpu_info *ci)
-{
-
-	fpusave_cpu(true);
-}
-#endif
-
 #ifdef MTRR
 static void
 x86_ipi_reload_mtrr(struct cpu_info *ci)
--- a/sys/arch/x86/x86/vm_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/x86/x86/vm_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.17 2013/10/23 20:18:50 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.17 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_mtrr.h"
 
@@ -103,17 +103,6 @@
 #include <machine/mtrr.h>
 #endif
 
-#ifdef __x86_64__
-#include <machine/fpu.h>
-#else
-#include "npx.h"
-#if NNPX > 0
-#define fpusave_lwp(x, y)	npxsave_lwp(x, y)
-#else
-#define fpusave_lwp(x, y)
-#endif
-#endif
-
 void
 cpu_proc_fork(struct proc *p1, struct proc *p2)
 {
@@ -145,14 +134,6 @@
 	pcb2 = lwp_getpcb(l2);
 
 	/*
-	 * If parent LWP was using FPU, then we have to save the FPU h/w
-	 * state to PCB so that we can copy it.
-	 */
-	if (pcb1->pcb_fpcpu != NULL) {
-		fpusave_lwp(l1, true);
-	}
-
-	/*
 	 * Sync the PCB before we copy it.
 	 */
 	if (l1 == curlwp) {
@@ -168,6 +149,7 @@
 #if defined(XEN)
 	pcb2->pcb_iopl = SEL_KPL;
 #endif
+	pcb2->pcb_cr0 |= CR0_TS;
 
 	/*
 	 * Set the kernel stack address (from the address to uarea) and
@@ -244,12 +226,6 @@
 void
 cpu_lwp_free(struct lwp *l, int proc)
 {
-	struct pcb *pcb = lwp_getpcb(l);
-
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL) {
-		fpusave_lwp(l, false);
-	}
 
 #ifdef MTRR
 	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
--- a/sys/arch/xen/x86/cpu.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/xen/x86/cpu.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.93 2012/06/24 13:56:10 jym Exp $	*/
+/*	$NetBSD: cpu.c,v 1.94 2013/10/23 20:18:50 drochner Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.93 2012/06/24 13:56:10 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.94 2013/10/23 20:18:50 drochner Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -735,15 +735,14 @@
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 
-	db_printf("addr		dev	id	flags	ipis	curlwp 		fpcurlwp\n");
+	db_printf("addr		dev	id	flags	ipis	curlwp\n");
 	for (CPU_INFO_FOREACH(cii, ci)) {
-		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
+		db_printf("%p	%s	%ld	%x	%x	%10p\n",
 		    ci,
 		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
 		    (long)ci->ci_cpuid,
 		    ci->ci_flags, ci->ci_ipis,
-		    ci->ci_curlwp,
-		    ci->ci_fpcurlwp);
+		    ci->ci_curlwp);
 	}
 }
 #endif /* DDB */
@@ -1053,20 +1052,6 @@
 
 }
 
-void
-cpu_offline_md(void)
-{
-        int s;
-
-        s = splhigh();
-#ifdef __i386__
-        npxsave_cpu(true);
-#else   
-        fpusave_cpu(true);
-#endif
-        splx(s);
-}
-
 void    
 cpu_get_tsc_freq(struct cpu_info *ci)
 {
--- a/sys/arch/xen/x86/xen_ipi.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/arch/xen/x86/xen_ipi.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xen_ipi.c,v 1.12 2013/09/14 13:07:55 joerg Exp $ */
+/* $NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $ */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -33,10 +33,10 @@
 
 /* 
  * Based on: x86/ipi.c
- * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.12 2013/09/14 13:07:55 joerg Exp $"); 
+ * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $"); 
  */
 
-__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.12 2013/09/14 13:07:55 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $");
 
 #include <sys/types.h>
 
@@ -48,11 +48,6 @@
 #include <sys/errno.h>
 #include <sys/systm.h>
 
-#ifdef __x86_64__
-#include <machine/fpu.h>
-#else
-#include <machine/npx.h>
-#endif /* __x86_64__ */
 #include <machine/frame.h>
 #include <machine/segments.h>
 
@@ -69,7 +64,6 @@
 #endif /* __x86_64__ */
 
 static void xen_ipi_halt(struct cpu_info *, struct intrframe *);
-static void xen_ipi_synch_fpu(struct cpu_info *, struct intrframe *);
 static void xen_ipi_ddb(struct cpu_info *, struct intrframe *);
 static void xen_ipi_xcall(struct cpu_info *, struct intrframe *);
 static void xen_ipi_hvcb(struct cpu_info *, struct intrframe *);
@@ -77,7 +71,7 @@
 static void (*ipifunc[XEN_NIPIS])(struct cpu_info *, struct intrframe *) =
 {	/* In order of priority (see: xen/include/intrdefs.h */
 	xen_ipi_halt,
-	xen_ipi_synch_fpu,
+	NULL,
 	xen_ipi_ddb,
 	xen_ipi_xcall,
 	xen_ipi_hvcb
@@ -217,19 +211,6 @@
 }
 
 static void
-xen_ipi_synch_fpu(struct cpu_info *ci, struct intrframe *intrf)
-{
-	KASSERT(ci != NULL);
-	KASSERT(intrf != NULL);
-
-#ifdef __x86_64__
-	fpusave_cpu(true);
-#else
-	npxsave_cpu(true);
-#endif /* __x86_64__ */
-}
-
-static void
 xen_ipi_ddb(struct cpu_info *ci, struct intrframe *intrf)
 {
 	KASSERT(ci != NULL);
--- a/sys/compat/linux/arch/amd64/linux_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/compat/linux/arch/amd64/linux_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.40 2012/07/08 20:14:12 dsl Exp $ */
+/*	$NetBSD: linux_machdep.c,v 1.41 2013/10/23 20:18:51 drochner Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -33,7 +33,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.40 2012/07/08 20:14:12 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.41 2013/10/23 20:18:51 drochner Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -44,6 +44,7 @@
 #include <sys/ptrace.h> /* for process_read_fpregs() */
 #include <sys/ucontext.h>
 #include <sys/conf.h>
+#include <sys/pcu.h>
 
 #include <machine/reg.h>
 #include <machine/pcb.h>
@@ -64,6 +65,7 @@
 #include <dev/wscons/wsdisplay_usl_io.h>
 #endif
 
+extern const pcu_ops_t fpu_ops;
 
 #include <compat/linux/common/linux_signal.h>
 #include <compat/linux/common/linux_errno.h>
@@ -84,11 +86,8 @@
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL)
-		fpusave_lwp(l, 0);
+	pcu_discard(&fpu_ops, false);
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
 	pcb->pcb_flags = 0;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
 	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@@ -156,7 +155,7 @@
 	/* 
 	 * Save FPU state, if any 
 	 */
-	if (l->l_md.md_flags & MDL_USEDFPU) {
+	if (pcu_used_p(&fpu_ops)) {
 		sp = (char *)
 		    (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL);
 		fpsp = (struct linux__fpstate *)sp;
--- a/sys/compat/linux/arch/i386/linux_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/compat/linux/arch/i386/linux_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.151 2011/11/18 04:07:44 christos Exp $	*/
+/*	$NetBSD: linux_machdep.c,v 1.152 2013/10/23 20:18:51 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.151 2011/11/18 04:07:44 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.152 2013/10/23 20:18:51 drochner Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_vm86.h"
@@ -139,7 +139,7 @@
 	pmap_ldt_cleanup(l);
 #endif
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
+	pcu_discard_all(l);
 
 	if (i386_use_fxsave) {
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
--- a/sys/compat/linux32/arch/amd64/linux32_machdep.c	Wed Oct 23 18:57:40 2013 +0000
+++ b/sys/compat/linux32/arch/amd64/linux32_machdep.c	Wed Oct 23 20:18:50 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $ */
+/*	$NetBSD: linux32_machdep.c,v 1.32 2013/10/23 20:18:51 drochner Exp $ */
 
 /*-
  * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,11 +31,12 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.32 2013/10/23 20:18:51 drochner Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/exec.h>
+#include <sys/pcu.h>
 
 #include <machine/vmparam.h>
 #include <machine/cpufunc.h>
@@ -81,6 +82,8 @@
 static int linux32_restore_sigcontext(struct lwp *, 
     struct linux32_sigcontext *, register_t *);
 
+extern const pcu_ops_t fpu_ops;
+
 void
 linux32_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
 {
@@ -273,9 +276,7 @@
 	struct trapframe *tf;
 	struct proc *p = l->l_proc;
 
-	/* If we were using the FPU, forget about it. */
-	if (pcb->pcb_fpcpu != NULL)
-		fpusave_lwp(l, 0);
+	pcu_discard(&fpu_ops, false);
 
 #if defined(USER_LDT) && 0
 	pmap_ldt_cleanup(p);
@@ -283,7 +284,6 @@
 
 	netbsd32_adjust_limits(p);
 
-	l->l_md.md_flags &= ~MDL_USEDFPU;
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Forces iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;