revert fpu/pcu changes until we figure out what's wrong; they cause random freezes (trunk)
author christos <christos@NetBSD.org>
Sun, 01 Dec 2013 01:05:16 +0000
branch trunk
changeset 222876 9a38d3896021
parent 222875 4db13db568ee
child 222877 80682ad466e4
revert fpu/pcu changes until we figure out what's wrong; they cause random freezes
sys/arch/amd64/amd64/fpu.c
sys/arch/amd64/amd64/genassym.cf
sys/arch/amd64/amd64/locore.S
sys/arch/amd64/amd64/machdep.c
sys/arch/amd64/amd64/netbsd32_machdep.c
sys/arch/amd64/amd64/process_machdep.c
sys/arch/amd64/include/fpu.h
sys/arch/amd64/include/pcb.h
sys/arch/amd64/include/proc.h
sys/arch/amd64/include/types.h
sys/arch/i386/i386/compat_16_machdep.c
sys/arch/i386/i386/genassym.cf
sys/arch/i386/i386/locore.S
sys/arch/i386/i386/machdep.c
sys/arch/i386/i386/process_machdep.c
sys/arch/i386/include/pcb.h
sys/arch/i386/include/proc.h
sys/arch/i386/include/types.h
sys/arch/i386/isa/npx.c
sys/arch/x86/acpi/acpi_wakeup.c
sys/arch/x86/include/cpu.h
sys/arch/x86/include/intrdefs.h
sys/arch/x86/x86/cpu.c
sys/arch/x86/x86/ipi.c
sys/arch/x86/x86/vm_machdep.c
sys/arch/xen/x86/cpu.c
sys/arch/xen/x86/xen_ipi.c
sys/compat/linux/arch/amd64/linux_machdep.c
sys/compat/linux/arch/i386/linux_machdep.c
sys/compat/linux32/arch/amd64/linux32_machdep.c
--- a/sys/arch/amd64/amd64/fpu.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/fpu.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.42 2013/10/27 16:25:01 rmind Exp $	*/
+/*	$NetBSD: fpu.c,v 1.43 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
@@ -95,13 +95,24 @@
  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
  */
 
+/*
+ * XXXfvdl update copyright notice. this started out as a stripped isa/npx.c
+ */
+
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.42 2013/10/27 16:25:01 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.43 2013/12/01 01:05:16 christos Exp $");
+
+#include "opt_multiprocessor.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/conf.h>
 #include <sys/cpu.h>
+#include <sys/file.h>
 #include <sys/proc.h>
+#include <sys/ioctl.h>
+#include <sys/device.h>
+#include <sys/vmmeter.h>
 #include <sys/kernel.h>
 
 #include <sys/bus.h>
@@ -175,6 +186,11 @@
 	KPREEMPT_DISABLE(l);
 	x86_enable_intr();
 
+	/*
+	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
+	 * should be set, and we should have gotten a DNA exception.
+	 */
+	KASSERT(l == curlwp);
 	fxsave(sfp);
 	pcb->pcb_savefpu_i387.fp_ex_tw = sfp->fp_fxsave.fx_ftw;
 	pcb->pcb_savefpu_i387.fp_ex_sw = sfp->fp_fxsave.fx_fsw;
@@ -233,34 +249,70 @@
  * Otherwise, we save the previous state, if necessary, and restore
  * our last saved state.
  */
-
-extern const pcu_ops_t fpu_ops;
-
 void
 fpudna(struct cpu_info *ci)
 {
-	pcu_load(&fpu_ops);
-}
+	uint16_t cw;
+	uint32_t mxcsr;
+	struct lwp *l, *fl;
+	struct pcb *pcb;
+	int s;
+
+	if (ci->ci_fpsaving) {
+		/* Recursive trap. */
+		x86_enable_intr();
+		return;
+	}
+
+	/* Lock out IPIs and disable preemption. */
+	s = splhigh();
+	x86_enable_intr();
 
-static void
-fpu_state_load(struct lwp *l, u_int flags)
-{
-	struct pcb *pcb = lwp_getpcb(l);
+	/* Save state on current CPU. */
+	l = ci->ci_curlwp;
+	pcb = lwp_getpcb(l);
+	fl = ci->ci_fpcurlwp;
+	if (fl != NULL) {
+		/*
+		 * It seems we can get here on Xen even if we didn't
+		 * switch lwp.  In this case do nothing
+		 */
+		if (fl == l) {
+			KASSERT(pcb->pcb_fpcpu == ci);
+			clts();
+			splx(s);
+			return;
+		}
+		KASSERT(fl != l);
+		fpusave_cpu(true);
+		KASSERT(ci->ci_fpcurlwp == NULL);
+	}
 
+	/* Save our state if on a remote CPU. */
+	if (pcb->pcb_fpcpu != NULL) {
+		/* Explicitly disable preemption before dropping spl. */
+		KPREEMPT_DISABLE(l);
+		splx(s);
+		fpusave_lwp(l, true);
+		KASSERT(pcb->pcb_fpcpu == NULL);
+		s = splhigh();
+		KPREEMPT_ENABLE(l);
+	}
+
+	/*
+	 * Restore state on this CPU, or initialize.  Ensure that
+	 * the entire update is atomic with respect to FPU-sync IPIs.
+	 */
 	clts();
-	pcb->pcb_cr0 &= ~CR0_TS;
-	if ((flags & PCU_RELOAD) == 0)
-		return;
-
-	if ((flags & PCU_LOADED) == 0) {
-		uint32_t mxcsr;
-		uint16_t cw;
-
+	ci->ci_fpcurlwp = l;
+	pcb->pcb_fpcpu = ci;
+	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
 		fninit();
 		cw = pcb->pcb_savefpu.fp_fxsave.fx_fcw;
 		fldcw(&cw);
 		mxcsr = pcb->pcb_savefpu.fp_fxsave.fx_mxcsr;
 		x86_ldmxcsr(&mxcsr);
+		l->l_md.md_flags |= MDL_USEDFPU;
 	} else {
 		/*
 		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
@@ -286,33 +338,97 @@
 		fldummy(&zero);
 		fxrstor(&pcb->pcb_savefpu);
 	}
+
+	KASSERT(ci == curcpu());
+	splx(s);
 }
 
-static void
-fpu_state_save(struct lwp *l, u_int flags)
+/*
+ * Save current CPU's FPU state.  Must be called at IPL_HIGH.
+ */
+void
+fpusave_cpu(bool save)
 {
-	struct pcb *pcb = lwp_getpcb(l);
+	struct cpu_info *ci;
+	struct pcb *pcb;
+	struct lwp *l;
+
+	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
+
+	ci = curcpu();
+	l = ci->ci_fpcurlwp;
+	if (l == NULL) {
+		return;
+	}
+	pcb = lwp_getpcb(l);
 
-	clts();
-	fxsave(&pcb->pcb_savefpu);
+	if (save) {
+		 /*
+		  * Set ci->ci_fpsaving, so that any pending exception will
+		  * be thrown away.  It will be caught again if/when the
+		  * FPU state is restored.
+		  */
+		KASSERT(ci->ci_fpsaving == 0);
+		clts();
+		ci->ci_fpsaving = 1;
+		fxsave(&pcb->pcb_savefpu);
+		ci->ci_fpsaving = 0;
+	}
+
+	stts();
+	pcb->pcb_fpcpu = NULL;
+	ci->ci_fpcurlwp = NULL;
 }
 
-static void
-fpu_state_release(struct lwp *l, u_int flags)
+/*
+ * Save l's FPU state, which may be on this processor or another processor.
+ * It may take some time, so we avoid disabling preemption where possible.
+ * Caller must know that the target LWP is stopped, otherwise this routine
+ * may race against it.
+ */
+void
+fpusave_lwp(struct lwp *l, bool save)
 {
-	struct pcb *pcb = lwp_getpcb(l);
-
-	stts();
-	pcb->pcb_cr0 |= CR0_TS;
-}
+	struct cpu_info *oci;
+	struct pcb *pcb;
+	int s, spins, ticks;
 
-const pcu_ops_t fpu_ops = {
-	.pcu_id = PCU_FPU,
-	.pcu_state_load = fpu_state_load,
-	.pcu_state_save = fpu_state_save,
-	.pcu_state_release = fpu_state_release,
-};
+	spins = 0;
+	ticks = hardclock_ticks;
+	for (;;) {
+		s = splhigh();
+		pcb = lwp_getpcb(l);
+		oci = pcb->pcb_fpcpu;
+		if (oci == NULL) {
+			splx(s);
+			break;
+		}
+		if (oci == curcpu()) {
+			KASSERT(oci->ci_fpcurlwp == l);
+			fpusave_cpu(save);
+			splx(s);
+			break;
+		}
+		splx(s);
+#ifdef XEN
+		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
+			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
+			    cpu_name(oci));
+		}
+#else /* XEN */
+		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
+#endif
+		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
+			x86_pause();
+			spins++;
+		}
+		if (spins > 100000000) {
+			panic("fpusave_lwp: did not");
+		}
+	}
 
-const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
-	[PCU_FPU] = &fpu_ops,
-};
+	if (!save) {
+		/* Ensure we restart with a clean slate. */
+	 	l->l_md.md_flags &= ~MDL_USEDFPU;
+	}
+}
--- a/sys/arch/amd64/amd64/genassym.cf	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.53 2013/10/23 20:18:50 drochner Exp $
+#	$NetBSD: genassym.cf,v 1.54 2013/12/01 01:05:16 christos Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -185,6 +185,7 @@
 define	PCB_RSP0		offsetof(struct pcb, pcb_rsp0)
 define	PCB_CR0			offsetof(struct pcb, pcb_cr0)
 define	PCB_ONFAULT		offsetof(struct pcb, pcb_onfault)
+define	PCB_FPCPU		offsetof(struct pcb, pcb_fpcpu)
 define	PCB_FLAGS		offsetof(struct pcb, pcb_flags)
 define	PCB_COMPAT32		PCB_COMPAT32
 define	PCB_FS			offsetof(struct pcb, pcb_fs)
@@ -237,6 +238,7 @@
 define	CPU_INFO_NTRAP		offsetof(struct cpu_info, ci_data.cpu_ntrap)
 define	CPU_INFO_NINTR		offsetof(struct cpu_info, ci_data.cpu_nintr)
 define	CPU_INFO_CURPRIORITY	offsetof(struct cpu_info, ci_schedstate.spc_curpriority)
+define	CPU_INFO_FPCURLWP	offsetof(struct cpu_info, ci_fpcurlwp)
 
 define	CPU_INFO_GDT		offsetof(struct cpu_info, ci_gdt)
 define	CPU_INFO_IPENDING	offsetof(struct cpu_info, ci_ipending)
--- a/sys/arch/amd64/amd64/locore.S	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/locore.S	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.74 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: locore.S,v 1.75 2013/12/01 01:05:16 christos Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1023,11 +1023,20 @@
 	 */
 2:
 #ifndef XEN
-	movl	PCB_CR0(%r14),%ecx
+	movl	$IPL_HIGH,CPUVAR(ILEVEL)
+	movl	PCB_CR0(%r14),%ecx	/* has CR0_TS clear */
 	movq	%cr0,%rdx
 
+	/*
+	 * If our floating point registers are on a different CPU,
+	 * set CR0_TS so we'll trap rather than reuse bogus state.
+	 */
+	cmpq	CPUVAR(FPCURLWP),%r12
+	je	3f
+	orq	$CR0_TS,%rcx
+
 	/* Reloading CR0 is very expensive - avoid if possible. */
-	cmpq	%rdx,%rcx
+3:	cmpq	%rdx,%rcx
 	je	6f
 	movq	%rcx,%cr0
 
--- a/sys/arch/amd64/amd64/machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.199 2013/11/11 11:10:45 joerg Exp $	*/
+/*	$NetBSD: machdep.c,v 1.200 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.199 2013/11/11 11:10:45 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.200 2013/12/01 01:05:16 christos Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -330,8 +330,6 @@
 
 static int valid_user_selector(struct lwp *, uint64_t);
 
-extern const pcu_ops_t fpu_ops;
-
 /*
  * Machine-dependent startup code
  */
@@ -424,17 +422,23 @@
 void
 x86_64_tls_switch(struct lwp *l)
 {
+	struct cpu_info *ci = curcpu();
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf = l->l_md.md_regs;
 
 	/*
+	 * Raise the IPL to IPL_HIGH.
+	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
+	 * is deferred until mi_switch(), when cpu_switchto() returns.
+	 */
+	(void)splhigh();
+	/*
 	 * If our floating point registers are on a different CPU,
 	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
-	if (pcb->pcb_cr0 & CR0_TS)
+	if (l != ci->ci_fpcurlwp) {
 		HYPERVISOR_fpu_taskswitch(1);
-	else
-		HYPERVISOR_fpu_taskswitch(0);
+	}
 
 	/* Update TLS segment pointers */
 	if (pcb->pcb_flags & PCB_COMPAT32) {
@@ -467,7 +471,7 @@
 	pcb->pcb_iopl = SEL_KPL;
 
 	pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
-	pcb->pcb_cr0 = rcr0();
+	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1;
 
 #if !defined(XEN)
@@ -544,7 +548,8 @@
 	tf->tf_rsp = (uint64_t)f;
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
 
-	pcu_discard_all(l);
+	/* Ensure FP state is reset, if FP is used. */
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 }
 
 void
@@ -591,7 +596,7 @@
 	/*
 	 * Don't bother copying out FP state if there is none.
 	 */
-	if (pcu_used_p(&fpu_ops))
+	if (l->l_md.md_flags & MDL_USEDFPU)
 		tocopy = sizeof (struct sigframe_siginfo);
 	else
 		tocopy = sizeof (struct sigframe_siginfo) -
@@ -1318,11 +1323,16 @@
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL) {
+		fpusave_lwp(l, false);
+	}
+
 #ifdef USER_LDT
 	pmap_ldt_cleanup(l);
 #endif
 
-	pcu_discard(&fpu_ops, false);
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 	pcb->pcb_flags = 0;
 	if (pack->ep_osversion >= 699002600)
 		pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
@@ -1917,10 +1927,12 @@
 	mcp->_mc_tlsbase = (uintptr_t)l->l_private;;
 	*flags |= _UC_TLSBASE;
 
-	if (pcu_used_p(&fpu_ops)) {
+	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
 		struct pcb *pcb = lwp_getpcb(l);
 
-		pcu_save(&fpu_ops);
+		if (pcb->pcb_fpcpu) {
+			fpusave_lwp(l, true);
+		}
 		memcpy(mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
 		    sizeof (mcp->__fpregs));
 		*flags |= _UC_FPU;
@@ -1973,12 +1985,14 @@
 		l->l_md.md_flags |= MDL_IRET;
 	}
 
+	if (pcb->pcb_fpcpu != NULL)
+		fpusave_lwp(l, false);
+
 	if ((flags & _UC_FPU) != 0) {
-		pcu_discard(&fpu_ops, true);
 		memcpy(&pcb->pcb_savefpu.fp_fxsave, mcp->__fpregs,
 		    sizeof (mcp->__fpregs));
-	} else
-		pcu_discard(&fpu_ops, false);
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
 
 	if ((flags & _UC_TLSBASE) != 0)
 		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.84 2013/11/11 11:10:45 joerg Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.85 2013/12/01 01:05:16 christos Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.84 2013/11/11 11:10:45 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.85 2013/12/01 01:05:16 christos Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -60,7 +60,6 @@
 #include <sys/ras.h>
 #include <sys/ptrace.h>
 #include <sys/kauth.h>
-#include <sys/pcu.h>
 
 #include <machine/fpu.h>
 #include <machine/frame.h>
@@ -94,8 +93,6 @@
 
 static int check_sigcontext32(struct lwp *, const struct netbsd32_sigcontext *);
 
-extern const pcu_ops_t fpu_ops;
-
 #ifdef EXEC_AOUT
 /*
  * There is no native a.out -- this function is required
@@ -131,14 +128,18 @@
 
 	pcb = lwp_getpcb(l);
 
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL) {
+		fpusave_lwp(l, false);
+	}
+
 #if defined(USER_LDT) && 0
 	pmap_ldt_cleanup(p);
 #endif
 
-	pcu_discard(&fpu_ops, false);
-
 	netbsd32_adjust_limits(p);
 
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Force iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
 	if (pack->ep_osversion >= 699002600)
@@ -263,7 +264,7 @@
 	tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	pcu_discard_all(l); /* XXX no FP context saved, we'll lose */
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 
 	tf->tf_rip = (uint64_t)catcher;
 	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL);
@@ -366,7 +367,7 @@
 	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	pcu_discard_all(l);
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 
 	/* Remember that we're now on the signal stack. */
 	if (onstack)
@@ -890,11 +891,14 @@
 		/*
 		 * If we were using the FPU, forget that we were.
 		 */
-		pcu_discard(&fpu_ops, true);
+		if (pcb->pcb_fpcpu != NULL) {
+			fpusave_lwp(l, false);
+		}
 		memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs,
 		    sizeof (pcb->pcb_savefpu.fp_fxsave));
-	} else
-		pcu_discard(&fpu_ops, false);
+		/* If not set already. */
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
 
 	mutex_enter(p->p_lock);
 	if (flags & _UC_SETSTACK)
@@ -944,10 +948,12 @@
 	*flags |= _UC_TLSBASE;
 
 	/* Save floating point register context, if any. */
-	if (pcu_used_p(&fpu_ops)) {
+	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
 		struct pcb *pcb = lwp_getpcb(l);
 
-		pcu_save(&fpu_ops);
+		if (pcb->pcb_fpcpu) {
+			fpusave_lwp(l, true);
+		}
 		memcpy(&mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
 		    sizeof (pcb->pcb_savefpu.fp_fxsave));
 		*flags |= _UC_FPU;
--- a/sys/arch/amd64/amd64/process_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/amd64/process_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.22 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.23 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -53,7 +53,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.22 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.23 2013/12/01 01:05:16 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -62,7 +62,6 @@
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
-#include <sys/pcu.h>
 
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -76,8 +75,6 @@
 static inline int verr_ldt(struct pmap *, int sel);
 #endif
 
-extern const pcu_ops_t fpu_ops;
-
 static inline struct trapframe *
 process_frame(struct lwp *l)
 {
@@ -110,8 +107,8 @@
 {
 	struct fxsave64 *frame = process_fpframe(l);
 
-	if (pcu_used_p(&fpu_ops)) {
-		pcu_save(&fpu_ops);
+	if (l->l_md.md_flags & MDL_USEDFPU) {
+		fpusave_lwp(l, true);
 	} else {
 		uint16_t cw;
 		uint32_t mxcsr, mxcsr_mask;
@@ -130,6 +127,7 @@
 		frame->fx_ftw = 0x00;	/* abridged tag; all empty */
 		frame->fx_mxcsr = mxcsr;
 		frame->fx_mxcsr_mask = mxcsr_mask;
+		l->l_md.md_flags |= MDL_USEDFPU;
 	}
 
 	memcpy(&regs->fxstate, frame, sizeof(*regs));
@@ -164,7 +162,12 @@
 {
 	struct fxsave64 *frame = process_fpframe(l);
 
-	pcu_discard(&fpu_ops, true);
+	if (l->l_md.md_flags & MDL_USEDFPU) {
+		fpusave_lwp(l, false);
+	} else {
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
+
 	memcpy(frame, &regs->fxstate, sizeof(*regs));
 	return (0);
 }
--- a/sys/arch/amd64/include/fpu.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/include/fpu.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.h,v 1.9 2013/11/11 11:10:45 joerg Exp $	*/
+/*	$NetBSD: fpu.h,v 1.10 2013/12/01 01:05:16 christos Exp $	*/
 
 #ifndef	_AMD64_FPU_H_
 #define	_AMD64_FPU_H_
@@ -79,7 +79,12 @@
 struct cpu_info;
 
 void fpuinit(struct cpu_info *);
+void fpudrop(void);
+void fpusave(struct lwp *);
+void fpudiscard(struct lwp *);
 void fputrap(struct trapframe *);
+void fpusave_lwp(struct lwp *, bool);
+void fpusave_cpu(bool);
 
 #endif
 
--- a/sys/arch/amd64/include/pcb.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/include/pcb.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.19 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: pcb.h,v 1.20 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -97,6 +97,7 @@
 	struct	savefpu pcb_savefpu __aligned(16); /* floating point state */
 	uint32_t pcb_unused_1[4];	/* unused */
 	void     *pcb_onfault;		/* copyin/out fault recovery */
+	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	uint64_t  pcb_fs;
 	uint64_t  pcb_gs;
 	int pcb_iopl;
--- a/sys/arch/amd64/include/proc.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/include/proc.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.16 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: proc.h,v 1.17 2013/12/01 01:05:16 christos Exp $	*/
 
 /*
  * Copyright (c) 1991 Regents of the University of California.
@@ -53,6 +53,7 @@
 	volatile int md_astpending;
 };
 
+#define	MDL_USEDFPU	0x0001	/* has used the FPU */
 #define	MDL_COMPAT32	0x0008	/* i386, always return via iret */
 #define	MDL_IRET	0x0010	/* force return via iret, not sysret */
 
--- a/sys/arch/amd64/include/types.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/amd64/include/types.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: types.h,v 1.42 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: types.h,v 1.43 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -79,6 +79,7 @@
 #define	__HAVE_NEW_STYLE_BUS_H
 #define	__HAVE_CPU_COUNTER
 #define	__HAVE_CPU_DATA_FIRST
+#define	__HAVE_MD_CPU_OFFLINE
 #define	__HAVE_SYSCALL_INTERN
 #define	__HAVE_MINIMAL_EMUL
 #define	__HAVE_ATOMIC64_OPS
@@ -102,11 +103,6 @@
 #endif
 #endif
 
-#if defined(_KERNEL) || defined(_KMEMUSER)
-#define	PCU_FPU		0	/* FPU */
-#define	PCU_UNIT_COUNT	1
-#endif
-
 #else	/*	!__x86_64__	*/
 
 #include <i386/types.h>
--- a/sys/arch/i386/i386/compat_16_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/i386/compat_16_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: compat_16_machdep.c,v 1.23 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: compat_16_machdep.c,v 1.24 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: compat_16_machdep.c,v 1.23 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: compat_16_machdep.c,v 1.24 2013/12/01 01:05:16 christos Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_vm86.h"
@@ -260,7 +260,9 @@
 		/* NOTREACHED */
 	}
 
+	int svufpu = l->l_md.md_flags & MDL_USEDFPU;
 	buildcontext(l, sel, catcher, fp);
+	l->l_md.md_flags |= svufpu;
 
 	/* Remember that we're now on the signal stack. */
 	if (onstack)
--- a/sys/arch/i386/i386/genassym.cf	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/i386/genassym.cf	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.95 2013/10/23 20:18:50 drochner Exp $
+#	$NetBSD: genassym.cf,v 1.96 2013/12/01 01:05:16 christos Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -204,6 +204,7 @@
 define	PCB_ESP0		offsetof(struct pcb, pcb_esp0)
 define	PCB_CR0			offsetof(struct pcb, pcb_cr0)
 define	PCB_ONFAULT		offsetof(struct pcb, pcb_onfault)
+define	PCB_FPCPU		offsetof(struct pcb, pcb_fpcpu)
 define	PCB_FSD			offsetof(struct pcb, pcb_fsd)
 define	PCB_GSD			offsetof(struct pcb, pcb_gsd)
 define	PCB_IOMAP		offsetof(struct pcb, pcb_iomap)
@@ -262,6 +263,7 @@
 define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
 define	TLBSTATE_VALID		TLBSTATE_VALID
 define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
+define	CPU_INFO_FPCURLWP	offsetof(struct cpu_info, ci_fpcurlwp)
 define	CPU_INFO_CURLDT		offsetof(struct cpu_info, ci_curldt)
 define	CPU_INFO_IDLELWP	offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define	CPU_INFO_PMAP		offsetof(struct cpu_info, ci_pmap)
--- a/sys/arch/i386/i386/locore.S	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/i386/locore.S	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.107 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: locore.S,v 1.108 2013/12/01 01:05:16 christos Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -129,7 +129,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.107 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.108 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_compat_oldboot.h"
 #include "opt_ddb.h"
@@ -1077,11 +1077,20 @@
 	call	_C_LABEL(i386_tls_switch)
 	addl	$4,%esp
 #else /* !XEN */
-	movl	PCB_CR0(%ebx),%ecx
+	movl	$IPL_HIGH,CPUVAR(ILEVEL)
+	movl	PCB_CR0(%ebx),%ecx		/* has CR0_TS clear */
 	movl	%cr0,%edx
 
+	/*
+	 * If our floating point registers are on a different CPU,
+	 * set CR0_TS so we'll trap rather than reuse bogus state.
+	 */
+	cmpl	CPUVAR(FPCURLWP),%edi
+	je	3f
+	orl	$CR0_TS,%ecx
+
 	/* Reloading CR0 is very expensive - avoid if possible. */
-	cmpl	%edx,%ecx
+3:	cmpl	%edx,%ecx
 	je	4f
 	movl	%ecx,%cr0
 #endif /* !XEN */
--- a/sys/arch/i386/i386/machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/i386/machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.738 2013/11/11 11:10:45 joerg Exp $	*/
+/*	$NetBSD: machdep.c,v 1.739 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.738 2013/11/11 11:10:45 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.739 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_beep.h"
 #include "opt_compat_ibcs2.h"
@@ -266,7 +266,6 @@
 void (*initclock_func)(void) = i8254_initclocks;
 #endif
 
-extern const pcu_ops_t fpu_ops;
 
 /*
  * Size of memory segments, before any memory is stolen.
@@ -499,7 +498,7 @@
 	struct pcb *pcb = lwp_getpcb(l);
 
 	pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
-	pcb->pcb_cr0 = rcr0();
+	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
 	pcb->pcb_esp0 = uvm_lwp_getuarea(l) + KSTACK_SIZE - 16;
 	pcb->pcb_iopl = SEL_KPL;
 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
@@ -547,16 +546,21 @@
 {
 	struct cpu_info *ci = curcpu();
 	struct pcb *pcb = lwp_getpcb(l);
+	/*
+         * Raise the IPL to IPL_HIGH.
+	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
+	 * is deferred until mi_switch(), when cpu_switchto() returns.
+	 */
+	(void)splhigh();
 
         /*
 	 * If our floating point registers are on a different CPU,
 	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
 
-	if (pcb->pcb_cr0 & CR0_TS) {
+	if (l != ci->ci_fpcurlwp) {
 		HYPERVISOR_fpu_taskswitch(1);
-	} else
-		HYPERVISOR_fpu_taskswitch(0);
+	}
 
 	/* Update TLS segment pointers */
 	update_descriptor(&ci->ci_gdt[GUFS_SEL],
@@ -666,7 +670,7 @@
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
 
 	/* Ensure FP state is reset, if FP is used. */
-	pcu_discard_all(l);
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 }
 
 void
@@ -864,16 +868,23 @@
 	struct trapframe *tf;
 	uint16_t control;
 
+#if NNPX > 0
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL) {
+		npxsave_lwp(l, false);
+	}
+#endif
+
 #ifdef USER_LDT
 	pmap_ldt_cleanup(l);
 #endif
 
-	pcu_discard(&fpu_ops, false);
 	if (pack->ep_osversion >= 699002600)
 		control = __INITIAL_NPXCW__;
 	else
 		control = __NetBSD_COMPAT_NPXCW__;
 
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 	if (i386_use_fxsave) {
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = control;
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
@@ -1627,7 +1638,7 @@
 	*flags |= _UC_TLSBASE;
 
 	/* Save floating point register context, if any. */
-	if (pcu_used_p(&fpu_ops)) {
+	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
 		struct pcb *pcb = lwp_getpcb(l);
 #if NNPX > 0
 
@@ -1635,7 +1646,9 @@
 		 * If this process is the current FP owner, dump its
 		 * context to the PCB first.
 		 */
-		pcu_save(&fpu_ops);
+		if (pcb->pcb_fpcpu) {
+			npxsave_lwp(l, true);
+		}
 #endif
 		if (i386_use_fxsave) {
 			memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
@@ -1728,9 +1741,17 @@
 	if ((flags & _UC_TLSBASE) != 0)
 		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
+#if NNPX > 0
+	/*
+	 * If we were using the FPU, forget that we were.
+	 */
+	if (pcb->pcb_fpcpu != NULL) {
+		npxsave_lwp(l, false);
+	}
+#endif
+
 	/* Restore floating point register context, if any. */
 	if ((flags & _UC_FPU) != 0) {
-		pcu_discard(&fpu_ops, true);
 		if (flags & _UC_FXSAVE) {
 			if (i386_use_fxsave) {
 				memcpy(
@@ -1754,8 +1775,8 @@
 				    sizeof (pcb->pcb_savefpu.sv_87));
 			}
 		}
-	} else
-		pcu_discard(&fpu_ops, false);
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
 	mutex_enter(p->p_lock);
 	if (flags & _UC_SETSTACK)
 		l->l_sigstk.ss_flags |= SS_ONSTACK;
--- a/sys/arch/i386/i386/process_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/i386/process_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.73 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.74 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.73 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.74 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_vm86.h"
 #include "opt_ptrace.h"
@@ -76,8 +76,6 @@
 #include <machine/vm86.h>
 #endif
 
-extern const pcu_ops_t fpu_ops;
-
 static inline struct trapframe *
 process_frame(struct lwp *l)
 {
@@ -248,9 +246,9 @@
 {
 	union savefpu *frame = process_fpframe(l);
 
-	if (pcu_used_p(&fpu_ops)) {
+	if (l->l_md.md_flags & MDL_USEDFPU) {
 #if NNPX > 0
-		pcu_save(&fpu_ops);
+		npxsave_lwp(l, true);
 #endif
 	} else {
 		/*
@@ -276,6 +274,7 @@
 			frame->sv_87.sv_env.en_sw = 0x0000;
 			frame->sv_87.sv_env.en_tw = 0xffff;
 		}
+		l->l_md.md_flags |= MDL_USEDFPU;
 	}
 
 	if (i386_use_fxsave) {
@@ -350,7 +349,13 @@
 {
 	union savefpu *frame = process_fpframe(l);
 
-	pcu_discard(&fpu_ops, true);
+	if (l->l_md.md_flags & MDL_USEDFPU) {
+#if NNPX > 0
+		npxsave_lwp(l, false);
+#endif
+	} else {
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
 
 	if (i386_use_fxsave) {
 		struct save87 s87;
@@ -395,9 +400,13 @@
 	if (i386_use_fxsave == 0)
 		return (EINVAL);
 
-	if (pcu_used_p(&fpu_ops)) {
+	if (l->l_md.md_flags & MDL_USEDFPU) {
 #if NNPX > 0
-		pcu_save(&fpu_ops);
+		struct pcb *pcb = lwp_getpcb(l);
+
+		if (pcb->pcb_fpcpu != NULL) {
+			npxsave_lwp(l, true);
+		}
 #endif
 	} else {
 		/*
@@ -414,6 +423,8 @@
 		frame->sv_xmm.sv_env.en_mxcsr = mxcsr;
 		frame->sv_xmm.sv_env.en_sw = 0x0000;
 		frame->sv_xmm.sv_env.en_tw = 0x00;
+
+		l->l_md.md_flags |= MDL_USEDFPU;  
 	}
 
 	memcpy(regs, &frame->sv_xmm, sizeof(*regs));
@@ -428,8 +439,18 @@
 	if (i386_use_fxsave == 0)
 		return (EINVAL);
 
-	pcu_save(&fpu_ops); /* keep i387 regs */
-	pcu_discard(&fpu_ops, true);
+	if (l->l_md.md_flags & MDL_USEDFPU) {
+#if NNPX > 0
+		struct pcb *pcb = lwp_getpcb(l);
+
+		/* If we were using the FPU, drop it. */
+		if (pcb->pcb_fpcpu != NULL) {
+			npxsave_lwp(l, false);
+		}
+#endif
+	} else {
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
 
 	memcpy(&frame->sv_xmm, regs, sizeof(*regs));
 	return (0);
--- a/sys/arch/i386/include/pcb.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/include/pcb.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.49 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: pcb.h,v 1.50 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2009 The NetBSD Foundation, Inc.
@@ -100,6 +100,7 @@
 	int	vm86_eflags;		/* virtual eflags for vm86 mode */
 	int	vm86_flagmask;		/* flag mask for vm86 mode */
 	void	*vm86_userp;		/* XXX performance hack */
+	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	char	*pcb_iomap;		/* I/O permission bitmap */
 };
 
--- a/sys/arch/i386/include/proc.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/include/proc.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.39 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: proc.h,v 1.40 2013/12/01 01:05:16 christos Exp $	*/
 
 /*
  * Copyright (c) 1991 Regents of the University of California.
@@ -52,6 +52,7 @@
 };
 
 /* md_flags */
+#define	MDL_USEDFPU	0x0001	/* has used the FPU */
 #define	MDL_IOPL	0x0002	/* XEN: i/o privilege */
 
 struct mdproc {
--- a/sys/arch/i386/include/types.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/include/types.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: types.h,v 1.75 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: types.h,v 1.76 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -104,6 +104,7 @@
 #define	__HAVE_NEW_STYLE_BUS_H
 #define	__HAVE_CPU_DATA_FIRST
 #define	__HAVE_CPU_COUNTER
+#define	__HAVE_MD_CPU_OFFLINE
 #define	__HAVE_SYSCALL_INTERN
 #define	__HAVE_MINIMAL_EMUL
 #define	__HAVE_OLD_DISKLABEL
@@ -120,9 +121,4 @@
 #define	__HAVE_RAS
 #endif
 
-#if defined(_KERNEL) || defined(_KMEMUSER)
-#define PCU_FPU		0	/* FPU */
-#define PCU_UNIT_COUNT	1
-#endif
-
 #endif	/* _I386_MACHTYPES_H_ */
--- a/sys/arch/i386/isa/npx.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/i386/isa/npx.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: npx.c,v 1.145 2013/11/08 02:24:11 christos Exp $	*/
+/*	$NetBSD: npx.c,v 1.146 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -96,7 +96,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.145 2013/11/08 02:24:11 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.146 2013/12/01 01:05:16 christos Exp $");
 
 #if 0
 #define IPRINTF(x)	printf x
@@ -374,7 +374,7 @@
 npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
-	struct lwp *l = curlwp;
+	struct lwp *l = ci->ci_fpcurlwp;
 	union savefpu *addr;
 	struct pcb *pcb;
 	ksiginfo_t ksi;
@@ -540,33 +540,73 @@
 	return(0);
 }
 
-extern const pcu_ops_t fpu_ops;
-
 /*
  * Implement device not available (DNA) exception
+ *
+ * If we were the last lwp to use the FPU, we can simply return.
+ * Otherwise, we save the previous state, if necessary, and restore
+ * our last saved state.
  */
 static int
 npxdna(struct cpu_info *ci)
 {
+	struct lwp *l, *fl;
+	struct pcb *pcb;
+	int s;
 
+	if (ci->ci_fpsaving) {
+		/* Recursive trap. */
+		return 1;
+	}
+
+	/* Lock out IPIs and disable preemption. */
+	s = splhigh();
 #ifndef XEN
 	x86_enable_intr();
 #endif
-	pcu_load(&fpu_ops);
-	return 1;
-}
+	/* Save state on current CPU. */
+	l = ci->ci_curlwp;
+	pcb = lwp_getpcb(l);
+
+	fl = ci->ci_fpcurlwp;
+	if (fl != NULL) {
+		/*
+		 * It seems we can get here on Xen even if we didn't
+		 * switch lwp.  In this case do nothing
+		 */
+		if (fl == l) {
+			KASSERT(pcb->pcb_fpcpu == ci);
+			ci->ci_fpused = 1;
+			clts();
+			splx(s);
+			return 1;
+		}
+		KASSERT(fl != l);
+		npxsave_cpu(true);
+		KASSERT(ci->ci_fpcurlwp == NULL);
+	}
 
-static void
-npx_state_load(struct lwp *l, u_int flags)
-{
-	struct pcb * const pcb = lwp_getpcb(l);
+	/* Save our state if on a remote CPU. */
+	if (pcb->pcb_fpcpu != NULL) {
+		/* Explicitly disable preemption before dropping spl. */
+		KPREEMPT_DISABLE(l);
+		splx(s);
+		npxsave_lwp(l, true);
+		KASSERT(pcb->pcb_fpcpu == NULL);
+		s = splhigh();
+		KPREEMPT_ENABLE(l);
+	}
 
+	/*
+	 * Restore state on this CPU, or initialize.  Ensure that
+	 * the entire update is atomic with respect to FPU-sync IPIs.
+	 */
 	clts();
-	pcb->pcb_cr0 &= ~CR0_TS;
-	if (!(flags & PCU_RELOAD))
-		return;
+	ci->ci_fpcurlwp = l;
+	pcb->pcb_fpcpu = ci;
+	ci->ci_fpused = 1;
 
-	if (!(flags & PCU_LOADED)) {
+	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
 		fninit();
 		if (i386_use_fxsave) {
 			fldcw(&pcb->pcb_savefpu.
@@ -575,6 +615,7 @@
 			fldcw(&pcb->pcb_savefpu.
 			    sv_87.sv_env.en_cw);
 		}
+		l->l_md.md_flags |= MDL_USEDFPU;
 	} else if (i386_use_fxsave) {
 		/*
 		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
@@ -600,51 +641,107 @@
 	} else {
 		frstor(&pcb->pcb_savefpu.sv_87);
 	}
+
+	KASSERT(ci == curcpu());
+	splx(s);
+	return 1;
 }
 
-static void
-npx_state_save(struct lwp *l, u_int flags)
+/*
+ * Take the FPU away from its owner; save first if 'save'.  Needs IPL_HIGH.
+ */
+void
+npxsave_cpu(bool save)
 {
 	struct cpu_info *ci;
-	struct pcb * const pcb = lwp_getpcb(l);
+	struct lwp *l;
+	struct pcb *pcb;
+
+	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
 
 	ci = curcpu();
+	l = ci->ci_fpcurlwp;
+	if (l == NULL)
+		return;		/* no lwp owns the FPU on this CPU */
 
-	/*
-	 * Set ci->ci_fpsaving, so that any pending exception will
-	 * be thrown away.  It will be caught again if/when the
-	 * FPU state is restored.
-	 */
-	KASSERT(ci->ci_fpsaving == 0);
-	clts();
-	ci->ci_fpsaving = 1;
-	if (i386_use_fxsave) {
-		fxsave(&pcb->pcb_savefpu.sv_xmm);
-	} else {
-		fnsave(&pcb->pcb_savefpu.sv_87);
+	pcb = lwp_getpcb(l);
+
+	if (save) {
+		 /*
+		  * Set ci->ci_fpsaving, so that any pending exception will
+		  * be thrown away (npxdna() returns early while it is set).
+		  * It will be caught again if/when the FPU state is restored.
+		  */
+		KASSERT(ci->ci_fpsaving == 0);
+		clts();
+		ci->ci_fpsaving = 1;
+		if (i386_use_fxsave) {
+			fxsave(&pcb->pcb_savefpu.sv_xmm);
+		} else {
+			fnsave(&pcb->pcb_savefpu.sv_87);
+		}
+		ci->ci_fpsaving = 0;
 	}
-	ci->ci_fpsaving = 0;
+
+	stts();			/* set CR0.TS: next FPU use traps (DNA) */
+	pcb->pcb_fpcpu = NULL;	/* break the lwp <-> CPU ownership link */
+	ci->ci_fpcurlwp = NULL;
+	ci->ci_fpused = 1;
 }
 
-static void
-npx_state_release(struct lwp *l, u_int flags)
+/*
+ * Save l's FPU state, which may be on this processor or another processor
+ * (remote owners are sent a sync IPI and polled).  If !save, MDL_USEDFPU is
+ * cleared afterwards.  Preemption is left enabled where possible.  Caller
+ * must know that the target LWP is stopped, or this routine may race it.
+ */
+void
+npxsave_lwp(struct lwp *l, bool save)
 {
-	struct pcb * const pcb = lwp_getpcb(l);
-	
-	stts();
-	pcb->pcb_cr0 |= CR0_TS;
-}
+	struct cpu_info *oci;
+	struct pcb *pcb;
+	int s, spins, ticks;
 
-const pcu_ops_t fpu_ops = {
-	.pcu_id = PCU_FPU,
-	.pcu_state_load = npx_state_load,
-	.pcu_state_save = npx_state_save,
-	.pcu_state_release = npx_state_release,
-};
+	spins = 0;
+	ticks = hardclock_ticks;	/* XXX never refreshed in the loop */
+	for (;;) {
+		s = splhigh();
+		pcb = lwp_getpcb(l);
+		oci = pcb->pcb_fpcpu;
+		if (oci == NULL) {	/* state is not live on any CPU */
+			splx(s);
+			break;
+		}
+		if (oci == curcpu()) {	/* live here: handle it locally */
+			KASSERT(oci->ci_fpcurlwp == l);
+			npxsave_cpu(save);
+			splx(s);
+			break;
+		}
+		splx(s);	/* live elsewhere: ask that CPU to save it */
+#ifdef XEN
+		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
+			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
+			    cpu_name(oci));
+		}
+#else /* !XEN */
+		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
+#endif /* XEN */
+		while (pcb->pcb_fpcpu == oci &&
+		    ticks == hardclock_ticks) {
+			x86_pause();
+			spins++;
+		}
+		if (spins > 100000000) {
+			panic("npxsave_lwp: remote CPU did not release FPU");
+		}
+	}
 
-const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
-	[PCU_FPU] = &fpu_ops,
-};
+	if (!save) {
+		/* Ensure we restart with a clean slate. */
+		l->l_md.md_flags &= ~MDL_USEDFPU;
+	}
+}
 
 /* 
  * The following mechanism is used to ensure that the FPE_... value
@@ -847,20 +944,16 @@
 npxtrap(struct lwp *l)
 {
 	u_short control, status;
-#if 0
 	struct cpu_info *ci = curcpu();
 	struct lwp *fl = ci->ci_fpcurlwp;
-#endif
-	struct pcb *pcb = lwp_getpcb(l);
 
 	if (!i386_fpu_present) {
-		printf("%s: curthread = %p, npx_type = %d\n",
-		    __func__, l, npx_type);
+		printf("%s: fpcurthread = %p, curthread = %p, npx_type = %d\n",
+		    __func__, fl, l, npx_type);
 		panic("npxtrap from nowhere");
 	}
 	kpreempt_disable();
 
-#if 0
 	/*
 	 * Interrupt handling (for another interrupt) may have pushed the
 	 * state to memory.  Fetch the relevant parts of the state from
@@ -877,15 +970,6 @@
 
 	if (fl == l)
 		fnclex();
-#else
-	if (i386_use_fxsave) {
-		fxsave(&pcb->pcb_savefpu.sv_xmm);
-	} else {
-		fnsave(&pcb->pcb_savefpu.sv_87);
-	}
-	control = GET_FPU_CW(pcb);
-	status = GET_FPU_SW(pcb);
-#endif
 	kpreempt_enable();
 	return fpetable[status & ((~control & 0x3f) | 0x40)];
 }
--- a/sys/arch/x86/acpi/acpi_wakeup.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/acpi/acpi_wakeup.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: acpi_wakeup.c,v 1.34 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.34 2013/12/01 01:05:16 christos Exp $");
 
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.33 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.34 2013/12/01 01:05:16 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -309,7 +309,11 @@
 	AcpiSetFirmwareWakingVector(acpi_wakeup_paddr);
 
 	s = splhigh();
-	pcu_save_all_on_cpu();
+#ifdef __i386__
+	npxsave_cpu(true);
+#else
+	fpusave_cpu(true);
+#endif
 	x86_disable_intr();
 
 #ifdef MULTIPROCESSOR
--- a/sys/arch/x86/include/cpu.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/include/cpu.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.57 2013/11/10 00:50:13 christos Exp $	*/
+/*	$NetBSD: cpu.h,v 1.58 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -100,7 +100,9 @@
 	 */
 	struct cpu_info *ci_next;	/* next cpu */
 	struct lwp *ci_curlwp;		/* current owner of the processor */
+	struct lwp *ci_fpcurlwp;	/* current owner of the FPU */
 	int	ci_fpsaving;		/* save in progress */
+	int	ci_fpused;		/* XEN: FPU was used by curlwp */
 	cpuid_t ci_cpuid;		/* our CPU ID */
 	int	_unused;
 	uint32_t ci_acpiid;		/* our ACPI/MADT ID */
@@ -423,6 +425,10 @@
 
 void	cpu_probe_features(struct cpu_info *);
 
+/* npx.c */
+void	npxsave_lwp(struct lwp *, bool);
+void	npxsave_cpu(bool);
+
 /* vm_machdep.c */
 paddr_t	kvtop(void *);
 
--- a/sys/arch/x86/include/intrdefs.h	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/include/intrdefs.h	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: intrdefs.h,v 1.18 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: intrdefs.h,v 1.19 2013/12/01 01:05:16 christos Exp $	*/
 
 #ifndef _X86_INTRDEFS_H_
 #define _X86_INTRDEFS_H_
@@ -61,7 +61,7 @@
 #define X86_IPI_HALT			0x00000001
 #define X86_IPI_MICROSET		0x00000002
 #define X86_IPI__UNUSED1		0x00000004
-#define X86_IPI__UNUSED2		0x00000008
+#define X86_IPI_SYNCH_FPU		0x00000008
 #define X86_IPI_MTRR			0x00000010
 #define X86_IPI_GDT			0x00000020
 #define X86_IPI_XCALL			0x00000040
--- a/sys/arch/x86/x86/cpu.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/x86/cpu.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.106 2013/11/15 08:47:55 msaitoh Exp $	*/
+/*	$NetBSD: cpu.c,v 1.107 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.106 2013/11/15 08:47:55 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.107 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -893,14 +893,15 @@
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 
-	db_printf("addr		dev	id	flags	ipis	curlwp\n");
+	db_printf("addr		dev	id	flags	ipis	curlwp 		fpcurlwp\n");
 	for (CPU_INFO_FOREACH(cii, ci)) {
-		db_printf("%p	%s	%ld	%x	%x	%10p\n",
+		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
 		    ci,
 		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
 		    (long)ci->ci_cpuid,
 		    ci->ci_flags, ci->ci_ipis,
-		    ci->ci_curlwp);
+		    ci->ci_curlwp,
+		    ci->ci_fpcurlwp);
 	}
 }
 #endif
@@ -1116,6 +1117,22 @@
 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
 }
 
+void
+cpu_offline_md(void)
+{
+	int s;
+
+	s = splhigh();	/* npxsave_cpu() KASSERTs ci_ilevel == IPL_HIGH */
+#ifdef __i386__
+#if NNPX > 0
+	npxsave_cpu(true);	/* save FPU state live on this CPU, if any */
+#endif /* NNPX > 0 */
+#else /* !__i386__ */
+	fpusave_cpu(true);
+#endif /* __i386__ */
+	splx(s);
+}
+
 /* XXX joerg restructure and restart CPUs individually */
 static bool
 cpu_stop(device_t dv)
--- a/sys/arch/x86/x86/ipi.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/x86/ipi.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ipi.c,v 1.19 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: ipi.c,v 1.20 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2008, 2009 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.19 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.20 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_mtrr.h"
 
@@ -56,6 +56,20 @@
 
 #include "acpica.h"
 
+#ifdef __x86_64__
+#include <machine/fpu.h>
+static void	x86_ipi_synch_fpu(struct cpu_info *);
+#else
+/* XXXfpu */
+#include "npx.h"
+#if NNPX > 0
+static void	x86_ipi_synch_fpu(struct cpu_info *);
+#define		fpusave_cpu(x)		npxsave_cpu(x)
+#else
+#define		x86_ipi_synch_fpu	NULL
+#endif
+#endif
+
 static void	x86_ipi_halt(struct cpu_info *);
 static void	x86_ipi_kpreempt(struct cpu_info *);
 static void	x86_ipi_xcall(struct cpu_info *);
@@ -77,7 +91,7 @@
 	x86_ipi_halt,
 	NULL,
 	NULL,
-	NULL,
+	x86_ipi_synch_fpu,
 	x86_ipi_reload_mtrr,
 	gdt_reload_cpu,
 	x86_ipi_xcall,
@@ -166,6 +180,15 @@
 	}
 }
 
+#if defined(__x86_64__) || NNPX > 0	/* XXXfpu: X86_IPI_SYNCH_FPU handler */
+static void
+x86_ipi_synch_fpu(struct cpu_info *ci)
+{
+
+	fpusave_cpu(true);	/* i386: macro for npxsave_cpu(); ci unused */
+}
+#endif /* __x86_64__ || NNPX > 0 */
+
 #ifdef MTRR
 static void
 x86_ipi_reload_mtrr(struct cpu_info *ci)
--- a/sys/arch/x86/x86/vm_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/x86/x86/vm_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.17 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.18 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.17 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.18 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_mtrr.h"
 
@@ -103,6 +103,17 @@
 #include <machine/mtrr.h>
 #endif
 
+#ifdef __x86_64__
+#include <machine/fpu.h>
+#else
+#include "npx.h"
+#if NNPX > 0
+#define fpusave_lwp(x, y)	npxsave_lwp(x, y)
+#else
+#define fpusave_lwp(x, y)
+#endif
+#endif
+
 void
 cpu_proc_fork(struct proc *p1, struct proc *p2)
 {
@@ -134,6 +145,14 @@
 	pcb2 = lwp_getpcb(l2);
 
 	/*
+	 * If parent LWP was using FPU, then we have to save the FPU h/w
+	 * state to PCB so that we can copy it.
+	 */
+	if (pcb1->pcb_fpcpu != NULL) {
+		fpusave_lwp(l1, true);
+	}
+
+	/*
 	 * Sync the PCB before we copy it.
 	 */
 	if (l1 == curlwp) {
@@ -149,7 +168,6 @@
 #if defined(XEN)
 	pcb2->pcb_iopl = SEL_KPL;
 #endif
-	pcb2->pcb_cr0 |= CR0_TS;
 
 	/*
 	 * Set the kernel stack address (from the address to uarea) and
@@ -226,6 +244,12 @@
 void
 cpu_lwp_free(struct lwp *l, int proc)
 {
+	struct pcb *pcb = lwp_getpcb(l);
+
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL) {
+		fpusave_lwp(l, false);
+	}
 
 #ifdef MTRR
 	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
--- a/sys/arch/xen/x86/cpu.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/xen/x86/cpu.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.94 2013/10/23 20:18:50 drochner Exp $	*/
+/*	$NetBSD: cpu.c,v 1.95 2013/12/01 01:05:16 christos Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.94 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.95 2013/12/01 01:05:16 christos Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -735,14 +735,15 @@
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 
-	db_printf("addr		dev	id	flags	ipis	curlwp\n");
+	db_printf("addr		dev	id	flags	ipis	curlwp 		fpcurlwp\n");
 	for (CPU_INFO_FOREACH(cii, ci)) {
-		db_printf("%p	%s	%ld	%x	%x	%10p\n",
+		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
 		    ci,
 		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
 		    (long)ci->ci_cpuid,
 		    ci->ci_flags, ci->ci_ipis,
-		    ci->ci_curlwp);
+		    ci->ci_curlwp,
+		    ci->ci_fpcurlwp);
 	}
 }
 #endif /* DDB */
@@ -1052,6 +1053,20 @@
 
 }
 
+void
+cpu_offline_md(void)
+{
+	/* Run the save at IPL_HIGH; npxsave_cpu() asserts that level. */
+	const int s = splhigh();
+
+#if defined(__i386__)
+	npxsave_cpu(true);
+#else	/* !__i386__ */
+	fpusave_cpu(true);
+#endif	/* __i386__ */
+	splx(s);
+}
+
 void    
 cpu_get_tsc_freq(struct cpu_info *ci)
 {
--- a/sys/arch/xen/x86/xen_ipi.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/arch/xen/x86/xen_ipi.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $ */
+/* $NetBSD: xen_ipi.c,v 1.14 2013/12/01 01:05:16 christos Exp $ */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -33,10 +33,10 @@
 
 /* 
  * Based on: x86/ipi.c
- * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $"); 
+ * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.14 2013/12/01 01:05:16 christos Exp $"); 
  */
 
-__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.13 2013/10/23 20:18:50 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.14 2013/12/01 01:05:16 christos Exp $");
 
 #include <sys/types.h>
 
@@ -48,6 +48,11 @@
 #include <sys/errno.h>
 #include <sys/systm.h>
 
+#ifdef __x86_64__
+#include <machine/fpu.h>
+#else
+#include <machine/npx.h>
+#endif /* __x86_64__ */
 #include <machine/frame.h>
 #include <machine/segments.h>
 
@@ -64,6 +69,7 @@
 #endif /* __x86_64__ */
 
 static void xen_ipi_halt(struct cpu_info *, struct intrframe *);
+static void xen_ipi_synch_fpu(struct cpu_info *, struct intrframe *);
 static void xen_ipi_ddb(struct cpu_info *, struct intrframe *);
 static void xen_ipi_xcall(struct cpu_info *, struct intrframe *);
 static void xen_ipi_hvcb(struct cpu_info *, struct intrframe *);
@@ -71,7 +77,7 @@
 static void (*ipifunc[XEN_NIPIS])(struct cpu_info *, struct intrframe *) =
 {	/* In order of priority (see: xen/include/intrdefs.h */
 	xen_ipi_halt,
-	NULL,
+	xen_ipi_synch_fpu,
 	xen_ipi_ddb,
 	xen_ipi_xcall,
 	xen_ipi_hvcb
@@ -211,6 +217,19 @@
 }
 
 static void
+xen_ipi_synch_fpu(struct cpu_info *ci, struct intrframe *intrf)
+{
+	KASSERT(ci != NULL);
+	KASSERT(intrf != NULL);
+
+#ifdef __x86_64__
+	fpusave_cpu(true);
+#else /* !__x86_64__ */
+	npxsave_cpu(true);	/* i386 counterpart; acts on curcpu() */
+#endif /* __x86_64__ */
+}
+
+static void
 xen_ipi_ddb(struct cpu_info *ci, struct intrframe *intrf)
 {
 	KASSERT(ci != NULL);
--- a/sys/compat/linux/arch/amd64/linux_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/compat/linux/arch/amd64/linux_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.42 2013/11/18 01:32:32 chs Exp $ */
+/*	$NetBSD: linux_machdep.c,v 1.43 2013/12/01 01:05:16 christos Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -33,7 +33,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.42 2013/11/18 01:32:32 chs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.43 2013/12/01 01:05:16 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -44,7 +44,6 @@
 #include <sys/ptrace.h> /* for process_read_fpregs() */
 #include <sys/ucontext.h>
 #include <sys/conf.h>
-#include <sys/pcu.h>
 
 #include <machine/reg.h>
 #include <machine/pcb.h>
@@ -65,7 +64,6 @@
 #include <dev/wscons/wsdisplay_usl_io.h>
 #endif
 
-extern const pcu_ops_t fpu_ops;
 
 #include <compat/linux/common/linux_signal.h>
 #include <compat/linux/common/linux_errno.h>
@@ -86,8 +84,11 @@
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 
-	pcu_discard(&fpu_ops, false);
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL)
+		fpusave_lwp(l, 0);
 
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 	pcb->pcb_flags = 0;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
 	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@@ -155,7 +156,7 @@
 	/* 
 	 * Save FPU state, if any 
 	 */
-	if (pcu_used_p(&fpu_ops)) {
+	if (l->l_md.md_flags & MDL_USEDFPU) {
 		sp = (char *)
 		    (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL);
 		fpsp = (struct linux__fpstate *)sp;
--- a/sys/compat/linux/arch/i386/linux_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/compat/linux/arch/i386/linux_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.152 2013/10/23 20:18:51 drochner Exp $	*/
+/*	$NetBSD: linux_machdep.c,v 1.153 2013/12/01 01:05:16 christos Exp $	*/
 
 /*-
  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.152 2013/10/23 20:18:51 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.153 2013/12/01 01:05:16 christos Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_vm86.h"
@@ -139,7 +139,7 @@
 	pmap_ldt_cleanup(l);
 #endif
 
-	pcu_discard_all(l);
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 
 	if (i386_use_fxsave) {
 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
--- a/sys/compat/linux32/arch/amd64/linux32_machdep.c	Sun Dec 01 01:05:15 2013 +0000
+++ b/sys/compat/linux32/arch/amd64/linux32_machdep.c	Sun Dec 01 01:05:16 2013 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux32_machdep.c,v 1.32 2013/10/23 20:18:51 drochner Exp $ */
+/*	$NetBSD: linux32_machdep.c,v 1.33 2013/12/01 01:05:16 christos Exp $ */
 
 /*-
  * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,12 +31,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.32 2013/10/23 20:18:51 drochner Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.33 2013/12/01 01:05:16 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/exec.h>
-#include <sys/pcu.h>
 
 #include <machine/vmparam.h>
 #include <machine/cpufunc.h>
@@ -82,8 +81,6 @@
 static int linux32_restore_sigcontext(struct lwp *, 
     struct linux32_sigcontext *, register_t *);
 
-extern const pcu_ops_t fpu_ops;
-
 void
 linux32_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
 {
@@ -276,7 +273,9 @@
 	struct trapframe *tf;
 	struct proc *p = l->l_proc;
 
-	pcu_discard(&fpu_ops, false);
+	/* If we were using the FPU, forget about it. */
+	if (pcb->pcb_fpcpu != NULL)
+		fpusave_lwp(l, 0);
 
 #if defined(USER_LDT) && 0
 	pmap_ldt_cleanup(p);
@@ -284,6 +283,7 @@
 
 	netbsd32_adjust_limits(p);
 
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Forces iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;