Move sys/arch/amd64/amd64/fpu.c and sys/arch/amd64/include/fpu.h [trunk]
author dsl <dsl@NetBSD.org>
Tue, 11 Feb 2014 20:17:16 +0000
branch trunk
changeset 224519 f4e68ef6a381
parent 224518 a28556c24ae0
child 224520 a4ac1d595c42
Move sys/arch/amd64/amd64/fpu.c and sys/arch/amd64/include/fpu.h into sys/arch/x86 in preparation for using the same code for i386.
distrib/sets/lists/comp/md.amd64
sys/arch/amd64/amd64/fpu.c
sys/arch/amd64/amd64/machdep.c
sys/arch/amd64/amd64/netbsd32_machdep.c
sys/arch/amd64/amd64/process_machdep.c
sys/arch/amd64/amd64/trap.c
sys/arch/amd64/conf/files.amd64
sys/arch/amd64/include/Makefile
sys/arch/amd64/include/fenv.h
sys/arch/amd64/include/fpu.h
sys/arch/amd64/include/frame.h
sys/arch/amd64/include/pcb.h
sys/arch/amd64/include/reg.h
sys/arch/x86/acpi/acpi_wakeup.c
sys/arch/x86/include/Makefile
sys/arch/x86/include/fpu.h
sys/arch/x86/x86/fpu.c
sys/arch/x86/x86/ipi.c
sys/arch/x86/x86/vm_machdep.c
sys/arch/xen/conf/files.xen
sys/arch/xen/x86/cpu.c
sys/arch/xen/x86/xen_ipi.c
sys/compat/linux/arch/amd64/linux_machdep.c
--- a/distrib/sets/lists/comp/md.amd64	Tue Feb 11 18:13:45 2014 +0000
+++ b/distrib/sets/lists/comp/md.amd64	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: md.amd64,v 1.213 2014/02/07 23:18:04 dsl Exp $
+# $NetBSD: md.amd64,v 1.214 2014/02/11 20:17:16 dsl Exp $
 
 ./usr/include/amd64				comp-c-include
 ./usr/include/amd64/ansi.h			comp-c-include
@@ -19,7 +19,7 @@
 ./usr/include/amd64/endian_machdep.h		comp-c-include
 ./usr/include/amd64/fenv.h			comp-c-include
 ./usr/include/amd64/float.h			comp-c-include
-./usr/include/amd64/fpu.h			comp-c-include
+./usr/include/amd64/fpu.h			comp-c-include		obsolete
 ./usr/include/amd64/frame.h			comp-c-include
 ./usr/include/amd64/frame_regs.h		comp-c-include
 ./usr/include/amd64/gdt.h			comp-c-include
@@ -431,6 +431,7 @@
 ./usr/include/x86/cputypes.h			comp-c-include
 ./usr/include/x86/cpuvar.h			comp-c-include
 ./usr/include/x86/float.h			comp-c-include
+./usr/include/x86/fpu.h				comp-c-include
 ./usr/include/x86/ieee.h			comp-c-include
 ./usr/include/x86/ieeefp.h			comp-c-include
 ./usr/include/x86/intr.h			comp-c-include
--- a/sys/arch/amd64/amd64/fpu.c	Tue Feb 11 18:13:45 2014 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,484 +0,0 @@
-/*	$NetBSD: fpu.c,v 1.48 2014/02/09 23:02:25 dsl Exp $	*/
-
-/*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
- * rights reserved.
- *
- * This code is derived from software developed for The NetBSD Foundation
- * by Andrew Doran.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*-
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)npx.c	7.2 (Berkeley) 5/12/91
- */
-
-/*-
- * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
- * Copyright (c) 1990 William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)npx.c	7.2 (Berkeley) 5/12/91
- */
-
-/*
- * XXXfvdl update copyright notice. this started out as a stripped isa/npx.c
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.48 2014/02/09 23:02:25 dsl Exp $");
-
-#include "opt_multiprocessor.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/cpu.h>
-#include <sys/file.h>
-#include <sys/proc.h>
-#include <sys/kernel.h>
-
-#include <machine/cpu.h>
-#include <machine/intr.h>
-#include <machine/cpufunc.h>
-#include <machine/pcb.h>
-#include <machine/trap.h>
-#include <machine/specialreg.h>
-#include <machine/fpu.h>
-
-#ifdef XEN
-#define clts() HYPERVISOR_fpu_taskswitch(0)
-#define stts() HYPERVISOR_fpu_taskswitch(1)
-#endif
-
-/*
- * We do lazy initialization and switching using the TS bit in cr0 and the
- * MDL_USEDFPU bit in mdlwp.
- *
- * DNA exceptions are handled like this:
- *
- * 1) If there is no FPU, return and go to the emulator.
- * 2) If someone else has used the FPU, save its state into that lwp's PCB.
- * 3a) If MDL_USEDFPU is not set, set it and initialize the FPU.
- * 3b) Otherwise, reload the lwp's previous FPU state.
- *
- * When a lwp is created or exec()s, its saved cr0 image has the TS bit
- * set and the MDL_USEDFPU bit clear.  The MDL_USEDFPU bit is set when the
- * lwp first gets a DNA and the FPU is initialized.  The TS bit is turned
- * off when the FPU is used, and turned on again later when the lwp's FPU
- * state is saved.
- */
-
-void		fpudna(struct cpu_info *);
-
-/* 
- * The following table is used to ensure that the FPE_... value
- * that is passed as a trapcode to the signal handler of the user
- * process does not have more than one bit set.
- * 
- * Multiple bits may be set if SSE simd instructions generate errors
- * on more than one value or if the user process modifies the control
- * word while a status word bit is already set (which this is a sign
- * of bad coding).
- * We have no choise than to narrow them down to one bit, since we must
- * not send a trapcode that is not exactly one of the FPE_ macros.
- *
- * The mechanism has a static table with 127 entries.  Each combination
- * of the 7 FPU status word exception bits directly translates to a
- * position in this table, where a single FPE_... value is stored.
- * This FPE_... value stored there is considered the "most important"
- * of the exception bits and will be sent as the signal code.  The
- * precedence of the bits is based upon Intel Document "Numerical
- * Applications", Chapter "Special Computational Situations".
- *
- * The code to choose one of these values does these steps:
- * 1) Throw away status word bits that cannot be masked.
- * 2) Throw away the bits currently masked in the control word,
- *    assuming the user isn't interested in them anymore.
- * 3) Reinsert status word bit 7 (stack fault) if it is set, which
- *    cannot be masked but must be presered.
- *    'Stack fault' is a sub-class of 'invalid operation'.
- * 4) Use the remaining bits to point into the trapcode table.
- *
- * The 6 maskable bits in order of their preference, as stated in the
- * above referenced Intel manual:
- * 1  Invalid operation (FP_X_INV)
- * 1a   Stack underflow
- * 1b   Stack overflow
- * 1c   Operand of unsupported format
- * 1d   SNaN operand.
- * 2  QNaN operand (not an exception, irrelavant here)
- * 3  Any other invalid-operation not mentioned above or zero divide
- *      (FP_X_INV, FP_X_DZ)
- * 4  Denormal operand (FP_X_DNML)
- * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
- * 6  Inexact result (FP_X_IMP) 
- *
- * NB: the above seems to mix up the mxscr error bits and the x87 ones.
- * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
- * status.
- *
- * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
- * are swapped).
- *
- * This table assumes that any stack fault is cleared - so that an INVOP
- * fault will only be reported as FLTSUB once.
- * This might not happen if the mask is being changed.
- */
-#define FPE_xxx1(f) (f & EN_SW_INVOP \
-		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
-	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
-	: f & EN_SW_DENORM ? FPE_FLTUND \
-	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
-	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
-	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
-	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
-#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
-#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
-#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
-#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
-#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
-static const uint8_t fpetable[128] = {
-	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
-};
-#undef FPE_xxx1
-#undef FPE_xxx2
-#undef FPE_xxx4
-#undef FPE_xxx8
-#undef FPE_xxx16
-#undef FPE_xxx32
-
-/*
- * Init the FPU.
- */
-void
-fpuinit(struct cpu_info *ci)
-{
-	clts();
-	fninit();
-	stts();
-}
-
-/*
- * This is a synchronous trap on either an x87 instruction (due to an
- * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
- * instruction due to an error on the instruction itself.
- *
- * If trap actually generates a signal, then the fpu state is saved
- * and then copied onto the process's user-stack, and then recovered
- * from there when the signal returns (or from the jmp_buf if the
- * signal handler exits with a longjmp()).
- *
- * All this code need to do is save the reason for the trap.
- * For x87 interrupts the status word bits need clearing to stop the
- * trap re-occurring.
- *
- * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
- *
- * Since this is a synchronous trap, the fpu registers must still belong
- * to the correct process (we trap through an interrupt gate so that
- * interrupts are disabled on entry).
- * Interrupts (these better include IPIs) are left disabled until we've
- * finished looking at fpu registers.
- *
- * For amd64 the calling code (in amd64_trap.S) has already checked
- * that we trapped from usermode.
- */
-
-void
-fputrap(struct trapframe *frame)
-{
-	uint32_t statbits;
-	ksiginfo_t ksi;
-
-	/*
-	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
-	 * should be set, and we should have gotten a DNA exception.
-	 */
-	KASSERT(curcpu()->ci_fpcurlwp == curlwp);
-
-	if (frame->tf_trapno == T_XMM) {
-		uint32_t mxcsr;
-		x86_stmxcsr(&mxcsr);
-		statbits = mxcsr;
-		/* Clear the sticky status bits */
-		mxcsr &= ~0x3f;
-		x86_ldmxcsr(&mxcsr);
-
-		/* Remove masked interrupts and non-status bits */
-		statbits &= ~(statbits >> 7) & 0x3f;
-		/* Mark this is an XMM status */
-		statbits |= 0x10000;
-	} else {
-		uint16_t cw, sw;
-		/* Get current control and status words */
-		fnstcw(&cw);
-		fnstsw(&sw);
-		/* Clear any pending exceptions from status word */
-		fnclex();
-
-		/* Removed masked interrupts */
-		statbits = sw & ~(cw & 0x3f);
-	}
-
-	/* Doesn't matter now if we get pre-empted */
-	x86_enable_intr();
-
-	KSI_INIT_TRAP(&ksi);
-	ksi.ksi_signo = SIGFPE;
-	ksi.ksi_addr = (void *)frame->tf_rip;
-	ksi.ksi_code = fpetable[statbits & 0x7f];
-	ksi.ksi_trap = statbits;
-	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
-}
-
-/*
- * Implement device not available (DNA) exception
- *
- * If we were the last lwp to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore
- * our last saved state.
- */
-void
-fpudna(struct cpu_info *ci)
-{
-	uint16_t cw;
-	uint32_t mxcsr;
-	struct lwp *l, *fl;
-	struct pcb *pcb;
-	int s;
-
-	/* Lock out IPIs and disable preemption. */
-	s = splhigh();
-	x86_enable_intr();
-
-	/* Save state on current CPU. */
-	l = ci->ci_curlwp;
-	pcb = lwp_getpcb(l);
-	fl = ci->ci_fpcurlwp;
-	if (fl != NULL) {
-		/*
-		 * It seems we can get here on Xen even if we didn't
-		 * switch lwp.  In this case do nothing
-		 */
-		if (fl == l) {
-			KASSERT(pcb->pcb_fpcpu == ci);
-			clts();
-			splx(s);
-			return;
-		}
-		KASSERT(fl != l);
-		fpusave_cpu(true);
-		KASSERT(ci->ci_fpcurlwp == NULL);
-	}
-
-	/* Save our state if on a remote CPU. */
-	if (pcb->pcb_fpcpu != NULL) {
-		/* Explicitly disable preemption before dropping spl. */
-		KPREEMPT_DISABLE(l);
-		splx(s);
-		fpusave_lwp(l, true);
-		KASSERT(pcb->pcb_fpcpu == NULL);
-		s = splhigh();
-		KPREEMPT_ENABLE(l);
-	}
-
-	/*
-	 * Restore state on this CPU, or initialize.  Ensure that
-	 * the entire update is atomic with respect to FPU-sync IPIs.
-	 */
-	clts();
-	ci->ci_fpcurlwp = l;
-	pcb->pcb_fpcpu = ci;
-	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
-		fninit();
-		cw = pcb->pcb_savefpu.sv_xmm.fx_cw;
-		fldcw(&cw);
-		mxcsr = pcb->pcb_savefpu.sv_xmm.fx_mxcsr;
-		x86_ldmxcsr(&mxcsr);
-		l->l_md.md_flags |= MDL_USEDFPU;
-	} else {
-		/*
-		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
-		 * leaking other process's execution history. Clear them
-		 * manually.
-		 */
-		static const double zero = 0.0;
-		uint16_t status;
-
-		/*
-		 * Clear the ES bit in the x87 status word if it is currently
-		 * set, in order to avoid causing a fault in the upcoming load.
-		 */
-		fnstsw(&status);
-		if (status & 0x80)
-			fnclex();
-
-		/*
-		 * Load the dummy variable into the x87 stack.  This mangles
-		 * the x87 stack, but we don't care since we're about to call
-		 * fxrstor() anyway.
-		 */
-		fldummy(&zero);
-		fxrstor(&pcb->pcb_savefpu);
-	}
-
-	KASSERT(ci == curcpu());
-	splx(s);
-}
-
-/*
- * Save current CPU's FPU state.  Must be called at IPL_HIGH.
- */
-void
-fpusave_cpu(bool save)
-{
-	struct cpu_info *ci;
-	struct pcb *pcb;
-	struct lwp *l;
-
-	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
-
-	ci = curcpu();
-	l = ci->ci_fpcurlwp;
-	if (l == NULL) {
-		return;
-	}
-	pcb = lwp_getpcb(l);
-
-	if (save) {
-		clts();
-		fxsave(&pcb->pcb_savefpu);
-	}
-
-	stts();
-	pcb->pcb_fpcpu = NULL;
-	ci->ci_fpcurlwp = NULL;
-}
-
-/*
- * Save l's FPU state, which may be on this processor or another processor.
- * It may take some time, so we avoid disabling preemption where possible.
- * Caller must know that the target LWP is stopped, otherwise this routine
- * may race against it.
- */
-void
-fpusave_lwp(struct lwp *l, bool save)
-{
-	struct cpu_info *oci;
-	struct pcb *pcb;
-	int s, spins, ticks;
-
-	spins = 0;
-	ticks = hardclock_ticks;
-	for (;;) {
-		s = splhigh();
-		pcb = lwp_getpcb(l);
-		oci = pcb->pcb_fpcpu;
-		if (oci == NULL) {
-			splx(s);
-			break;
-		}
-		if (oci == curcpu()) {
-			KASSERT(oci->ci_fpcurlwp == l);
-			fpusave_cpu(save);
-			splx(s);
-			break;
-		}
-		splx(s);
-#ifdef XEN
-		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
-			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
-			    cpu_name(oci));
-		}
-#else /* XEN */
-		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
-#endif
-		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
-			x86_pause();
-			spins++;
-		}
-		if (spins > 100000000) {
-			panic("fpusave_lwp: did not");
-		}
-	}
-
-	if (!save) {
-		/* Ensure we restart with a clean slate. */
-	 	l->l_md.md_flags &= ~MDL_USEDFPU;
-	}
-}
--- a/sys/arch/amd64/amd64/machdep.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/amd64/machdep.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.202 2014/02/07 22:40:22 dsl Exp $	*/
+/*	$NetBSD: machdep.c,v 1.203 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.202 2014/02/07 22:40:22 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.203 2014/02/11 20:17:16 dsl Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -174,7 +174,7 @@
 #include <machine/reg.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/mtrr.h>
 #include <machine/mpbiosvar.h>
 
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.89 2014/02/07 22:40:22 dsl Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.90 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.89 2014/02/07 22:40:22 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.90 2014/02/11 20:17:16 dsl Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -61,7 +61,7 @@
 #include <sys/ptrace.h>
 #include <sys/kauth.h>
 
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/frame.h>
 #include <machine/reg.h>
 #include <machine/vmparam.h>
--- a/sys/arch/amd64/amd64/process_machdep.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/amd64/process_machdep.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.26 2014/02/07 22:40:22 dsl Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.27 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -53,7 +53,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.26 2014/02/07 22:40:22 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.27 2014/02/11 20:17:16 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -66,7 +66,7 @@
 #include <machine/psl.h>
 #include <machine/reg.h>
 #include <machine/segments.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 
 static inline struct trapframe *process_frame(struct lwp *);
 static inline struct fxsave *process_fpframe(struct lwp *);
--- a/sys/arch/amd64/amd64/trap.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/amd64/trap.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: trap.c,v 1.75 2013/01/14 00:06:11 christos Exp $	*/
+/*	$NetBSD: trap.c,v 1.76 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.75 2013/01/14 00:06:11 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.76 2014/02/11 20:17:16 dsl Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -91,7 +91,7 @@
 #include <uvm/uvm_extern.h>
 
 #include <machine/cpufunc.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
 #include <machine/trap.h>
--- a/sys/arch/amd64/conf/files.amd64	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/conf/files.amd64	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: files.amd64,v 1.81 2014/02/07 22:40:22 dsl Exp $
+#	$NetBSD: files.amd64,v 1.82 2014/02/11 20:17:16 dsl Exp $
 #
 # new style config file for amd64 architecture
 #
@@ -42,7 +42,7 @@
 file	arch/amd64/amd64/machdep.c
 file	arch/amd64/amd64/process_machdep.c
 file	arch/amd64/amd64/trap.c
-file	arch/amd64/amd64/fpu.c
+file	arch/x86/x86/fpu.c
 file	arch/amd64/amd64/lock_stubs.S
 file	dev/cons.c
 
--- a/sys/arch/amd64/include/Makefile	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/include/Makefile	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.15 2012/11/05 00:57:41 alnsn Exp $
+#	$NetBSD: Makefile,v 1.16 2014/02/11 20:17:16 dsl Exp $
 
 INCSDIR= /usr/include/amd64
 
@@ -7,7 +7,7 @@
 	cdefs.h cpu.h \
 	disklabel.h \
 	elf_machdep.h endian.h endian_machdep.h \
-	float.h fpu.h frame.h frame_regs.h \
+	float.h frame.h frame_regs.h \
 	gdt.h \
 	ieee.h ieeefp.h fenv.h \
 	int_const.h int_fmtio.h int_limits.h int_mwgwtypes.h int_types.h \
--- a/sys/arch/amd64/include/fenv.h	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/include/fenv.h	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: fenv.h,v 1.1 2010/07/31 21:47:54 joerg Exp $	*/
+/*	$NetBSD: fenv.h,v 1.2 2014/02/11 20:17:16 dsl Exp $	*/
 /*-
  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
  * All rights reserved.
@@ -29,7 +29,7 @@
 #define _AMD64_FENV_H_
 
 #include <sys/stdint.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 
 /*
  * Each symbol representing a floating point exception expands to an integer
--- a/sys/arch/amd64/include/fpu.h	Tue Feb 11 18:13:45 2014 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-/*	$NetBSD: fpu.h,v 1.12 2014/02/07 22:40:22 dsl Exp $	*/
-
-#ifndef	_AMD64_FPU_H_
-#define	_AMD64_FPU_H_
-
-#include <x86/cpu_extended_state.h>
-
-#ifdef _KERNEL
-/*
- * XXX
- */
-struct trapframe;
-struct cpu_info;
-
-void fpuinit(struct cpu_info *);
-void fpudrop(void);
-void fpusave(struct lwp *);
-void fpudiscard(struct lwp *);
-void fputrap(struct trapframe *);
-void fpusave_lwp(struct lwp *, bool);
-void fpusave_cpu(bool);
-
-#endif
-
-#endif /* _AMD64_FPU_H_ */
--- a/sys/arch/amd64/include/frame.h	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/include/frame.h	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: frame.h,v 1.15 2008/10/26 00:08:15 mrg Exp $	*/
+/*	$NetBSD: frame.h,v 1.16 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
 #ifdef __x86_64__
 
 #include <sys/signal.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/frame_regs.h>
 
 /*
--- a/sys/arch/amd64/include/pcb.h	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/include/pcb.h	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.23 2014/02/07 22:40:22 dsl Exp $	*/
+/*	$NetBSD: pcb.h,v 1.24 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
 
 #include <machine/segments.h>
 #include <machine/tss.h>
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/sysarch.h>
 
 #define	NIOPORTS	1024		/* # of ports we allow to be mapped */
--- a/sys/arch/amd64/include/reg.h	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/amd64/include/reg.h	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: reg.h,v 1.8 2014/02/07 22:40:22 dsl Exp $	*/
+/*	$NetBSD: reg.h,v 1.9 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -39,7 +39,7 @@
 
 #ifdef __x86_64__
 
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #include <machine/mcontext.h>
 
 /*
--- a/sys/arch/x86/acpi/acpi_wakeup.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/x86/acpi/acpi_wakeup.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_wakeup.c,v 1.35 2014/01/26 19:16:17 dsl Exp $	*/
+/*	$NetBSD: acpi_wakeup.c,v 1.36 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.35 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.36 2014/02/11 20:17:16 dsl Exp $");
 
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.35 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.36 2014/02/11 20:17:16 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -97,11 +97,6 @@
 #define ACPI_MACHDEP_PRIVATE
 #include <machine/acpi_machdep.h>
 #include <machine/cpu.h>
-#ifdef __i386__
-#  include <machine/npx.h>
-#else
-#  include <machine/fpu.h>
-#endif
 #include <machine/mtrr.h>
 
 #include <x86/cpuvar.h>
--- a/sys/arch/x86/include/Makefile	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/x86/include/Makefile	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-# 	$NetBSD: Makefile,v 1.18 2014/02/07 21:52:46 dsl Exp $
+# 	$NetBSD: Makefile,v 1.19 2014/02/11 20:17:16 dsl Exp $
 
 INCSDIR=/usr/include/x86
 
@@ -11,6 +11,7 @@
 	cputypes.h \
 	cpuvar.h \
 	float.h \
+	fpu.h \
 	ieee.h ieeefp.h \
 	intr.h intrdefs.h \
 	lock.h \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/x86/include/fpu.h	Tue Feb 11 20:17:16 2014 +0000
@@ -0,0 +1,25 @@
+/*	$NetBSD: fpu.h,v 1.1 2014/02/11 20:17:16 dsl Exp $	*/
+
+#ifndef	_AMD64_FPU_H_
+#define	_AMD64_FPU_H_
+
+#include <x86/cpu_extended_state.h>
+
+#ifdef _KERNEL
+/*
+ * XXX
+ */
+struct trapframe;
+struct cpu_info;
+
+void fpuinit(struct cpu_info *);
+void fpudrop(void);
+void fpusave(struct lwp *);
+void fpudiscard(struct lwp *);
+void fputrap(struct trapframe *);
+void fpusave_lwp(struct lwp *, bool);
+void fpusave_cpu(bool);
+
+#endif
+
+#endif /* _AMD64_FPU_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/x86/x86/fpu.c	Tue Feb 11 20:17:16 2014 +0000
@@ -0,0 +1,484 @@
+/*	$NetBSD: fpu.c,v 1.1 2014/02/11 20:17:16 dsl Exp $	*/
+
+/*-
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
+ * rights reserved.
+ *
+ * This code is derived from software developed for The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)npx.c	7.2 (Berkeley) 5/12/91
+ */
+
+/*-
+ * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
+ * Copyright (c) 1990 William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)npx.c	7.2 (Berkeley) 5/12/91
+ */
+
+/*
+ * XXXfvdl update copyright notice. this started out as a stripped isa/npx.c
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.1 2014/02/11 20:17:16 dsl Exp $");
+
+#include "opt_multiprocessor.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cpu.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+
+#include <machine/cpu.h>
+#include <machine/intr.h>
+#include <machine/cpufunc.h>
+#include <machine/pcb.h>
+#include <machine/trap.h>
+#include <machine/specialreg.h>
+#include <x86/fpu.h>
+
+#ifdef XEN
+#define clts() HYPERVISOR_fpu_taskswitch(0)
+#define stts() HYPERVISOR_fpu_taskswitch(1)
+#endif
+
+/*
+ * We do lazy initialization and switching using the TS bit in cr0 and the
+ * MDL_USEDFPU bit in mdlwp.
+ *
+ * DNA exceptions are handled like this:
+ *
+ * 1) If there is no FPU, return and go to the emulator.
+ * 2) If someone else has used the FPU, save its state into that lwp's PCB.
+ * 3a) If MDL_USEDFPU is not set, set it and initialize the FPU.
+ * 3b) Otherwise, reload the lwp's previous FPU state.
+ *
+ * When a lwp is created or exec()s, its saved cr0 image has the TS bit
+ * set and the MDL_USEDFPU bit clear.  The MDL_USEDFPU bit is set when the
+ * lwp first gets a DNA and the FPU is initialized.  The TS bit is turned
+ * off when the FPU is used, and turned on again later when the lwp's FPU
+ * state is saved.
+ */
+
+void		fpudna(struct cpu_info *);
+
+/*
+ * The following table is used to ensure that the FPE_... value
+ * that is passed as a trapcode to the signal handler of the user
+ * process does not have more than one bit set.
+ *
+ * Multiple bits may be set if SSE simd instructions generate errors
+ * on more than one value or if the user process modifies the control
+ * word while a status word bit is already set (which is a sign
+ * of bad coding).
+ * We have no choice but to narrow them down to one bit, since we must
+ * not send a trapcode that is not exactly one of the FPE_ macros.
+ *
+ * The mechanism has a static table with 128 entries.  Each combination
+ * of the 7 FPU status word exception bits directly translates to a
+ * position in this table, where a single FPE_... value is stored.
+ * This FPE_... value stored there is considered the "most important"
+ * of the exception bits and will be sent as the signal code.  The
+ * precedence of the bits is based upon Intel Document "Numerical
+ * Applications", Chapter "Special Computational Situations".
+ *
+ * The code to choose one of these values does these steps:
+ * 1) Throw away status word bits that cannot be masked.
+ * 2) Throw away the bits currently masked in the control word,
+ *    assuming the user isn't interested in them anymore.
+ * 3) Reinsert status word bit 7 (stack fault) if it is set, which
+ *    cannot be masked but must be preserved.
+ *    'Stack fault' is a sub-class of 'invalid operation'.
+ * 4) Use the remaining bits to point into the trapcode table.
+ *
+ * The 6 maskable bits in order of their preference, as stated in the
+ * above referenced Intel manual:
+ * 1  Invalid operation (FP_X_INV)
+ * 1a   Stack underflow
+ * 1b   Stack overflow
+ * 1c   Operand of unsupported format
+ * 1d   SNaN operand.
+ * 2  QNaN operand (not an exception, irrelevant here)
+ * 3  Any other invalid-operation not mentioned above or zero divide
+ *      (FP_X_INV, FP_X_DZ)
+ * 4  Denormal operand (FP_X_DNML)
+ * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
+ * 6  Inexact result (FP_X_IMP)
+ *
+ * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
+ * They are in the same order, but there is no EN_SW_STACK_FAULT in the
+ * mxcsr status.
+ *
+ * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
+ * are swapped).
+ *
+ * This table assumes that any stack fault is cleared - so that an INVOP
+ * fault will only be reported as FLTSUB once.
+ * This might not happen if the mask is being changed.
+ */
+#define FPE_xxx1(f) (f & EN_SW_INVOP \
+		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
+	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
+	: f & EN_SW_DENORM ? FPE_FLTUND \
+	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
+	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
+	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
+	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
+#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
+#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
+#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
+#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
+#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
+static const uint8_t fpetable[128] = {
+	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
+};
+#undef FPE_xxx1
+#undef FPE_xxx2
+#undef FPE_xxx4
+#undef FPE_xxx8
+#undef FPE_xxx16
+#undef FPE_xxx32
+
+/*
+ * Init the FPU: fninit() bracketed by clts()/stts().  'ci' is not used.
+ */
+void
+fpuinit(struct cpu_info *ci)
+{
+	clts();
+	fninit();
+	stts();
+}
+
+/*
+ * This is a synchronous trap on either an x87 instruction (due to an
+ * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
+ * instruction due to an error on the instruction itself.
+ *
+ * If the trap actually generates a signal, then the fpu state is saved
+ * and then copied onto the process's user-stack, and then recovered
+ * from there when the signal returns (or from the jmp_buf if the
+ * signal handler exits with a longjmp()).
+ *
+ * All this code needs to do is save the reason for the trap.
+ * For x87 interrupts the status word bits need clearing to stop the
+ * trap re-occurring.
+ *
+ * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
+ *
+ * Since this is a synchronous trap, the fpu registers must still belong
+ * to the correct process (we trap through an interrupt gate so that
+ * interrupts are disabled on entry).
+ * Interrupts (these better include IPIs) are left disabled until we've
+ * finished looking at fpu registers.
+ *
+ * For amd64 the calling code (in amd64_trap.S) has already checked
+ * that we trapped from usermode.
+ */
+
+void
+fputrap(struct trapframe *frame)
+{
+	uint32_t statbits;
+	ksiginfo_t ksi;
+
+	/*
+	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
+	 * should be set, and we should have gotten a DNA exception.
+	 */
+	KASSERT(curcpu()->ci_fpcurlwp == curlwp);
+
+	if (frame->tf_trapno == T_XMM) {
+		uint32_t mxcsr;
+		x86_stmxcsr(&mxcsr);
+		statbits = mxcsr;
+		/* Clear the sticky status bits */
+		mxcsr &= ~0x3f;
+		x86_ldmxcsr(&mxcsr);
+
+		/* Remove masked interrupts and non-status bits */
+		statbits &= ~(statbits >> 7) & 0x3f;
+		/* Mark this as an XMM status */
+		statbits |= 0x10000;
+	} else {
+		uint16_t cw, sw;
+		/* Get current control and status words */
+		fnstcw(&cw);
+		fnstsw(&sw);
+		/* Clear any pending exceptions from status word */
+		fnclex();
+
+		/* Remove masked interrupts */
+		statbits = sw & ~(cw & 0x3f);
+	}
+
+	/* Doesn't matter now if we get pre-empted */
+	x86_enable_intr();
+
+	KSI_INIT_TRAP(&ksi);
+	ksi.ksi_signo = SIGFPE;
+	ksi.ksi_addr = (void *)frame->tf_rip;
+	ksi.ksi_code = fpetable[statbits & 0x7f];	/* low 7 bits index table */
+	ksi.ksi_trap = statbits;	/* raw bits; 0x10000 set for XMM */
+	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
+}
+
+/*
+ * Implement the device not available (DNA) exception for the CPU 'ci'.
+ *
+ * If we were the last lwp to use the FPU, we can simply return.
+ * Otherwise, we save the previous state, if necessary, and restore
+ * our last saved state (or initialize it if MDL_USEDFPU is not yet set).
+ */
+void
+fpudna(struct cpu_info *ci)
+{
+	uint16_t cw;
+	uint32_t mxcsr;
+	struct lwp *l, *fl;
+	struct pcb *pcb;
+	int s;
+
+	/* Lock out IPIs and disable preemption. */
+	s = splhigh();
+	x86_enable_intr();
+
+	/* Save state on current CPU. */
+	l = ci->ci_curlwp;
+	pcb = lwp_getpcb(l);
+	fl = ci->ci_fpcurlwp;
+	if (fl != NULL) {
+		/*
+		 * It seems we can get here on Xen even if we didn't
+		 * switch lwp.  In this case do nothing.
+		 */
+		if (fl == l) {
+			KASSERT(pcb->pcb_fpcpu == ci);
+			clts();
+			splx(s);
+			return;
+		}
+		KASSERT(fl != l);
+		fpusave_cpu(true);	/* also clears ci->ci_fpcurlwp */
+		KASSERT(ci->ci_fpcurlwp == NULL);
+	}
+
+	/* Save our state if on a remote CPU. */
+	if (pcb->pcb_fpcpu != NULL) {
+		/* Explicitly disable preemption before dropping spl. */
+		KPREEMPT_DISABLE(l);
+		splx(s);
+		fpusave_lwp(l, true);
+		KASSERT(pcb->pcb_fpcpu == NULL);
+		s = splhigh();
+		KPREEMPT_ENABLE(l);
+	}
+
+	/*
+	 * Restore state on this CPU, or initialize.  Ensure that
+	 * the entire update is atomic with respect to FPU-sync IPIs.
+	 */
+	clts();
+	ci->ci_fpcurlwp = l;
+	pcb->pcb_fpcpu = ci;
+	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
+		/* First use: init, then load the cw and mxcsr from the pcb. */
+		fninit();
+		cw = pcb->pcb_savefpu.sv_xmm.fx_cw;
+		fldcw(&cw);
+		mxcsr = pcb->pcb_savefpu.sv_xmm.fx_mxcsr;
+		x86_ldmxcsr(&mxcsr);
+		l->l_md.md_flags |= MDL_USEDFPU;
+	} else {
+		/*
+		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
+		 * leaking other process's execution history. Clear them
+		 * manually.
+		 */
+		static const double zero = 0.0;
+		uint16_t status;
+
+		/*
+		 * Clear the ES bit in the x87 status word if it is currently
+		 * set, in order to avoid causing a fault in the upcoming load.
+		 */
+		fnstsw(&status);
+		if (status & 0x80)
+			fnclex();
+
+		/*
+		 * Load the dummy variable into the x87 stack.  This mangles
+		 * the x87 stack, but we don't care since we're about to call
+		 * fxrstor() anyway.
+		 */
+		fldummy(&zero);
+		fxrstor(&pcb->pcb_savefpu);
+	}
+
+	KASSERT(ci == curcpu());
+	splx(s);
+}
+
+/*
+ * Save current CPU's FPU state.  Must be called at IPL_HIGH.
+ */
+void
+fpusave_cpu(bool save)
+{
+	struct cpu_info *ci;
+	struct pcb *pcb;
+	struct lwp *l;
+
+	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
+
+	ci = curcpu();
+	l = ci->ci_fpcurlwp;
+	if (l == NULL) {	/* no lwp's state is loaded on this CPU */
+		return;
+	}
+	pcb = lwp_getpcb(l);
+
+	if (save) {		/* if !save the state is dropped unsaved */
+		clts();
+		fxsave(&pcb->pcb_savefpu);
+	}
+
+	stts();
+	pcb->pcb_fpcpu = NULL;	/* break the lwp <-> CPU association */
+	ci->ci_fpcurlwp = NULL;
+}
+
+/*
+ * Save l's FPU state, which may be on this processor or another processor.
+ * It may take some time, so we avoid disabling preemption where possible.
+ * Caller must know that the target LWP is stopped, otherwise this routine
+ * may race against it.
+ */
+void
+fpusave_lwp(struct lwp *l, bool save)
+{
+	struct cpu_info *oci;
+	struct pcb *pcb;
+	int s, spins, ticks;
+
+	spins = 0;
+	ticks = hardclock_ticks;	/* NOTE(review): sampled only once */
+	for (;;) {
+		s = splhigh();
+		pcb = lwp_getpcb(l);
+		oci = pcb->pcb_fpcpu;
+		if (oci == NULL) {	/* state is not loaded on any CPU */
+			splx(s);
+			break;
+		}
+		if (oci == curcpu()) {	/* loaded here: handle it locally */
+			KASSERT(oci->ci_fpcurlwp == l);
+			fpusave_cpu(save);
+			splx(s);
+			break;
+		}
+		splx(s);
+		/* Loaded on another CPU: send it a SYNCH_FPU IPI, then poll. */
+#ifdef XEN
+		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
+			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
+			    cpu_name(oci));
+		}
+#else /* XEN */
+		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
+#endif
+		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
+			x86_pause();
+			spins++;
+		}
+		if (spins > 100000000) {
+			panic("fpusave_lwp: did not"); /* NOTE(review): cut off? */
+		}
+	}
+
+	if (!save) {
+		/* Ensure we restart with a clean slate. */
+	 	l->l_md.md_flags &= ~MDL_USEDFPU;
+	}
+}
--- a/sys/arch/x86/x86/ipi.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/x86/x86/ipi.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: ipi.c,v 1.21 2014/01/26 19:16:17 dsl Exp $	*/
+/*	$NetBSD: ipi.c,v 1.22 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2008, 2009 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.21 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.22 2014/02/11 20:17:16 dsl Exp $");
 
 #include "opt_mtrr.h"
 
@@ -57,7 +57,7 @@
 #include "acpica.h"
 
 #ifdef __x86_64__
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #endif
 
 static void	x86_ipi_halt(struct cpu_info *);
--- a/sys/arch/x86/x86/vm_machdep.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/x86/x86/vm_machdep.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.20 2014/01/26 19:16:17 dsl Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.21 2014/02/11 20:17:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.20 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.21 2014/02/11 20:17:16 dsl Exp $");
 
 #include "opt_mtrr.h"
 
@@ -103,10 +103,6 @@
 #include <machine/mtrr.h>
 #endif
 
-#ifdef __x86_64__
-#include <machine/fpu.h>
-#endif
-
 void
 cpu_proc_fork(struct proc *p1, struct proc *p2)
 {
--- a/sys/arch/xen/conf/files.xen	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/xen/conf/files.xen	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: files.xen,v 1.131 2014/02/07 22:40:22 dsl Exp $
+#	$NetBSD: files.xen,v 1.132 2014/02/11 20:17:16 dsl Exp $
 #	NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp 
 #	NetBSD: files.i386,v 1.254 2004/03/25 23:32:10 jmc Exp 
 
@@ -79,7 +79,7 @@
 file	arch/amd64/amd64/machdep.c
 file	arch/amd64/amd64/process_machdep.c
 file	arch/amd64/amd64/trap.c
-file	arch/amd64/amd64/fpu.c
+file	arch/x86/x86/fpu.c
 file	arch/amd64/amd64/lock_stubs.S
 endif
 
--- a/sys/arch/xen/x86/cpu.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/xen/x86/cpu.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.96 2014/01/26 19:16:17 dsl Exp $	*/
+/*	$NetBSD: cpu.c,v 1.97 2014/02/11 20:17:16 dsl Exp $	*/
 /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
 
 /*-
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.96 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.97 2014/02/11 20:17:16 dsl Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -105,7 +105,7 @@
 #ifdef i386
 #include <machine/npx.h>
 #else
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #endif
 
 #include <xen/xen.h>
--- a/sys/arch/xen/x86/xen_ipi.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/arch/xen/x86/xen_ipi.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xen_ipi.c,v 1.15 2014/01/26 19:16:17 dsl Exp $ */
+/* $NetBSD: xen_ipi.c,v 1.16 2014/02/11 20:17:16 dsl Exp $ */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -33,10 +33,10 @@
 
 /* 
  * Based on: x86/ipi.c
- * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.15 2014/01/26 19:16:17 dsl Exp $"); 
+ * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.16 2014/02/11 20:17:16 dsl Exp $"); 
  */
 
-__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.15 2014/01/26 19:16:17 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.16 2014/02/11 20:17:16 dsl Exp $");
 
 #include <sys/types.h>
 
@@ -49,7 +49,7 @@
 #include <sys/systm.h>
 
 #ifdef __x86_64__
-#include <machine/fpu.h>
+#include <x86/fpu.h>
 #else
 #include <machine/npx.h>
 #endif /* __x86_64__ */
--- a/sys/compat/linux/arch/amd64/linux_machdep.c	Tue Feb 11 18:13:45 2014 +0000
+++ b/sys/compat/linux/arch/amd64/linux_machdep.c	Tue Feb 11 20:17:16 2014 +0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.45 2014/02/07 22:40:22 dsl Exp $ */
+/*	$NetBSD: linux_machdep.c,v 1.46 2014/02/11 20:17:16 dsl Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -33,7 +33,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.45 2014/02/07 22:40:22 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.46 2014/02/11 20:17:16 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -47,7 +47,6 @@
 
 #include <machine/reg.h>
 #include <machine/pcb.h>
-#include <machine/fpu.h>
 #include <machine/mcontext.h>
 #include <machine/specialreg.h>
 #include <machine/vmparam.h>