Add port to the Xen virtual machine monitor. trunk
author	cl <cl@NetBSD.org>
Thu, 11 Mar 2004 21:44:08 +0000
branch	trunk
changeset 124671 e975e3939a56
parent 124670 4603d3e78e1c
child 124672 04292780867f
Add port to the Xen virtual machine monitor. (see http://www.cl.cam.ac.uk/Research/SRG/netos/xen/)
sys/arch/README
sys/arch/xen/compile/Makefile
sys/arch/xen/conf/GENERIC
sys/arch/xen/conf/GENERIC.local
sys/arch/xen/conf/Makefile.xen
sys/arch/xen/conf/XEN
sys/arch/xen/conf/files.compat
sys/arch/xen/conf/files.xen
sys/arch/xen/conf/kern.ldscript
sys/arch/xen/conf/majors.i386
sys/arch/xen/conf/majors.xen
sys/arch/xen/conf/std.xen
sys/arch/xen/i386/autoconf.c
sys/arch/xen/i386/cpu.c
sys/arch/xen/i386/gdt.c
sys/arch/xen/i386/genassym.cf
sys/arch/xen/i386/hypervisor.c
sys/arch/xen/i386/identcpu.c
sys/arch/xen/i386/locore.S
sys/arch/xen/i386/machdep.c
sys/arch/xen/i386/mainbus.c
sys/arch/xen/i386/npx.c
sys/arch/xen/i386/npx_hv.c
sys/arch/xen/i386/pmap.c
sys/arch/xen/i386/spl.S
sys/arch/xen/i386/trap.c
sys/arch/xen/i386/vector.S
sys/arch/xen/i386/xen_machdep.c
sys/arch/xen/include/cpu.h
sys/arch/xen/include/cpufunc.h
sys/arch/xen/include/events.h
sys/arch/xen/include/frameasm.h
sys/arch/xen/include/hypervisor-ifs/block.h
sys/arch/xen/include/hypervisor-ifs/dom0_ops.h
sys/arch/xen/include/hypervisor-ifs/dom_mem_ops.h
sys/arch/xen/include/hypervisor-ifs/hypervisor-if.h
sys/arch/xen/include/hypervisor-ifs/kbd.h
sys/arch/xen/include/hypervisor-ifs/network.h
sys/arch/xen/include/hypervisor-ifs/vbd.h
sys/arch/xen/include/hypervisor.h
sys/arch/xen/include/if_xennetvar.h
sys/arch/xen/include/intr.h
sys/arch/xen/include/pic.h
sys/arch/xen/include/pmap.h
sys/arch/xen/include/segments.h
sys/arch/xen/include/xen.h
sys/arch/xen/include/xenfunc.h
sys/arch/xen/include/xenpmap.h
sys/arch/xen/x86/consinit.c
sys/arch/xen/x86/intr.c
sys/arch/xen/xen/clock.c
sys/arch/xen/xen/console.c
sys/arch/xen/xen/events.c
sys/arch/xen/xen/if_xennet.c
sys/arch/xen/xen/xen_debug.c
--- a/sys/arch/README	Thu Mar 11 21:24:32 2004 +0000
+++ b/sys/arch/README	Thu Mar 11 21:44:08 2004 +0000
@@ -1,4 +1,4 @@
-$NetBSD: README,v 1.33 2004/02/13 10:08:55 wiz Exp $
+$NetBSD: README,v 1.34 2004/03/11 21:44:08 cl Exp $
 
 acorn26:	Acorn ARM2- and ARM3-based machines (arm, 20000509)
 acorn32:	Acorn computers Ltd. ARM 6/7/SA based machines (arm, 20011118)
@@ -56,6 +56,7 @@
 sun3:		Sun m680[23]0 based machines (m68k, 19930625)
 vax:		Digital Equipment Corp. VAX machines (vax, 19940802)
 x68k:		Sharp X68000, X68030 (m68k, 19960505)
+xen:		Xen virtual machine monitor (xen, 20040311)
 
 Generic architectural features shared among multiple ports
 arm:		ARM CPU based platform files
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/compile/Makefile	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,3 @@
+#	$NetBSD: Makefile,v 1.1 2004/03/11 21:44:08 cl Exp $
+
+.include <bsd.prog.mk>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/GENERIC	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,269 @@
+# $NetBSD: GENERIC,v 1.1 2004/03/11 21:44:08 cl Exp $
+# NetBSD: GENERIC,v 1.594 2004/02/25 18:56:26 perry Exp 
+#
+# GENERIC machine description file
+#
+# This machine description file is used to generate the default NetBSD
+# kernel.  The generic kernel does not include all options, subsystems
+# and device drivers, but should be useful for most applications.
+#
+# The machine description file can be customised for your specific
+# machine to reduce the kernel size and improve its performance.
+#
+# For further information on compiling NetBSD kernels, see the config(8)
+# man page.
+#
+# For further information on hardware support for this architecture, see
+# the intro(4) man page.  For further information about kernel options
+# for this architecture, see the options(4) man page.  For an explanation
+# of each device driver in this file see the section 4 man page for the
+# device.
+
+include 	"arch/xen/conf/std.xen"
+
+options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
+
+#ident 		"GENERIC-$Revision: 1.1 $"
+
+maxusers	32		# estimated number of users
+
+#
+options		XEN
+options		HZ=50
+
+# CPU support.  At least one is REQUIRED.
+options 	I686_CPU
+
+# CPU-related options.
+#options 	VM86		# virtual 8086 emulation
+#options 	USER_LDT	# user-settable LDT; used by WINE
+
+#options 	MTRR		# memory-type range register syscall support
+# doesn't work with MP just yet..
+#options 	PERFCTRS	# performance-monitoring counters support
+
+# delay between "rebooting ..." message and hardware reset, in milliseconds
+#options 	CPURESET_DELAY=2000
+
+# force Xen console
+options		CONSDEVNAME="\"xen\""
+
+# This option allows you to force a serial console at the specified
+# I/O address.   see console(4) for details.
+#options 	CONSDEVNAME="\"com\"",CONADDR=0x2f8,CONSPEED=57600
+#	you don't want the option below ON iff you are using the
+#	serial console option of the new boot strap code.
+#options 	CONS_OVERRIDE	# Always use above! independent of boot info
+
+# Standard system options
+
+options 	INSECURE	# disable kernel security levels - X needs this
+
+options 	RTC_OFFSET=0	# hardware clock is this many mins. west of GMT
+#options 	NTP		# NTP phase/frequency locked loop
+#options 	NO_TSC_TIME	# Don't use TSC microtime, even if available.
+				# Improves time behavior under VMware.
+
+options 	KTRACE		# system call tracing via ktrace(1)
+options 	SYSTRACE	# system call vetting via systrace(1)
+
+options 	SYSVMSG		# System V-like message queues
+options 	SYSVSEM		# System V-like semaphores
+#options 	SEMMNI=10	# number of semaphore identifiers
+#options 	SEMMNS=60	# number of semaphores in system
+#options 	SEMUME=10	# max number of undo entries per process
+#options 	SEMMNU=30	# number of undo structures in system
+options 	SYSVSHM		# System V-like memory sharing
+#options 	SHMMAXPGS=2048	# 2048 pages is the default
+options 	P1003_1B_SEMAPHORE	# p1003.1b semaphore support
+
+options 	LKM		# loadable kernel modules
+
+options 	USERCONF	# userconf(4) support
+#options 	PIPE_SOCKETPAIR	# smaller, but slower pipe(2)
+
+# Beep when it is safe to power down the system (requires sysbeep)
+#options 	BEEP_ONHALT
+# Some tunable details of the above feature (default values used below)
+#options 	BEEP_ONHALT_COUNT=3	# Times to beep
+#options 	BEEP_ONHALT_PITCH=1500	# Default frequency (in Hz)
+#options 	BEEP_ONHALT_PERIOD=250	# Default duration (in msecs)
+
+# Enable experimental buffer queue strategy for better responsiveness under
+# high disk I/O load. Likely stable but not yet the default.
+#options 	NEW_BUFQ_STRATEGY
+
+# Diagnostic/debugging support options
+#options 	DIAGNOSTIC	# expensive kernel consistency checks
+#options 	DEBUG		# expensive debugging checks/support
+#options 	KMEMSTATS	# kernel memory statistics (vmstat -m)
+options 	DDB		# in-kernel debugger
+#options 	DDB_ONPANIC=1	# see also sysctl(8): `ddb.onpanic'
+options 	DDB_HISTORY_SIZE=512	# enable history editing in DDB
+#options 	KGDB		# remote debugger
+#options 	KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600
+#makeoptions	DEBUG="-g"	# compile full symbol table
+
+# Compatibility options
+options 	COMPAT_NOMID	# NetBSD 0.8, 386BSD, and BSDI
+options 	COMPAT_09	# NetBSD 0.9
+options 	COMPAT_10	# NetBSD 1.0
+options 	COMPAT_11	# NetBSD 1.1
+options 	COMPAT_12	# NetBSD 1.2, 386BSD, and BSDI
+options 	COMPAT_13	# NetBSD 1.3, 386BSD, and BSDI
+options 	COMPAT_14	# NetBSD 1.4
+options 	COMPAT_15	# NetBSD 1.5
+options 	COMPAT_16	# NetBSD 1.6
+options 	COMPAT_43	# 4.3BSD, 386BSD, and BSDI
+options 	COMPAT_386BSD_MBRPART # recognize old partition ID
+#options 	TCP_COMPAT_42	# 4.2BSD TCP/IP bug compat. Not recommended.
+
+options 	COMPAT_OSSAUDIO	# OSS (Voxware) audio driver compatibility
+options 	COMPAT_SVR4	# binary compatibility with SVR4
+options 	COMPAT_IBCS2	# binary compatibility with SCO and ISC
+options 	COMPAT_LINUX	# binary compatibility with Linux
+options 	COMPAT_FREEBSD	# binary compatibility with FreeBSD
+#options 	COMPAT_MACH	# binary compatibility with Mach binaries
+#options 	COMPAT_DARWIN	# binary compatibility with Darwin binaries
+#options 	EXEC_MACHO	# exec MACH-O binaries
+#options 	COMPAT_PECOFF	# kernel support to run Win32 apps
+
+# File systems
+file-system 	FFS		# UFS
+#file-system 	EXT2FS		# second extended file system (linux)
+#file-system 	LFS		# log-structured file system
+file-system 	MFS		# memory file system
+file-system 	NFS		# Network File System client
+#file-system 	NTFS		# Windows/NT file system (experimental)
+#file-system 	CD9660		# ISO 9660 + Rock Ridge file system
+#file-system 	MSDOSFS		# MS-DOS file system
+file-system 	FDESC		# /dev/fd
+file-system 	KERNFS		# /kern
+file-system 	NULLFS		# loopback file system
+file-system 	OVERLAY		# overlay file system
+file-system 	PORTAL		# portal filesystem (still experimental)
+file-system 	PROCFS		# /proc
+file-system 	UMAPFS		# NULLFS + uid and gid remapping
+file-system 	UNION		# union file system
+file-system	CODA		# Coda File System; also needs vcoda (below)
+file-system	SMBFS		# experimental - CIFS; also needs nsmb (below)
+
+# File system options
+options 	QUOTA		# UFS quotas
+#options 	FFS_EI		# FFS Endian Independent support
+options 	SOFTDEP		# FFS soft updates support.
+options 	NFSSERVER	# Network File System server
+#options 	EXT2FS_SYSTEM_FLAGS # makes ext2fs file flags (append and
+				# immutable) behave as system flags.
+
+# Networking options
+#options 	GATEWAY		# packet forwarding
+options 	INET		# IP + ICMP + TCP + UDP
+options 	INET6		# IPV6
+#options 	IPSEC		# IP security
+#options 	IPSEC_ESP	# IP security (encryption part; define w/IPSEC)
+#options 	IPSEC_DEBUG	# debug for IP security
+#options 	MROUTING	# IP multicast routing
+options 	NS		# XNS
+#options 	NSIP		# XNS tunneling over IP
+options 	ISO,TPIP	# OSI
+#options 	EON		# OSI tunneling over IP
+options 	CCITT,LLC,HDLC	# X.25
+options 	NETATALK	# AppleTalk networking protocols
+options 	PPP_BSDCOMP	# BSD-Compress compression support for PPP
+options 	PPP_DEFLATE	# Deflate compression support for PPP
+options 	PPP_FILTER	# Active filter support for PPP (requires bpf)
+options 	PFIL_HOOKS	# pfil(9) packet filter hooks
+options 	IPFILTER_LOG	# ipmon(8) log support
+#options 	IPFILTER_DEFAULT_BLOCK	# block all packets by default
+#options 	TCP_DEBUG	# Record last TCP_NDEBUG packets with SO_DEBUG
+
+options 	NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM,NFS_BOOT_BOOTSTATIC
+#options 	NFS_BOOTSTATIC_MYIP="\"169.254.1.2\""
+#options 	NFS_BOOTSTATIC_GWIP="\"169.254.1.1\""
+#options 	NFS_BOOTSTATIC_MASK="\"255.255.255.0\""
+#options 	NFS_BOOTSTATIC_SERVADDR="\"169.254.1.1\""
+#options 	NFS_BOOTSTATIC_SERVER="\"server:/path/to/root\""
+
+# Kernel root file system and dump configuration.
+config		netbsd	root on ? type ?
+#config		netbsd	root on xennet0 type nfs
+#config		netbsd	root on ? type nfs
+
+#
+# Device configuration
+#
+
+mainbus0 at root
+
+cpu* at mainbus?
+
+npx0	at mainbus?		# x86 math coprocessor
+
+xenc*	at mainbus?		# Xen virtual console
+xennet*	at mainbus?		# Xen virtual network interface
+
+
+# Pull in optional local configuration
+include	"arch/xen/conf/GENERIC.local"
+
+
+# Pseudo-Devices
+
+pseudo-device 	crypto		# opencrypto framework
+
+# disk/mass storage pseudo-devices
+pseudo-device	ccd		4	# concatenated/striped disk devices
+#pseudo-device	cgd		4	# cryptographic disk devices
+pseudo-device	raid		8	# RAIDframe disk driver
+options 	RAID_AUTOCONFIG		# auto-configuration of RAID components
+# Options to enable various other RAIDframe RAID types.
+#options 	RF_INCLUDE_EVENODD=1
+#options 	RF_INCLUDE_RAID5_RS=1
+#options 	RF_INCLUDE_PARITYLOGGING=1
+#options 	RF_INCLUDE_CHAINDECLUSTER=1
+#options 	RF_INCLUDE_INTERDECLUSTER=1
+#options 	RF_INCLUDE_PARITY_DECLUSTERING=1
+#options 	RF_INCLUDE_PARITY_DECLUSTERING_DS=1
+
+# Vinum
+# pseudo-device	vinum		8	# Vinum
+# options 	VINUMDEBUG
+
+pseudo-device	md		1	# memory disk device (ramdisk)
+pseudo-device	vnd		4	# disk-like interface to files
+
+# network pseudo-devices
+pseudo-device	bpfilter	8	# Berkeley packet filter
+pseudo-device	ipfilter		# IP filter (firewall) and NAT
+pseudo-device	loop			# network loopback
+pseudo-device	ppp		2	# Point-to-Point Protocol
+pseudo-device	pppoe			# PPP over Ethernet (RFC 2516)
+pseudo-device	sl		2	# Serial Line IP
+pseudo-device	strip		2	# Starmode Radio IP (Metricom)
+pseudo-device	irframetty		# IrDA frame line discipline
+pseudo-device	tun		2	# network tunneling over tty
+pseudo-device	gre		2	# generic L3 over IP tunnel
+pseudo-device	gif		4	# IPv[46] over IPv[46] tunnel (RFC1933)
+#pseudo-device	faith		1	# IPv[46] tcp relay translation i/f
+#pseudo-device	stf		1	# 6to4 IPv6 over IPv4 encapsulation
+pseudo-device	vlan			# IEEE 802.1q encapsulation
+pseudo-device	bridge			# simple inter-network bridging
+#options 	BRIDGE_IPF		# bridge uses IP/IPv6 pfil hooks too
+
+# miscellaneous pseudo-devices
+pseudo-device	pty			# pseudo-terminals
+pseudo-device	tb		1	# tablet line discipline
+pseudo-device	sequencer	1	# MIDI sequencer
+# rnd works; RND_COM does not on port i386 yet.
+pseudo-device	rnd			# /dev/random and in-kernel generator
+#options 	RND_COM			# use "com" randomness as well (BROKEN)
+pseudo-device	clockctl		# user control of clock subsystem
+
+# a pseudo device needed for Coda	# also needs CODA (above)
+pseudo-device	vcoda		4	# coda minicache <-> venus comm.
+
+# a pseudo device needed for SMBFS
+pseudo-device	nsmb			# experimental - SMB requester
+
+pseudo-device	ksyms			# /dev/ksyms
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/GENERIC.local	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,6 @@
+#	$NetBSD: GENERIC.local,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: GENERIC.local,v 1.1 1996/09/09 16:37:08 mycroft Exp 
+#
+#	GENERIC.local -- local additions to the GENERIC configuration
+#
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/Makefile.xen	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,118 @@
+#	$NetBSD: Makefile.xen,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: Makefile.i386,v 1.132 2003/07/05 16:56:10 simonb Exp 
+
+# Makefile for NetBSD
+#
+# This makefile is constructed from a machine description:
+#	config machineid
+# Most changes should be made in the machine description
+#	/sys/arch/xen/conf/``machineid''
+# after which you should do
+#	config machineid
+# Machine generic makefile changes should be made in
+#	/sys/arch/xen/conf/Makefile.xen
+# after which config should be rerun for all machines of that type.
+#
+# To specify debugging, add the config line: makeoptions DEBUG="-g"
+# A better way is to specify -g only for a few files.
+#
+#	makeoptions DEBUGLIST="uvm* trap if_*"
+
+MACHINE_ARCH?=	i386
+MACHINE_ARCH2?=	x86
+USETOOLS?=	no
+NEED_OWN_INSTALL_TARGET?=no
+.include <bsd.own.mk>
+
+##
+## (1) port identification
+##
+XEN=		$S/arch/xen
+XEN_MA=		$S/arch/${MACHINE_ARCH}
+XEN_MA2=	$S/arch/${MACHINE_ARCH2}
+GENASSYM=	${XEN}/${MACHINE_ARCH}/genassym.cf
+
+##
+## (2) compile settings
+##
+CPPFLAGS+=	-D${MACHINE_ARCH}
+AFLAGS+=	-x assembler-with-cpp -traditional-cpp ${DBG}
+EXTRA_INCLUDES=	-I${.CURDIR}/xen-ma
+
+##
+## (3) libkern and compat
+##
+KERN_AS=	obj
+
+##
+## (4) local objects, compile rules, and dependencies
+##
+MD_OBJS=	locore.o spl.o vector.o
+MD_CFILES=
+MD_SFILES=	${XEN}/${MACHINE_ARCH}/locore.S ${XEN}/${MACHINE_ARCH}/spl.S \
+		${XEN}/${MACHINE_ARCH}/vector.S
+
+locore.o: ${XEN}/${MACHINE_ARCH}/locore.S assym.h
+	${NORMAL_S}
+
+spl.o: ${XEN}/${MACHINE_ARCH}/spl.S assym.h
+	${NORMAL_S}
+
+vector.o: ${XEN}/${MACHINE_ARCH}/vector.S assym.h
+	${NORMAL_S}
+
+.if !make(obj) && !make(clean) && !make(cleandir)
+.BEGIN:
+	rm -f ${MACHINE_ARCH} ${MACHINE_ARCH2}
+	ln -s ${XEN_MA}/include ${MACHINE_ARCH}
+	ln -s ${XEN_MA2}/include ${MACHINE_ARCH2}
+	rm -rf xen-ma
+	mkdir xen-ma
+	ln -s ../${MACHINE_ARCH} xen-ma/machine
+.endif
+
+##
+## (5) link settings
+##
+TEXTADDR?=	c0100000
+LINKFLAGS_NORMAL=	-X
+.if (${OBJECT_FMT} == "ELF")
+KERN_LDSCRIPT?=	kern.ldscript
+LINKFORMAT=	-T ${XEN_MA}/conf/${KERN_LDSCRIPT}
+.else
+LINKFORMAT=	-z
+.endif
+
+##
+## (6) port specific target dependencies
+##
+
+freebsd_sigcode.o ibcs2_sigcode.o linux_sigcode.o: assym.h
+svr4_sigcode.o mach_sigcode.o: assym.h
+apmcall.o in_cksum.o pnpbioscall.o bioscall.o: assym.h
+mptramp.o: assym.h
+clock.o: config_time.h
+
+##
+## (7) misc settings
+##
+
+##
+## (8) config(8) generated machinery
+##
+%INCLUDES
+
+%OBJS
+
+%CFILES
+
+%SFILES
+
+%LOAD
+
+%RULES
+
+##
+## (9) port independent kernel machinery
+##
+.include "$S/conf/Makefile.kern.inc"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/XEN	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,147 @@
+# $NetBSD: XEN,v 1.1 2004/03/11 21:44:08 cl Exp $
+
+include 	"arch/xen/conf/std.xen"
+
+options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
+
+#options		UVMHIST
+#options		UVMHIST_PRINT
+#options		SYSCALL_DEBUG
+
+maxusers	32		# estimated number of users
+
+#
+options		XEN
+options		HZ=50
+
+#options 	I586_CPU
+options 	I686_CPU
+
+#options 	VM86		# virtual 8086 emulation
+#options 	USER_LDT	# user-settable LDT; used by WINE
+
+#options 	MTRR		# memory-type range register syscall support
+
+options		CONSDEVNAME="\"xen\""
+#options 	CONSDEVNAME="\"com\"",CONADDR=0x2f8,CONSPEED=57600
+#options 	CONS_OVERRIDE
+
+options		INSECURE	# disable kernel security levels - X needs this
+
+options 	RTC_OFFSET=0	# hardware clock is this many mins. west of GMT
+#options 	NTP		# NTP phase/frequency locked loop
+
+options 	KTRACE		# system call tracing via ktrace(1)
+#options 	SYSTRACE	# system call vetting via systrace(1)
+
+options 	SYSVMSG		# System V-like message queues
+options 	SYSVSEM		# System V-like semaphores
+#options 	SEMMNI=10	# number of semaphore identifiers
+#options 	SEMMNS=60	# number of semaphores in system
+#options 	SEMUME=10	# max number of undo entries per process
+#options 	SEMMNU=30	# number of undo structures in system
+options 	SYSVSHM		# System V-like memory sharing
+#options 	SHMMAXPGS=2048	# 2048 pages is the default
+options 	P1003_1B_SEMAPHORE	# p1003.1b semaphore support
+
+options 	LKM		# loadable kernel modules
+
+options 	USERCONF	# userconf(4) support
+
+# Diagnostic/debugging support options
+options 	DIAGNOSTIC	# expensive kernel consistency checks
+options 	DEBUG		# expensive debugging checks/support 
+options 	KMEMSTATS	# kernel memory statistics (vmstat -m)
+options 	DDB		# in-kernel debugger
+options		DDB_ONPANIC=1	# see also sysctl(8): `ddb.onpanic'
+options 	DDB_HISTORY_SIZE=512	# enable history editing in DDB
+#options 	KGDB		# remote debugger
+#options 	KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x2f8,KGDB_DEVRATE=57600
+makeoptions	DEBUG="-g"	# compile full symbol table
+
+#options 	COMPAT_14	# NetBSD 1.4
+#options 	COMPAT_15	# NetBSD 1.5
+options 	COMPAT_16	# NetBSD 1.6
+
+##options 	COMPAT_LINUX	# binary compatibility with Linux
+#options 	COMPAT_FREEBSD	# binary compatibility with FreeBSD
+#options 	COMPAT_MACH	# binary compatibility with Mach binaries
+#options	COMPAT_DARWIN	# binary compatibility with Darwin binaries
+#options 	EXEC_MACHO	# exec MACH-O binaries
+#options 	COMPAT_PECOFF	# kernel support to run Win32 apps
+
+#file-system 	FFS		# UFS
+#file-system 	EXT2FS		# second extended file system (linux)
+#file-system 	LFS		# log-structured file system
+#file-system 	MFS		# memory file system
+file-system 	NFS		# Network File System client
+#file-system 	NTFS		# Windows/NT file system (experimental)
+#file-system 	CD9660		# ISO 9660 + Rock Ridge file system
+#file-system 	MSDOSFS		# MS-DOS file system
+file-system 	FDESC		# /dev/fd
+file-system 	KERNFS		# /kern
+file-system 	NULLFS		# loopback file system
+#file-system 	OVERLAY		# overlay file system
+#file-system 	PORTAL		# portal filesystem (still experimental)
+file-system 	PROCFS		# /proc
+#file-system 	UMAPFS		# NULLFS + uid and gid remapping
+#file-system 	UNION		# union file system
+#file-system	SMBFS		# experimental - CIFS; also needs nsmb (below)
+
+#options 	QUOTA		# UFS quotas
+#options 	SOFTDEP		# FFS soft updates support.
+#options 	NFSSERVER	# Network File System server
+
+options 	GATEWAY		# packet forwarding
+options 	INET		# IP + ICMP + TCP + UDP
+options 	INET6		# IPV6
+options 	IPSEC		# IP security
+options 	IPSEC_ESP	# IP security (encryption part; define w/IPSEC)
+options 	MROUTING	# IP multicast routing
+options 	PFIL_HOOKS	# pfil(9) packet filter hooks
+options 	IPFILTER_LOG	# ipmon(8) log support
+
+options 	NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM,NFS_BOOT_BOOTSTATIC
+#options 	NFS_BOOTSTATIC_MYIP="\"169.254.1.2\""
+#options 	NFS_BOOTSTATIC_GWIP="\"169.254.1.1\""
+#options 	NFS_BOOTSTATIC_MASK="\"255.255.255.0\""
+#options 	NFS_BOOTSTATIC_SERVADDR="\"169.254.1.1\""
+#options 	NFS_BOOTSTATIC_SERVER="\"server:/path/to/root\""
+
+config		netbsd	root on ? type ?
+#config		netbsd	root on xennet0 type nfs
+
+mainbus0 at root
+
+cpu* at mainbus?
+
+npx0	at mainbus?		# x86 math coprocessor
+
+xenc*	at mainbus?		# Xen virtual console
+xennet*	at mainbus?		# Xen virtual network interface
+
+
+include	"arch/xen/conf/GENERIC.local"
+
+
+#pseudo-device	ccd		4	# concatenated/striped disk devices
+#pseudo-device	cgd		4	# cryptographic disk devices
+#pseudo-device	md		1	# memory disk device (ramdisk)
+#pseudo-device	vnd		4	# disk-like interface to files
+
+pseudo-device	bpfilter	8	# Berkeley packet filter
+pseudo-device	ipfilter		# IP filter (firewall) and NAT
+pseudo-device	loop			# network loopback
+#pseudo-device	tun		2	# network tunneling over tty
+#pseudo-device	gre		2	# generic L3 over IP tunnel
+#pseudo-device	gif		4	# IPv[46] over IPv[46] tunnel (RFC1933)
+#pseudo-device	faith		1	# IPv[46] tcp relay translation i/f
+#pseudo-device	stf		1	# 6to4 IPv6 over IPv4 encapsulation
+#pseudo-device	vlan			# IEEE 802.1q encapsulation
+#pseudo-device	bridge			# simple inter-network bridging
+
+pseudo-device	pty			# pseudo-terminals
+pseudo-device	rnd			# /dev/random and in-kernel generator
+pseudo-device	clockctl		# user control of clock subsystem
+
+pseudo-device	ksyms			# /dev/ksyms
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/files.compat	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,63 @@
+#	$NetBSD: files.compat,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp 
+
+# options for MP configuration through the MP spec
+#defflag opt_mpbios.h MPBIOS MPVERBOSE MPDEBUG MPBIOS_SCANPCI
+#defflag opt_mpacpi.h MPACPI MPACPI_SCANPCI
+
+# MTRR support
+defflag                 MTRR
+
+defflag opt_xen.h		XEN
+
+
+# XXX define fake options to make config create the .h files
+defflag	ioapic.h		XXXIOAPIC
+defflag	lapic.h			XXXLAPIC
+defflag	apm.h			XXXAPM
+defflag	pci.h			XXXPCI
+defflag	isa.h			XXXISA
+defflag	eisa.h			XXXEISA
+defflag	isadma.h		XXXISADMA
+defflag	mca.h			XXXMCA
+defflag	ega.h			XXXEGA
+defflag	pcdisplay.h		XXXPCDISPLAY
+defflag	pckbd.h			XXXPCKBD
+defflag	pc.h			XXXPC
+defflag	ukbd.h			XXXUKBD
+defflag	bios32.h		XXXBIOS32
+defflag	pnpbios.h		XXXPNPBIOS
+defflag	vesabios.h		XXXVESABIOS
+defflag	bioscall.h		XXXBIOSCALL
+defflag	opt_pcibios.h		XXXOPT_PCIBIOS
+defflag	acpi.h			XXXACPI
+defflag	opt_mpacpi.h		XXXOPT_MPACPI
+defflag	opt_mpbios.h		XXXOPT_MPBIOS
+
+# VM86 emulation
+defflag	opt_vm86.h			XXXVM86
+defflag	opt_kvm86.h			XXXKVM86
+
+# Floating point emulation
+defflag opt_math_emulate.h		XXXMATH_EMULATE
+
+# User-settable LDT (used by WINE)
+defflag	opt_user_ldt.h			XXXUSER_LDT
+
+# X server support in console drivers
+defflag	opt_xserver.h			XXXXSERVER XXXXSERVER_DDB
+
+# The REAL{BASE,EXT}MEM options
+defparam opt_realmem.h			XXXREALBASEMEM XXXREALEXTMEM
+
+# understand boot device passed by pre-1.3 bootblocks
+defflag	opt_compat_oldboot.h		XXXCOMPAT_OLDBOOT
+
+# Large page size
+defflag	opt_largepages.h		XXXLARGEPAGES
+
+# kernel stack debug
+defflag	opt_kstack_dr0.h		XXXKSTACK_CHECK_DR0
+
+# compat linux #include wsdisplay.h
+defparam wsdisplay.h			XXXWSDISPLAY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/files.xen	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,199 @@
+#	$NetBSD: files.xen,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp 
+#	NetBSD: files.i386,v 1.249 2004/02/16 17:11:27 wiz Exp 
+
+maxpartitions 8
+
+maxusers 2 16 128
+
+# Processor type options.
+defflag	opt_cputype.h	I686_CPU
+
+# delay before cpu_reset() for reboot.
+defparam		CPURESET_DELAY
+
+# No unmapped page below kernel stack
+defflag			NOREDZONE
+
+# Beep on halt
+defflag opt_beep.h		BEEP_ONHALT
+defparam opt_beep.h		BEEP_ONHALT_COUNT
+defparam opt_beep.h		BEEP_ONHALT_PITCH BEEP_ONHALT_PERIOD
+
+file	arch/xen/i386/autoconf.c
+file	arch/i386/i386/db_dbgreg.S	ddb | kstack_check_dr0
+file	arch/i386/i386/db_disasm.c	ddb
+file	arch/i386/i386/db_interface.c	ddb
+file	arch/i386/i386/db_memrw.c	ddb | kgdb
+file	arch/i386/i386/db_trace.c	ddb
+file	kern/subr_disk_mbr.c		disk
+file	arch/xen/i386/gdt.c
+file	arch/xen/i386/hypervisor.c
+file	arch/i386/i386/in_cksum.S	inet | inet6
+file	arch/i386/i386/ipkdb_glue.c	ipkdb
+file	arch/i386/i386/kgdb_machdep.c	kgdb
+file	arch/xen/i386/machdep.c
+file	arch/xen/i386/identcpu.c
+file	arch/i386/i386/math_emulate.c	math_emulate
+file	arch/i386/i386/mem.c
+file	kern/kern_microtime.c		i586_cpu | i686_cpu
+file	arch/i386/i386/mtrr_k6.c	mtrr
+file	netns/ns_cksum.c		ns
+file	arch/xen/i386/pmap.c
+file	arch/i386/i386/process_machdep.c
+file	arch/i386/i386/procfs_machdep.c	procfs
+file	arch/i386/i386/sys_machdep.c
+file	arch/i386/i386/syscall.c
+file	arch/xen/i386/trap.c
+file	arch/i386/i386/vm_machdep.c
+file	arch/xen/i386/xen_machdep.c
+
+file	arch/xen/xen/xen_debug.c
+
+file	arch/xen/xen/clock.c
+file	arch/xen/xen/events.c
+
+file	dev/cons.c
+
+file	arch/i386/i386/mptramp.S		multiprocessor
+file    arch/i386/i386/ipifuncs.c	multiprocessor
+
+file	arch/i386/i386/pmc.c		perfctrs
+
+file	crypto/des/arch/i386/des_enc.S		des
+file	crypto/des/arch/i386/des_cbc.S		des
+
+file	crypto/blowfish/arch/i386/bf_enc.S	blowfish
+file	crypto/blowfish/arch/i386/bf_cbc.S	blowfish & !i386_cpu
+
+#
+# Machine-independent SCSI drivers
+#
+
+#xxx include	"dev/scsipi/files.scsipi"
+
+#
+# Machine-independent ATA drivers
+#
+
+#xxx include	"dev/ata/files.ata"
+
+# Memory Disk for install floppy
+file	dev/md_root.c			memory_disk_hooks
+
+#
+define  mainbus { [apid = -1] }
+
+file	arch/x86/x86/bus_dma.c
+file	arch/x86/x86/bus_space.c
+file	arch/x86/x86/cacheinfo.c
+file	arch/xen/x86/consinit.c
+file	arch/xen/x86/intr.c
+file	arch/x86/x86/ipi.c		multiprocessor
+file	arch/x86/x86/lock_machdep.c	lockdebug
+file	arch/x86/x86/softintr.c
+
+include	"arch/xen/conf/files.compat"
+
+#
+# System bus types
+#
+
+device	mainbus: mainbus
+attach	mainbus at root
+file	arch/xen/i386/mainbus.c	mainbus
+
+# Numeric Processing Extension; Math Co-processor
+device	npx
+file	arch/xen/i386/npx.c		npx needs-flag
+
+attach	npx at mainbus with npx_hv
+file	arch/xen/i386/npx_hv.c		npx_hv
+
+# Xen console support
+device	xenc: tty
+attach	xenc at mainbus
+file	arch/xen/xen/console.c		xenc needs-flag
+
+#include	"dev/wscons/files.wscons"
+#include	"dev/wsfont/files.wsfont"
+
+# CPUS
+
+define cpu { [apid = -1] }
+device cpu
+attach cpu at mainbus
+file	arch/xen/i386/cpu.c		cpu
+
+#
+# Compatibility modules
+#
+
+# VM86 mode
+file	arch/i386/i386/vm86.c			vm86
+
+# VM86 in kernel
+file	arch/i386/i386/kvm86.c			kvm86
+file	arch/i386/i386/kvm86call.S		kvm86
+
+# Binary compatibility with previous NetBSD releases (COMPAT_XX)
+file	arch/i386/i386/compat_13_machdep.c	compat_13 | compat_aout
+file	arch/i386/i386/compat_16_machdep.c	compat_16 | compat_ibcs2
+
+# SVR4 binary compatibility (COMPAT_SVR4)
+include	"compat/svr4/files.svr4"
+file	arch/i386/i386/svr4_machdep.c		compat_svr4
+file	arch/i386/i386/svr4_sigcode.S		compat_svr4
+file	arch/i386/i386/svr4_syscall.c		compat_svr4
+
+# MACH binary compatibility (COMPAT_MACH)
+include	"compat/mach/files.mach"
+file	arch/i386/i386/mach_machdep.c		compat_mach | compat_darwin
+file	arch/i386/i386/mach_sigcode.S		compat_mach | compat_darwin
+file	arch/i386/i386/mach_syscall.c		compat_mach | compat_darwin
+file	arch/i386/i386/macho_machdep.c		exec_macho
+
+# DARWIN binary compatibility (COMPAT_DARWIN)
+include	"compat/darwin/files.darwin"
+file	arch/i386/i386/darwin_machdep.c		compat_darwin
+
+# iBCS-2 binary compatibility (COMPAT_IBCS2)
+include	"compat/ibcs2/files.ibcs2"
+file	arch/i386/i386/ibcs2_machdep.c		compat_ibcs2
+file	arch/i386/i386/ibcs2_sigcode.S		compat_ibcs2
+file	arch/i386/i386/ibcs2_syscall.c		compat_ibcs2
+
+# Linux binary compatibility (COMPAT_LINUX)
+include	"compat/linux/files.linux"
+include	"compat/linux/arch/i386/files.linux_i386"
+file	arch/i386/i386/linux_sigcode.S		compat_linux
+file	arch/i386/i386/linux_syscall.c		compat_linux
+file	arch/i386/i386/linux_trap.c		compat_linux
+
+# FreeBSD binary compatibility (COMPAT_FREEBSD)
+include	"compat/freebsd/files.freebsd"
+file	arch/i386/i386/freebsd_machdep.c	compat_freebsd
+file	arch/i386/i386/freebsd_sigcode.S	compat_freebsd
+file	arch/i386/i386/freebsd_syscall.c	compat_freebsd
+
+# a.out binary compatibility (COMPAT_AOUT)
+include	"compat/aout/files.aout"
+
+# Win32 binary compatibility (COMPAT_PECOFF)
+include	"compat/pecoff/files.pecoff"
+
+# OSS audio driver compatibility
+include	"compat/ossaudio/files.ossaudio"
+
+# Xen devices
+
+device	xennet: arp, ether, ifnet
+attach	xennet at mainbus
+file	arch/xen/xen/if_xennet.c	xennet needs-flag
+
+#device	xd: XXXXdisk
+#attach	xd at mainbus
+#file	arch/xen/xen/disk.c		xd needs-flag
+
+
+include "arch/xen/conf/majors.i386"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/kern.ldscript	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,139 @@
+/*	$NetBSD: kern.ldscript,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: kern.ldscript,v 1.1 2001/01/20 01:05:23 thorpej Exp 	*/
+
+/*
+ * Kernel linker script for NetBSD/i386.  This script is based on
+ * elf_i386.x, but puts _etext after all of the read-only sections.
+ */
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+	      "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SEARCH_DIR(/usr/lib);
+/* Do we need any of these for elf?
+   __DYNAMIC = 0;    */
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = 0x08048000 + SIZEOF_HEADERS;
+  .interp     : { *(.interp) 	}
+  .hash          : { *(.hash)		}
+  .dynsym        : { *(.dynsym)		}
+  .dynstr        : { *(.dynstr)		}
+  .gnu.version   : { *(.gnu.version)	}
+  .gnu.version_d   : { *(.gnu.version_d)	}
+  .gnu.version_r   : { *(.gnu.version_r)	}
+  .rel.text      :
+    { *(.rel.text) *(.rel.gnu.linkonce.t*) }
+  .rela.text     :
+    { *(.rela.text) *(.rela.gnu.linkonce.t*) }
+  .rel.data      :
+    { *(.rel.data) *(.rel.gnu.linkonce.d*) }
+  .rela.data     :
+    { *(.rela.data) *(.rela.gnu.linkonce.d*) }
+  .rel.rodata    :
+    { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
+  .rela.rodata   :
+    { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+  .rel.got       : { *(.rel.got)		}
+  .rela.got      : { *(.rela.got)		}
+  .rel.ctors     : { *(.rel.ctors)	}
+  .rela.ctors    : { *(.rela.ctors)	}
+  .rel.dtors     : { *(.rel.dtors)	}
+  .rela.dtors    : { *(.rela.dtors)	}
+  .rel.init      : { *(.rel.init)	}
+  .rela.init     : { *(.rela.init)	}
+  .rel.fini      : { *(.rel.fini)	}
+  .rela.fini     : { *(.rela.fini)	}
+  .rel.bss       : { *(.rel.bss)		}
+  .rela.bss      : { *(.rela.bss)		}
+  .rel.plt       : { *(.rel.plt)		}
+  .rela.plt      : { *(.rela.plt)		}
+  .init          : { *(.init)	} =0x9090
+  .plt      : { *(.plt)	}
+  .text      :
+  {
+    *(.text)
+    *(.stub)
+    /* .gnu.warning sections are handled specially by elf32.em.  */
+    *(.gnu.warning)
+    *(.gnu.linkonce.t*)
+  } =0x9090
+  .fini      : { *(.fini)    } =0x9090
+  .rodata    : { *(.rodata) *(.gnu.linkonce.r*) }
+  .rodata1   : { *(.rodata1) }
+  _etext = .;
+  PROVIDE (etext = .);
+  /* Adjust the address for the data segment.  We want to adjust up to
+     the same address within the page on the next page up.  */
+  . = ALIGN(0x1000) + (. & (0x1000 - 1));
+  .data    :
+  {
+    *(.data)
+    *(.gnu.linkonce.d*)
+    CONSTRUCTORS
+  }
+  .data1   : { *(.data1) }
+  .ctors         :
+  {
+    *(.ctors)
+  }
+  .dtors         :
+  {
+    *(.dtors)
+  }
+  .got           : { *(.got.plt) *(.got) }
+  .dynamic       : { *(.dynamic) }
+  /* We want the small data sections together, so single-instruction offsets
+     can access them all, and initialized data all before uninitialized, so
+     we can shorten the on-disk segment size.  */
+  .sdata     : { *(.sdata) }
+  _edata  =  .;
+  PROVIDE (edata = .);
+  __bss_start = .;
+  .sbss      : { *(.sbss) *(.scommon) }
+  .bss       :
+  {
+   *(.dynbss)
+   *(.bss)
+   *(COMMON)
+  }
+  . = ALIGN(32 / 8);
+  _end = . ;
+  PROVIDE (end = .);
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+  /* These must appear regardless of  .  */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/majors.i386	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,116 @@
+#	$NetBSD: majors.i386,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: majors.i386,v 1.23 2003/12/10 02:04:01 jmc Exp 
+#
+# Device majors for Xen/i386
+#
+
+device-major	cons		char 0
+device-major	ctty		char 1
+device-major	mem		char 2
+device-major	wd		char 3   block 0	wd
+device-major	swap		char 4   block 1
+device-major	pts		char 5			pty
+device-major	ptc		char 6			pty
+device-major	log		char 7
+device-major	com		char 8			com
+device-major	fd		char 9   block 2	fdc
+device-major	wt		char 10  block 3	wt
+device-major	scd		char 11  block 15	scd
+device-major	pc		char 12			pc
+device-major	sd		char 13  block 4	sd
+device-major	st		char 14  block 5	st
+device-major	cd		char 15  block 6	cd
+device-major	lpt		char 16			lpt
+device-major	ch		char 17			ch
+device-major	ccd		char 18  block 16	ccd
+device-major	ss		char 19			ss
+device-major	uk		char 20			uk
+device-major	apm		char 21			apm
+device-major	filedesc	char 22
+device-major	bpf		char 23			bpfilter
+device-major	md		char 24  block 17	md
+
+device-major	joy		char 26			joy
+device-major	spkr		char 27			spkr
+device-major	lkm		char 28			lkm
+
+# next three are obsolete
+#device-major	omms		char 35			omms
+#device-major	olms		char 36			olms
+#device-major	opms		char 37			opms
+device-major	cy		char 38			cy
+device-major	mcd		char 39  block 7	mcd
+device-major	tun		char 40			tun
+device-major	vnd		char 41  block 14	vnd
+device-major	audio		char 42			audio
+device-major	svr4_net	char 43			compat_svr4
+device-major	ipl		char 44			ipfilter
+device-major	satlink		char 45			satlink
+device-major	rnd		char 46			rnd
+device-major	wsdisplay	char 47			wsdisplay
+device-major	wskbd		char 48			wskbd
+device-major	wsmouse		char 49			wsmouse
+device-major	isdn		char 50			isdn
+device-major	isdnctl		char 51			isdnctl
+device-major	isdnbchan	char 52			isdnbchan
+device-major	isdntrc		char 53			isdntrc
+device-major	isdntel		char 54			isdntel
+device-major	usb		char 55			usb
+device-major	uhid		char 56			uhid
+device-major	ulpt		char 57			ulpt
+device-major	midi		char 58			midi
+device-major	sequencer	char 59			sequencer
+device-major	vcoda		char 60			vcoda
+device-major	scsibus		char 61			scsibus
+device-major	raid		char 62  block 18	raid
+device-major	esh		char 63			esh
+device-major	ugen		char 64			ugen
+device-major	wsmux		char 65			wsmux
+device-major	ucom		char 66			ucom
+device-major	sysmon		char 67			sysmon_envsys | sysmon_wdog | sysmon_power
+device-major	vmegeneric	char 68			vmegeneric
+device-major	ld		char 69  block 19	ld
+device-major	urio		char 70			urio
+device-major	bktr		char 71			bktr
+
+device-major	cz		char 73			cz
+device-major	ses		char 74			ses
+device-major	uscanner	char 75			uscanner
+device-major	iop		char 76			iop
+device-major	altq		char 77			altq
+device-major	mlx		char 78			mlx
+device-major	ed		char 79  block 20	ed_mca
+device-major	mly		char 80			mly
+device-major	wsfont		char 81			wsfont
+device-major	agp		char 82			agp
+device-major	pci		char 83			pci
+device-major	dpti		char 84			dpti
+device-major	irframe		char 85			irframedrv
+device-major	cir		char 86			cir
+device-major	radio		char 87			radio
+# major 88 free
+device-major	clockctl	char 89			clockctl
+device-major	systrace	char 90			systrace
+device-major	kttcp		char 91			kttcp
+device-major	dmoverio	char 92			dmoverio
+device-major	cgd		char 93  block 21	cgd
+device-major	verifiedexec	char 94			verifiedexec
+device-major	fwiso		char 95			fwiso
+device-major	dpt		char 96			dpt
+device-major	twe		char 97			twe
+device-major	nsmb		char 98			nsmb		
+device-major	vmmon		char 99			vmmon
+device-major	vmnet		char 100		vmnet
+device-major	ksyms		char 101		ksyms
+device-major	icp		char 102		icp
+device-major	gpib		char 103		gpib
+device-major	ppi		char 104		ppi
+device-major	rd		char 105 block 22	rd
+device-major	ct		char 106 block 23	ct
+device-major	mt		char 107 block 24	mt
+
+include "arch/xen/conf/majors.xen"
+
+# Majors up to 143 are reserved for machine-dependent drivers.
+# New machine-independent driver majors are assigned in
+# sys/conf/majors.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/majors.xen	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,10 @@
+#	$NetBSD: majors.xen,v 1.1 2004/03/11 21:44:08 cl Exp $
+#
+# Device majors for Xen
+#
+
+device-major	xenc		char 143		xenc
+
+# Majors up to 143 are reserved for machine-dependent drivers.
+# New machine-independent driver majors are assigned in
+# sys/conf/majors.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/conf/std.xen	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,16 @@
+# $NetBSD: std.xen,v 1.1 2004/03/11 21:44:08 cl Exp $
+# NetBSD: std.i386,v 1.24 2003/02/26 21:33:36 fvdl Exp 
+#
+# standard, required NetBSD/xen 'options'
+
+machine xen
+
+options 	EXEC_AOUT	# exec a.out binaries
+options 	EXEC_ELF32	# exec ELF binaries
+options 	EXEC_SCRIPT	# exec #! scripts
+
+options 	INET6_MD_CKSUM	# machine-dependent code for in6_cksum
+#options 	CRYPTO_MD_DES_ENC	# machine-dependent code for DES
+#options 	CRYPTO_MD_DES_CBC	# machine-dependent DES CBC code
+#options 	CRYPTO_MD_BF_ENC	# machine-dependent code for BF_encrypt
+#options 	CRYPTO_MD_BF_CBC	# careful: uses bswapl, requires 486
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/autoconf.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,600 @@
+/*	$NetBSD: autoconf.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: autoconf.c,v 1.75 2003/12/30 12:33:22 pk Exp 	*/
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)autoconf.c	7.1 (Berkeley) 5/9/91
+ */
+
+/*
+ * Setup the system to run on the current machine.
+ *
+ * Configure() is called at boot time and initializes the vba
+ * device tables and the memory controller monitoring.  Available
+ * devices are determined (from possibilities mentioned in ioconf.c),
+ * and the drivers are initialized.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include "opt_compat_oldboot.h"
+#include "opt_multiprocessor.h"
+#include "opt_nfs_boot.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/conf.h>
+#ifdef COMPAT_OLDBOOT
+#include <sys/reboot.h>
+#endif
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/dkio.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+
+#ifdef NFS_BOOT_BOOTSTATIC
+#include <net/if.h>
+#include <net/if_ether.h>
+#include <netinet/in.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsdiskless.h>
+#include <machine/if_xennetvar.h>
+#endif
+
+#include <machine/pte.h>
+#include <machine/cpu.h>
+#include <machine/gdt.h>
+#include <machine/pcb.h>
+#include <machine/bootinfo.h>
+
+#include "ioapic.h"
+#include "lapic.h"
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#endif
+
+#if NLAPIC > 0
+#include <machine/i82489var.h>
+#endif
+
+static int match_harddisk(struct device *, struct btinfo_bootdisk *);
+static void matchbiosdisks(void);
+static void findroot(void);
+static int is_valid_disk(struct device *);
+
+extern struct disklist *i386_alldisks;
+extern int i386_ndisks;
+
+#include "bios32.h"
+#if NBIOS32 > 0
+#include <machine/bios32.h>
+#endif
+
+#include "opt_pcibios.h"
+#ifdef PCIBIOS
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <i386/pci/pcibios.h>
+#endif
+
+#include "opt_kvm86.h"
+#ifdef KVM86
+#include <machine/kvm86.h>
+#endif
+
+#include "opt_xen.h"
+
+struct device *booted_device;
+int booted_partition;
+
+/*
+ * Determine i/o configuration for a machine.
+ */
+void
+cpu_configure(void)
+{
+
+	/* Clock first: probes below may rely on timing/delay. */
+	startrtclock();
+
+#if NBIOS32 > 0
+	bios32_init();
+#endif
+#ifdef PCIBIOS
+	pcibios_init();
+#endif
+
+	/* kvm86 needs a TSS */
+	i386_proc0_tss_ldt_init();
+#ifdef KVM86
+	kvm86_init();
+#endif
+
+	/* Attach the device tree rooted at mainbus; failure is fatal. */
+	if (config_rootfound("mainbus", NULL) == NULL)
+		panic("configure: mainbus not configured");
+
+#ifdef INTRDEBUG
+	intr_printconfig();
+#endif
+
+#if NIOAPIC > 0
+	lapic_set_lvt();
+	ioapic_enable();
+#endif
+	/* resync cr0 after FPU configuration */
+	lwp0.l_addr->u_pcb.pcb_cr0 = rcr0();
+#ifdef MULTIPROCESSOR
+	/* propagate this to the idle pcb's. */
+	cpu_init_idle_pcbs();
+#endif
+
+	/* Autoconfiguration done; lower the spl and (if present) the
+	 * local APIC task priority so interrupts can be delivered. */
+	spl0();
+#if NLAPIC > 0
+	lapic_tpr = 0;
+#endif
+}
+
+/*
+ * Select the root device: findroot() consults the bootinfo passed by
+ * the bootloader, matchbiosdisks() records BIOS<->native disk matches,
+ * then setroot() commits the result (booted_device may still be NULL,
+ * reported as "<unknown>").
+ */
+void
+cpu_rootconf(void)
+{
+	findroot();
+	matchbiosdisks();
+
+	printf("boot device: %s\n",
+	    booted_device ? booted_device->dv_xname : "<unknown>");
+
+	setroot(booted_device, booted_partition);
+}
+
+/*
+ * XXX ugly bit of code. But, this is the only safe time that the
+ * match between BIOS disks and native disks can be done.
+ */
+static void
+matchbiosdisks(void)
+{
+	struct btinfo_biosgeom *big;
+	struct bi_biosgeom_entry *be;
+	struct device *dv;
+	int i, ck, error, m, n;
+	struct vnode *tv;
+	char mbr[DEV_BSIZE];
+	int  dklist_size;
+	int bmajor;
+
+	big = lookup_bootinfo(BTINFO_BIOSGEOM);
+
+	/* No BIOS geometry information from the bootloader: nothing to do. */
+	if (big == NULL)
+		return;
+
+	/*
+	 * First, count all native disks
+	 */
+	for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next)
+		if (is_valid_disk(dv))
+			i386_ndisks++;
+
+	if (i386_ndisks == 0)
+		return;
+
+	/* disklist has one nativedisk_info built in; add the rest. */
+	dklist_size = sizeof (struct disklist) + (i386_ndisks - 1) *
+	    sizeof (struct nativedisk_info);
+
+	/* XXX M_TEMP is wrong */
+	i386_alldisks = malloc(dklist_size, M_TEMP, M_NOWAIT);
+	if (i386_alldisks == NULL)
+		return;
+
+	memset(i386_alldisks, 0, dklist_size);
+
+	/* Copy the BIOS-reported geometry for each BIOS disk. */
+	i386_alldisks->dl_nnativedisks = i386_ndisks;
+	i386_alldisks->dl_nbiosdisks = big->num;
+	for (i = 0; i < big->num; i++) {
+		i386_alldisks->dl_biosdisks[i].bi_dev = big->disk[i].dev;
+		i386_alldisks->dl_biosdisks[i].bi_sec = big->disk[i].sec;
+		i386_alldisks->dl_biosdisks[i].bi_head = big->disk[i].head;
+		i386_alldisks->dl_biosdisks[i].bi_cyl = big->disk[i].cyl;
+		i386_alldisks->dl_biosdisks[i].bi_lbasecs = big->disk[i].totsec;
+		i386_alldisks->dl_biosdisks[i].bi_flags = big->disk[i].flags;
+#ifdef GEOM_DEBUG
+#ifdef NOTYET
+		printf("disk %x: flags %x, interface %x, device %llx\n",
+			big->disk[i].dev, big->disk[i].flags,
+			big->disk[i].interface_path, big->disk[i].device_path);
+#endif
+#endif
+	}
+
+	/*
+	 * XXX code duplication from findroot()
+	 */
+	n = -1;
+	for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
+		if (dv->dv_class != DV_DISK)
+			continue;
+#ifdef GEOM_DEBUG
+		printf("matchbiosdisks: trying to match (%s) %s\n",
+		    dv->dv_xname, dv->dv_cfdata->cf_name);
+#endif
+		if (is_valid_disk(dv)) {
+			n++;
+			sprintf(i386_alldisks->dl_nativedisks[n].ni_devname,
+			    "%s%d", dv->dv_cfdata->cf_name,
+			    dv->dv_unit);
+
+			bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
+			if (bmajor == -1)
+				return;
+
+			/* Fake a vnode for the raw partition and read the MBR. */
+			if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART),
+			    &tv))
+				panic("matchbiosdisks: can't alloc vnode");
+
+			error = VOP_OPEN(tv, FREAD, NOCRED, 0);
+			if (error) {
+				vput(tv);
+				continue;
+			}
+			error = vn_rdwr(UIO_READ, tv, mbr, DEV_BSIZE, 0,
+			    UIO_SYSSPACE, 0, NOCRED, NULL, 0);
+			VOP_CLOSE(tv, FREAD, NOCRED, 0);
+			if (error) {
+#ifdef GEOM_DEBUG
+				printf("matchbiosdisks: %s: MBR read failure\n",
+				    dv->dv_xname);
+#endif
+				/* Release the bdevvp reference; this path
+				 * previously leaked the vnode. */
+				vput(tv);
+				continue;
+			}
+
+			/* Match by MBR byte checksum plus partition table. */
+			for (ck = i = 0; i < DEV_BSIZE; i++)
+				ck += mbr[i];
+			for (m = i = 0; i < big->num; i++) {
+				be = &big->disk[i];
+#ifdef GEOM_DEBUG
+				printf("match %s with %d ", dv->dv_xname, i);
+				printf("dev ck %x bios ck %x\n", ck, be->cksum);
+#endif
+				if (be->flags & BI_GEOM_INVALID)
+					continue;
+				if (be->cksum == ck &&
+				    !memcmp(&mbr[MBR_PART_OFFSET], be->dosparts,
+					MBR_PART_COUNT *
+					    sizeof (struct mbr_partition))) {
+#ifdef GEOM_DEBUG
+					printf("matched bios disk %x with %s\n",
+					    be->dev, dv->dv_xname);
+#endif
+					i386_alldisks->dl_nativedisks[n].
+					    ni_biosmatches[m++] = i;
+				}
+			}
+			i386_alldisks->dl_nativedisks[n].ni_nmatches = m;
+			vput(tv);
+		}
+	}
+}
+
+#ifdef COMPAT_OLDBOOT
+u_long	bootdev = 0;		/* should be dev_t, but not until 32 bits */
+#endif
+
+/*
+ * helper function for "findroot()":
+ * return nonzero if disk device matches bootinfo
+ */
+static int
+match_harddisk(struct device *dv, struct btinfo_bootdisk *bid)
+{
+	struct vnode *tmpvn;
+	int error;
+	struct disklabel label;
+	int found = 0;		/* result: 1 iff label matches bootinfo */
+	int bmajor;
+
+	/*
+	 * A disklabel is required here.  The
+	 * bootblocks don't refuse to boot from
+	 * a disk without a label, but this is
+	 * normally not wanted.
+	 */
+	if (bid->labelsector == -1)
+		return(0);
+
+	/*
+	 * lookup major number for disk block device
+	 */
+	bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
+	if (bmajor == -1)
+		return(0); /* XXX panic() ??? */
+
+	/*
+	 * Fake a temporary vnode for the disk, open
+	 * it, and read the disklabel for comparison.
+	 */
+	if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, bid->partition), &tmpvn))
+		panic("findroot can't alloc vnode");
+	error = VOP_OPEN(tmpvn, FREAD, NOCRED, 0);
+	if (error) {
+#ifndef DEBUG
+		/*
+		 * Ignore errors caused by missing
+		 * device, partition or medium.
+		 */
+		if (error != ENXIO && error != ENODEV)
+#endif
+			printf("findroot: can't open dev %s%c (%d)\n",
+			       dv->dv_xname, 'a' + bid->partition, error);
+		vput(tmpvn);
+		return(0);
+	}
+	error = VOP_IOCTL(tmpvn, DIOCGDINFO, &label, FREAD, NOCRED, 0);
+	if (error) {
+		/*
+		 * XXX can't happen - open() would
+		 * have errored out (or faked up one)
+		 */
+		printf("can't get label for dev %s%c (%d)\n",
+		       dv->dv_xname, 'a' + bid->partition, error);
+		goto closeout;
+	}
+
+	/* compare with our data: type, checksum and pack name must agree */
+	if (label.d_type == bid->label.type &&
+	    label.d_checksum == bid->label.checksum &&
+	    !strncmp(label.d_packname, bid->label.packname, 16))
+		found = 1;
+
+closeout:
+	/* common cleanup: close and release the temporary vnode */
+	VOP_CLOSE(tmpvn, FREAD, NOCRED, 0);
+	vput(tmpvn);
+	return(found);
+}
+
+/*
+ * Attempt to find the device from which we were booted.
+ * If we can do so, and not instructed not to do so,
+ * change rootdev to correspond to the load device.
+ */
+void
+findroot(void)
+{
+	struct btinfo_bootdisk *bid;
+	struct device *dv;
+#ifdef COMPAT_OLDBOOT
+	int majdev, unit, part;
+	const char *name;	/* block driver name decoded from bootdev */
+	char buf[32];
+#endif
+
+	if (booted_device)
+		return;
+
+	if (lookup_bootinfo(BTINFO_NETIF)) {
+		/*
+		 * We got netboot interface information, but
+		 * "device_register()" couldn't match it to a configured
+		 * device. Bootdisk information cannot be present at the
+		 * same time, so give up.
+		 */
+		printf("findroot: netboot interface not found\n");
+		return;
+	}
+
+	bid = lookup_bootinfo(BTINFO_BOOTDISK);
+	if (bid) {
+		/*
+		 * Scan all disk devices for ones that match the passed data.
+		 * Don't break if one is found, to get possible multiple
+		 * matches - for problem tracking. Use the first match anyway
+		 * because lower device numbers are more likely to be the
+		 * boot device.
+		 */
+		for (dv = alldevs.tqh_first; dv != NULL;
+		    dv = dv->dv_list.tqe_next) {
+			if (dv->dv_class != DV_DISK)
+				continue;
+
+			if (!strcmp(dv->dv_cfdata->cf_name, "fd")) {
+				/*
+				 * Assume the configured unit number matches
+				 * the BIOS device number.  (This is the old
+				 * behaviour.)  Needs some ideas how to handle
+				 * BIOS's "swap floppy drive" options.
+				 */
+				if ((bid->biosdev & 0x80) ||
+				    dv->dv_unit != bid->biosdev)
+					continue;
+
+				goto found;
+			}
+
+			if (is_valid_disk(dv)) {
+				/*
+				 * Don't trust BIOS device numbers, try
+				 * to match the information passed by the
+				 * bootloader instead.
+				 */
+				if ((bid->biosdev & 0x80) == 0 ||
+				    !match_harddisk(dv, bid))
+					continue;
+
+				goto found;
+			}
+
+			/* no "fd", "wd", "sd", "ld", "ed" */
+			continue;
+
+found:
+			if (booted_device) {
+				printf("warning: double match for boot "
+				    "device (%s, %s)\n",
+				    booted_device->dv_xname, dv->dv_xname);
+				continue;
+			}
+			booted_device = dv;
+			booted_partition = bid->partition;
+		}
+
+		if (booted_device)
+			return;
+	}
+
+#ifdef COMPAT_OLDBOOT
+#if 0
+	printf("howto %x bootdev %x ", boothowto, bootdev);
+#endif
+
+	/* Fall back to the old-style packed bootdev word. */
+	if ((bootdev & B_MAGICMASK) != (u_long)B_DEVMAGIC)
+		return;
+
+	majdev = (bootdev >> B_TYPESHIFT) & B_TYPEMASK;
+	name = devsw_blk2name(majdev);
+	if (name == NULL)
+		return;
+
+	part = (bootdev >> B_PARTITIONSHIFT) & B_PARTITIONMASK;
+	unit = (bootdev >> B_UNITSHIFT) & B_UNITMASK;
+
+	/* Find the configured device whose name matches, e.g. "wd0". */
+	sprintf(buf, "%s%d", name, unit);
+	for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
+		if (strcmp(buf, dv->dv_xname) == 0) {
+			booted_device = dv;
+			booted_partition = part;
+			return;
+		}
+	}
+#endif
+}
+
+#include "pci.h"
+
+#include <dev/isa/isavar.h>
+#if NPCI > 0
+#include <dev/pci/pcivar.h>
+#endif
+
+void
+device_register(struct device *dev, void *aux)
+{
+	/*
+	 * Handle network interfaces here, the attachment information is
+	 * not available driver-independently later.
+	 * For disks, there is nothing useful available at attach time.
+	 */
+#ifdef XEN
+	/*
+	 * Xen: the boot interface name comes from the hypervisor command
+	 * line; compare it against this interface's device name.
+	 */
+	if (dev->dv_class == DV_IFNET) {
+		char bootdev[16]; /* sizeof(dv_xname) */
+
+		xen_parse_cmdline(bootdev, NULL);
+		if (strncmp(bootdev, dev->dv_xname, 16) == 0) {
+#ifdef NFS_BOOT_BOOTSTATIC
+			nfs_bootstatic_callback = xennet_bootstatic_callback;
+#endif
+			goto found;
+		}
+	}
+#endif
+	if (dev->dv_class == DV_IFNET) {
+		struct btinfo_netif *bin = lookup_bootinfo(BTINFO_NETIF);
+		if (bin == NULL)
+			return;
+
+		/*
+		 * We don't check the driver name against the device name
+		 * passed by the boot ROM. The ROM should stay usable
+		 * if the driver gets obsoleted.
+		 * The physical attachment information (checked below)
+		 * must be sufficient to identify the device.
+		 */
+
+		if (bin->bus == BI_BUS_ISA &&
+		    !strcmp(dev->dv_parent->dv_cfdata->cf_name, "isa")) {
+			struct isa_attach_args *iaa = aux;
+
+			/* compare IO base address */
+			/* XXXJRT what about multiple I/O addrs? */
+			if (iaa->ia_nio > 0 &&
+			    bin->addr.iobase == iaa->ia_io[0].ir_addr)
+				goto found;
+		}
+#if NPCI > 0
+		if (bin->bus == BI_BUS_PCI &&
+		    !strcmp(dev->dv_parent->dv_cfdata->cf_name, "pci")) {
+			struct pci_attach_args *paa = aux;
+			int b, d, f;
+
+			/*
+			 * Calculate BIOS representation of:
+			 *
+			 *	<bus,device,function>
+			 *
+			 * and compare.
+			 */
+			pci_decompose_tag(paa->pa_pc, paa->pa_tag, &b, &d, &f);
+			if (bin->addr.tag == ((b << 8) | (d << 3) | f))
+				goto found;
+		}
+#endif
+	}
+	return;
+
+found:
+	if (booted_device) {
+		/* XXX should be a "panic()" */
+		printf("warning: double match for boot device (%s, %s)\n",
+		    booted_device->dv_xname, dev->dv_xname);
+		return;
+	}
+	booted_device = dev;
+}
+
+/*
+ * Return nonzero iff "dv" is a disk device whose driver is one we
+ * know how to match against BIOS/bootloader information.
+ */
+static int
+is_valid_disk(struct device *dv)
+{
+	static const char *const drivers[] = { "sd", "wd", "ld", "ed" };
+	const char *cfname;
+	unsigned int i;
+
+	if (dv->dv_class != DV_DISK)
+		return (0);
+
+	cfname = dv->dv_cfdata->cf_name;
+	for (i = 0; i < sizeof(drivers) / sizeof(drivers[0]); i++)
+		if (strcmp(cfname, drivers[i]) == 0)
+			return (1);
+
+	return (0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/cpu.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,808 @@
+/*	$NetBSD: cpu.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp  */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the NetBSD
+ *      Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include "opt_ddb.h"
+#include "opt_multiprocessor.h"
+#include "opt_mpbios.h"		/* for MPDEBUG */
+#include "opt_mtrr.h"
+#include "opt_xen.h"
+
+#include "lapic.h"
+#include "ioapic.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/mpbiosvar.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/gdt.h>
+#include <machine/mtrr.h>
+#include <machine/tlog.h>
+#include <machine/pio.h>
+
+#if NLAPIC > 0
+#include <machine/apicvar.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+#endif
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#endif
+
+#include <dev/ic/mc146818reg.h>
+#include <i386/isa/nvram.h>
+#include <dev/isa/isareg.h>
+
+int     cpu_match(struct device *, struct cfdata *, void *);
+void    cpu_attach(struct device *, struct device *, void *);
+
+struct cpu_softc {
+	struct device sc_dev;		/* device tree glue */
+	struct cpu_info *sc_info;	/* pointer to CPU info */
+};
+
+int mp_cpu_start(struct cpu_info *); 
+void mp_cpu_start_cleanup(struct cpu_info *);
+struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
+				      mp_cpu_start_cleanup };
+
+
+CFATTACH_DECL(cpu, sizeof(struct cpu_softc),
+    cpu_match, cpu_attach, NULL, NULL);
+
+/*
+ * Statically-allocated CPU info for the primary CPU (or the only
+ * CPU, on uniprocessors).  The CPU info list is initialized to
+ * point at it.
+ */
+#ifdef TRAPLOG
+struct tlog tlog_primary;
+struct cpu_info cpu_info_primary = { 0, &cpu_info_primary, &tlog_primary };
+#else  /* TRAPLOG */
+struct cpu_info cpu_info_primary = { 0, &cpu_info_primary };
+#endif /* !TRAPLOG */
+
+struct cpu_info *cpu_info_list = &cpu_info_primary;
+
+static void	cpu_set_tss_gates(struct cpu_info *ci);
+#if !defined(XEN)
+static void	cpu_init_tss(struct i386tss *, void *, void *);
+#endif
+
+u_int32_t cpus_attached = 0;
+
+#ifdef MULTIPROCESSOR
+/*
+ * Array of CPU info structures.  Must be statically-allocated because
+ * curproc, etc. are used early.
+ */
+struct cpu_info *cpu_info[X86_MAXPROCS] = { &cpu_info_primary };
+
+u_int32_t cpus_running = 0;
+
+void    	cpu_hatch(void *);
+static void    	cpu_boot_secondary(struct cpu_info *ci);
+static void    	cpu_start_secondary(struct cpu_info *ci);
+static void	cpu_copy_trampoline(void);
+
+/*
+ * Runs once per boot once multiprocessor goo has been detected and
+ * the local APIC on the boot processor has been mapped.
+ *
+ * Called from lapic_boot_init() (from mpbios_scan()).
+ */
+void
+cpu_init_first()
+{
+	int cpunum = lapic_cpu_number();
+
+	/*
+	 * The primary CPU's cpu_info was statically placed in slot 0;
+	 * if the boot processor's APIC ID is not 0, move it to the
+	 * slot matching its real APIC ID.
+	 */
+	if (cpunum != 0) {
+		cpu_info[0] = NULL;
+		cpu_info[cpunum] = &cpu_info_primary;
+	}
+
+	cpu_copy_trampoline();
+}
+#endif
+
+/*
+ * cpu_match: autoconf(9) match routine.  Succeeds when the attach
+ * arguments name this driver.
+ */
+int
+cpu_match(parent, match, aux)
+	struct device *parent;
+	struct cfdata *match;
+	void *aux;
+{
+	struct cpu_attach_args *caa = aux;
+
+	if (strcmp(caa->caa_name, match->cf_name) == 0)
+		return 1;
+	return 0;
+}
+
+/*
+ * cpu_vm_init: derive the number of page colors from this CPU's
+ * cache geometry and ask UVM to re-color its free pages if that
+ * exceeds the current color count.
+ */
+static void
+cpu_vm_init(struct cpu_info *ci)
+{
+	int ncolors = 2, i;
+
+	for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
+		struct x86_cache_info *cai;
+		int tcolors;
+
+		cai = &ci->ci_cinfo[i];
+
+		/* colors = cache size in pages / associativity */
+		tcolors = atop(cai->cai_totalsize);
+		switch(cai->cai_associativity) {
+		case 0xff:
+			tcolors = 1; /* fully associative */
+			break;
+		case 0:
+		case 1:
+			break;
+		default:
+			tcolors /= cai->cai_associativity;
+		}
+		ncolors = max(ncolors, tcolors);
+	}
+
+	/*
+	 * Knowing the size of the largest cache on this CPU, re-color
+	 * our pages.
+	 */
+	if (ncolors <= uvmexp.ncolors)
+		return;
+	printf("%s: %d page colors\n", ci->ci_dev->dv_xname, ncolors);
+	uvm_page_recolor(ncolors);
+}
+
+
+/*
+ * cpu_attach: autoconf(9) attach routine.  Builds (or adopts) the
+ * cpu_info for this processor, allocates an idle PCB/stack on MP
+ * kernels, and initializes the CPU according to its role:
+ * single processor (SP), boot processor (BP) or application
+ * processor (AP).
+ */
+void
+cpu_attach(parent, self, aux)
+	struct device *parent, *self;
+	void *aux;
+{
+	struct cpu_softc *sc = (void *) self;
+	struct cpu_attach_args *caa = aux;
+	struct cpu_info *ci;
+#if defined(MULTIPROCESSOR)
+	int cpunum = caa->cpu_number;
+	vaddr_t kstack;
+	struct pcb *pcb;
+#endif
+
+	/*
+	 * If we're an Application Processor, allocate a cpu_info
+	 * structure, otherwise use the primary's.
+	 */
+	if (caa->cpu_role == CPU_ROLE_AP) {
+		ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK);
+		memset(ci, 0, sizeof(*ci));
+#if defined(MULTIPROCESSOR)
+		if (cpu_info[cpunum] != NULL)
+			panic("cpu at apic id %d already attached?", cpunum);
+		cpu_info[cpunum] = ci;
+#endif
+#ifdef TRAPLOG
+		ci->ci_tlog_base = malloc(sizeof(struct tlog),
+		    M_DEVBUF, M_WAITOK);
+#endif
+	} else {
+		ci = &cpu_info_primary;
+#if defined(MULTIPROCESSOR)
+		/* sanity: the BP must be the CPU we are running on */
+		if (cpunum != lapic_cpu_number()) {
+			panic("%s: running CPU is at apic %d"
+			    " instead of at expected %d",
+			    sc->sc_dev.dv_xname, lapic_cpu_number(), cpunum);
+		}
+#endif
+	}
+
+	ci->ci_self = ci;
+	sc->sc_info = ci;
+
+	ci->ci_dev = self;
+	ci->ci_apicid = caa->cpu_number;
+#ifdef MULTIPROCESSOR
+	ci->ci_cpuid = ci->ci_apicid;
+#else
+	ci->ci_cpuid = 0;	/* False for APs, but they're not used anyway */
+#endif
+	ci->ci_func = caa->cpu_func;
+
+	simple_lock_init(&ci->ci_slock);
+
+#if defined(MULTIPROCESSOR)
+	/*
+	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
+	 */
+	kstack = uvm_km_alloc (kernel_map, USPACE);
+	if (kstack == 0) {
+		if (caa->cpu_role != CPU_ROLE_AP) {
+			panic("cpu_attach: unable to allocate idle stack for"
+			    " primary");
+		}
+		printf("%s: unable to allocate idle stack\n",
+		    sc->sc_dev.dv_xname);
+		return;
+	}
+	/* the PCB lives at the bottom of the idle stack */
+	pcb = ci->ci_idle_pcb = (struct pcb *) kstack;
+	memset(pcb, 0, USPACE);
+
+	pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+	pcb->pcb_tss.tss_esp0 =
+	    kstack + USPACE - 16 - sizeof (struct trapframe);
+	pcb->pcb_tss.tss_esp =
+	    kstack + USPACE - 16 - sizeof (struct trapframe);
+	pcb->pcb_cr0 = rcr0();
+	pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa;
+#endif
+	/* every CPU starts out running on the kernel pmap */
+	pmap_reference(pmap_kernel());
+	ci->ci_pmap = pmap_kernel();
+	ci->ci_tlbstate = TLBSTATE_STALE;
+
+	/* further PCB init done later. */
+
+	printf(": ");
+
+	switch (caa->cpu_role) {
+	case CPU_ROLE_SP:
+		printf("(uniprocessor)\n");
+		ci->ci_flags |= CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY;
+		cpu_intr_init(ci);
+		identifycpu(ci);
+		cpu_init(ci);
+		cpu_set_tss_gates(ci);
+		break;
+
+	case CPU_ROLE_BP:
+		printf("apid %d (boot processor)\n", caa->cpu_number);
+		ci->ci_flags |= CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY;
+		cpu_intr_init(ci);
+		identifycpu(ci);
+		cpu_init(ci);
+		cpu_set_tss_gates(ci);
+
+#if NLAPIC > 0
+		/*
+		 * Enable local apic
+		 */
+		lapic_enable();
+		lapic_calibrate_timer(ci);
+#endif
+#if NIOAPIC > 0
+		ioapic_bsp_id = caa->cpu_number;
+#endif
+		break;
+
+	case CPU_ROLE_AP:
+		/*
+		 * report on an AP
+		 */
+		printf("apid %d (application processor)\n", caa->cpu_number);
+
+#if defined(MULTIPROCESSOR)
+		cpu_intr_init(ci);
+		gdt_alloc_cpu(ci);
+		cpu_set_tss_gates(ci);
+		cpu_start_secondary(ci);
+		/* link the AP into the cpu_info list once it is up */
+		if (ci->ci_flags & CPUF_PRESENT) {
+			identifycpu(ci);
+			ci->ci_next = cpu_info_list->ci_next;
+			cpu_info_list->ci_next = ci;
+		}
+#else
+		printf("%s: not started\n", sc->sc_dev.dv_xname);
+#endif
+		break;
+
+	default:
+		panic("unknown processor type??\n");
+	}
+	cpu_vm_init(ci);
+
+	cpus_attached |= (1 << ci->ci_cpuid);
+
+#if defined(MULTIPROCESSOR)
+	if (mp_verbose) {
+		printf("%s: kstack at 0x%lx for %d bytes\n",
+		    sc->sc_dev.dv_xname, kstack, USPACE);
+		printf("%s: idle pcb at %p, idle sp at 0x%x\n",
+		    sc->sc_dev.dv_xname, pcb, pcb->pcb_esp);
+	}
+#endif
+}
+
+/*
+ * Initialize the processor appropriately.
+ *
+ * Runs the optional per-model setup hook, then enables CPU features
+ * (WP, PGE, MTRRs, FXSR/SSE) as available, and finally marks the CPU
+ * running on MP kernels.
+ */
+
+void
+cpu_init(ci)
+	struct cpu_info *ci;
+{
+	/* configure the CPU if needed */
+	if (ci->cpu_setup != NULL)
+		(*ci->cpu_setup)(ci);
+
+#if !defined(XEN)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	/*
+	 * On a 486 or above, enable ring 0 write protection.
+	 * (Skipped under Xen -- NOTE(review): presumably because the
+	 * guest cannot write CR0 directly; confirm.)
+	 */
+	if (ci->ci_cpu_class >= CPUCLASS_486)
+		lcr0(rcr0() | CR0_WP);
+#endif
+#endif
+#if defined(I686_CPU)
+	/*
+	 * On a P6 or above, enable global TLB caching if the
+	 * hardware supports it.
+	 */
+	if (cpu_feature & CPUID_PGE)
+		lcr4(rcr4() | CR4_PGE);	/* enable global TLB caching */
+
+#ifdef MTRR
+	/*
+	 * On a P6 or above, initialize MTRR's if the hardware supports them.
+	 */
+	if (cpu_feature & CPUID_MTRR) {
+		if ((ci->ci_flags & CPUF_AP) == 0)
+			i686_mtrr_init_first();
+		mtrr_init_cpu(ci);
+	}
+#endif
+#endif
+#if defined(I686_CPU)
+	/*
+	 * If we have FXSAVE/FXRESTOR, use them.
+	 */
+	if (cpu_feature & CPUID_FXSR) {
+		lcr4(rcr4() | CR4_OSFXSR);
+
+		/*
+		 * If we have SSE/SSE2, enable XMM exceptions.
+		 */
+		if (cpu_feature & (CPUID_SSE|CPUID_SSE2))
+			lcr4(rcr4() | CR4_OSXMMEXCPT);
+	}
+#endif /* I686_CPU */
+#ifdef MTRR
+	/* AMD K6 family has its own MTRR scheme */
+	if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) {
+		/*
+		 * Must be a K6-2 Step >= 7 or a K6-III.
+		 */
+		if (CPUID2FAMILY(ci->ci_signature) == 5) {
+			if (CPUID2MODEL(ci->ci_signature) > 8 ||
+			    (CPUID2MODEL(ci->ci_signature) == 8 &&
+			     CPUID2STEPPING(ci->ci_signature) >= 7)) {
+				mtrr_funcs = &k6_mtrr_funcs;
+				k6_mtrr_init_first();
+				mtrr_init_cpu(ci);
+			}
+		}
+	}
+#endif /* MTRR */
+
+#ifdef MULTIPROCESSOR
+	/* publish that this CPU is up and running */
+	ci->ci_flags |= CPUF_RUNNING;
+	cpus_running |= 1 << ci->ci_cpuid;
+#endif
+}
+
+
+#ifdef MULTIPROCESSOR
+/*
+ * cpu_boot_secondary_processors: kick every attached, present AP
+ * (anything that is not the BSP/SP/primary) into its idle loop.
+ */
+void
+cpu_boot_secondary_processors()
+{
+	struct cpu_info *ci;
+	u_long i;
+
+	for (i=0; i < X86_MAXPROCS; i++) {
+		ci = cpu_info[i];
+		if (ci == NULL)
+			continue;
+		if (ci->ci_idle_pcb == NULL)
+			continue;
+		if ((ci->ci_flags & CPUF_PRESENT) == 0)
+			continue;
+		/* skip the processor we are already running on */
+		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
+			continue;
+		cpu_boot_secondary(ci);
+	}
+}
+
+/*
+ * cpu_init_idle_pcbs: finish PCB/TSS/LDT setup for the idle PCB of
+ * every present CPU that has one allocated.
+ */
+void
+cpu_init_idle_pcbs()
+{
+	struct cpu_info *ci;
+	u_long i;
+
+	for (i=0; i < X86_MAXPROCS; i++) {
+		ci = cpu_info[i];
+		if (ci == NULL)
+			continue;
+		if (ci->ci_idle_pcb == NULL)
+			continue;
+		if ((ci->ci_flags & CPUF_PRESENT) == 0)
+			continue;
+		i386_init_pcb_tss_ldt(ci);
+	}
+}
+
+/*
+ * cpu_start_secondary: start an AP via its cpu_functions start hook
+ * and busy-wait (up to ~1s) for it to announce itself by setting
+ * CPUF_PRESENT in cpu_hatch().
+ */
+void
+cpu_start_secondary (ci)
+	struct cpu_info *ci;
+{
+	struct pcb *pcb;
+	int i;
+	struct pmap *kpm = pmap_kernel();
+	extern u_int32_t mp_pdirpa;
+
+	mp_pdirpa = kpm->pm_pdirpa; /* XXX move elsewhere, not per CPU. */
+
+	pcb = ci->ci_idle_pcb;
+
+	ci->ci_flags |= CPUF_AP;
+
+	printf("%s: starting\n", ci->ci_dev->dv_xname);
+
+	CPU_STARTUP(ci);
+
+	/*
+	 * wait for it to become ready
+	 */
+	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i>0;i--) {
+		delay(10);
+	}
+	if (! (ci->ci_flags & CPUF_PRESENT)) {
+		printf("%s: failed to become ready\n", ci->ci_dev->dv_xname);
+#if defined(MPDEBUG) && defined(DDB)
+		printf("dropping into debugger; continue from here to resume boot\n");
+		Debugger();
+#endif
+	}
+
+	CPU_START_CLEANUP(ci);
+}
+
+/*
+ * cpu_boot_secondary: release a hatched AP (which spins on CPUF_GO in
+ * cpu_hatch()) and busy-wait for it to report CPUF_RUNNING.
+ */
+void
+cpu_boot_secondary(ci)
+	struct cpu_info *ci;
+{
+	int i;
+
+	ci->ci_flags |= CPUF_GO; /* XXX atomic */
+
+	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i>0;i--) {
+		delay(10);
+	}
+	if (! (ci->ci_flags & CPUF_RUNNING)) {
+		printf("CPU failed to start\n");
+#if defined(MPDEBUG) && defined(DDB)
+		printf("dropping into debugger; continue from here to resume boot\n");
+		Debugger();
+#endif
+	}
+}
+
+/*
+ * The CPU ends up here when it's ready to run.
+ * This is called from code in mptramp.s; at this point, we are running
+ * in the idle pcb/idle stack of the new CPU.  When this function returns,
+ * this processor will enter the idle loop and start looking for work.
+ *
+ * XXX should share some of this with init386 in machdep.c
+ */
+void
+cpu_hatch(void *v)
+{
+	struct cpu_info *ci = (struct cpu_info *)v;
+	int s;
+
+	/* restrict the global feature mask to what all CPUs support */
+	cpu_probe_features(ci);
+	cpu_feature &= ci->ci_feature_flags;
+
+#ifdef DEBUG
+	if (ci->ci_flags & CPUF_PRESENT)
+		panic("%s: already running!?", ci->ci_dev->dv_xname);
+#endif
+
+	/* tell cpu_start_secondary() we made it */
+	ci->ci_flags |= CPUF_PRESENT;
+
+	lapic_enable();
+	lapic_initclocks();
+
+	/* wait until cpu_boot_secondary() releases us */
+	while ((ci->ci_flags & CPUF_GO) == 0)
+		delay(10);
+#ifdef DEBUG
+	if (ci->ci_flags & CPUF_RUNNING)
+		panic("%s: already running!?", ci->ci_dev->dv_xname);
+#endif
+
+	/* load per-CPU state: CR0, IDT, LVT, GDT, FPU, LDT */
+	lcr0(ci->ci_idle_pcb->pcb_cr0);
+	cpu_init_idt();
+	lapic_set_lvt();
+	gdt_init_cpu(ci);
+	npxinit(ci);
+
+	lldt(GSEL(GLDT_SEL, SEL_KPL));
+
+	cpu_init(ci);
+
+	s = splhigh();
+	lapic_tpr = 0;
+	enable_intr();
+
+	printf("%s: CPU %ld running\n",ci->ci_dev->dv_xname, ci->ci_cpuid);
+#if defined(I586_CPU) || defined(I686_CPU)
+	if (ci->ci_feature_flags & CPUID_TSC)
+		cc_microset(ci);
+#endif
+	microtime(&ci->ci_schedstate.spc_runtime);
+	splx(s);
+}
+
+#if defined(DDB)
+
+#include <ddb/db_output.h>
+#include <machine/db_machdep.h>
+
+/*
+ * Dump CPU information from ddb.
+ */
+void
+cpu_debug_dump(void)
+{
+	struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+
+	db_printf("addr		dev	id	flags	ipis	curproc		fpcurproc\n");
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		/* "BOOT" marks a CPU not yet attached to a device */
+		db_printf("%p	%s	%ld	%x	%x	%10p	%10p\n",
+		    ci,
+		    ci->ci_dev == NULL ? "BOOT" : ci->ci_dev->dv_xname,
+		    ci->ci_cpuid,
+		    ci->ci_flags, ci->ci_ipis,
+		    ci->ci_curlwp,
+		    ci->ci_fpcurlwp);
+	}
+}
+#endif
+
+/*
+ * cpu_copy_trampoline: map the low-memory trampoline page 1:1 and
+ * copy the AP spin-up code there, where a freshly started AP (in real
+ * mode) can reach it.
+ */
+static void
+cpu_copy_trampoline()
+{
+	/*
+	 * Copy boot code.
+	 */
+	extern u_char cpu_spinup_trampoline[];
+	extern u_char cpu_spinup_trampoline_end[];
+	pmap_kenter_pa((vaddr_t)MP_TRAMPOLINE,	/* virtual */
+	    (paddr_t)MP_TRAMPOLINE,	/* physical */
+	    VM_PROT_ALL);		/* protection */
+	memcpy((caddr_t)MP_TRAMPOLINE,
+	    cpu_spinup_trampoline,
+	    cpu_spinup_trampoline_end-cpu_spinup_trampoline);
+}
+
+#endif
+
+
+#ifndef XEN
+/*
+ * cpu_init_tss: set up a task-gate TSS so that `func' runs on the
+ * private stack `stack' (used for the double-fault and DDB-IPI
+ * handlers).  Not used under Xen.
+ */
+static void
+cpu_init_tss(struct i386tss *tss, void *stack, void *func)
+{
+	memset(tss, 0, sizeof *tss);
+	tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
+	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+	tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+	tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
+	tss->tss_gs = tss->__tss_es = tss->__tss_ds =
+	    tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
+	tss->tss_cr3 = pmap_kernel()->pm_pdirpa;
+	/* NOTE(review): tss_esp was already set above; this is redundant. */
+	tss->tss_esp = (int)((char *)stack + USPACE - 16);
+	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+	tss->__tss_eflags = PSL_MBO | PSL_NT;	/* XXX not needed? */
+	tss->__tss_eip = (int)func;
+}
+#endif
+
+/* XXX */
+#define IDTVEC(name)	__CONCAT(X, name)
+typedef void (vector)(void);
+extern vector IDTVEC(tss_trap08);
+#ifdef DDB
+extern vector Xintrddbipi;
+extern int ddb_vec;
+#endif
+
+/*
+ * cpu_set_tss_gates: install per-CPU task gates (double fault, and
+ * with DDB+MP the DDB IPI) so those traps run on dedicated stacks.
+ * The native-TSS part is skipped under Xen.
+ */
+static void
+cpu_set_tss_gates(struct cpu_info *ci)
+{
+#ifndef XEN
+	struct segment_descriptor sd;
+
+	ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE);
+	cpu_init_tss(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack,
+	    IDTVEC(tss_trap08));
+	setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1,
+	    SDT_SYS386TSS, SEL_KPL, 0, 0);
+	ci->ci_gdt[GTRAPTSS_SEL].sd = sd;
+	/* vector 8 == double fault */
+	setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
+	    GSEL(GTRAPTSS_SEL, SEL_KPL));
+#endif
+
+#if defined(DDB) && defined(MULTIPROCESSOR)
+	/*
+	 * Set up separate handler for the DDB IPI, so that it doesn't
+	 * stomp on a possibly corrupted stack.
+	 *
+	 * XXX overwriting the gate set in db_machine_init.
+	 * Should rearrange the code so that it's set only once.
+	 */
+	ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE);
+	cpu_init_tss(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack,
+	    Xintrddbipi);
+
+	setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1,
+	    SDT_SYS386TSS, SEL_KPL, 0, 0);
+	ci->ci_gdt[GIPITSS_SEL].sd = sd;
+
+	setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
+	    GSEL(GIPITSS_SEL, SEL_KPL));
+#endif
+}
+
+
+/*
+ * mp_cpu_start: start an AP following the Intel MP spec startup
+ * protocol: program the CMOS shutdown code and warm-reset vector,
+ * then send INIT followed by two STARTUP IPIs pointing at the
+ * trampoline page.  Returns 0 on success or an IPI error code.
+ */
+int
+mp_cpu_start(struct cpu_info *ci)
+{
+#if NLAPIC > 0
+	int error;
+#endif
+	unsigned short dwordptr[2];
+
+	/*
+	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
+	 */
+
+	outb(IO_RTC, NVRAM_RESET);
+	outb(IO_RTC+1, NVRAM_RESET_JUMP);
+
+	/*
+	 * "and the warm reset vector (DWORD based at 40:67) to point
+	 * to the AP startup code ..."
+	 */
+
+	/* real-mode far pointer: offset 0, segment = trampoline >> 4 */
+	dwordptr[0] = 0;
+	dwordptr[1] = MP_TRAMPOLINE >> 4;
+
+	/* temporarily map physical page 0 to reach 0x467 */
+	pmap_kenter_pa (0, 0, VM_PROT_READ|VM_PROT_WRITE);
+	memcpy ((u_int8_t *) 0x467, dwordptr, 4);
+	pmap_kremove (0, PAGE_SIZE);
+
+#if NLAPIC > 0
+	/*
+	 * ... prior to executing the following sequence:"
+	 */
+
+	if (ci->ci_flags & CPUF_AP) {
+		if ((error = x86_ipi_init(ci->ci_apicid)) != 0)
+			return error;
+
+		delay(10000);
+
+		if (cpu_feature & CPUID_APIC) {
+
+			if ((error = x86_ipi(MP_TRAMPOLINE/PAGE_SIZE,
+					     ci->ci_apicid,
+					     LAPIC_DLMODE_STARTUP)) != 0)
+				return error;
+			delay(200);
+
+			if ((error = x86_ipi(MP_TRAMPOLINE/PAGE_SIZE,
+					     ci->ci_apicid,
+					     LAPIC_DLMODE_STARTUP)) != 0)
+				return error;
+			delay(200);
+		}
+	}
+#endif
+	return 0;
+}
+
+/*
+ * mp_cpu_start_cleanup: undo mp_cpu_start()'s CMOS shutdown-code
+ * setting once the AP is up.
+ */
+void
+mp_cpu_start_cleanup(struct cpu_info *ci)
+{
+	/*
+	 * Ensure the NVRAM reset byte contains something vaguely sane.
+	 */
+
+	outb(IO_RTC, NVRAM_RESET);
+	outb(IO_RTC+1, NVRAM_RESET_RST);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/gdt.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,406 @@
+/*	$NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: gdt.c,v 1.32 2004/02/13 11:36:13 wiz Exp 	*/
+
+/*-
+ * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by John T. Kohl and Charles M. Hannum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include "opt_multiprocessor.h"
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/user.h>
+
+#include <uvm/uvm.h>
+
+#include <machine/gdt.h>
+
+int gdt_size[2];	/* total number of GDT entries */
+int gdt_count[2];	/* number of GDT entries in use */
+int gdt_next[2];	/* next available slot for sweeping */
+int gdt_free[2];	/* next free slot; terminated with GNULL_SEL */
+
+struct lock gdt_lock_store;
+
+static __inline void gdt_lock(void);
+static __inline void gdt_unlock(void);
+void gdt_init(void);
+void gdt_grow(int);
+int gdt_get_slot(void);
+int gdt_get_slot1(int);
+void gdt_put_slot(int);
+void gdt_put_slot1(int, int);
+
+/*
+ * Lock and unlock the GDT, to avoid races in case gdt_{ge,pu}t_slot() sleep
+ * waiting for memory.
+ *
+ * Note that the locking done here is not sufficient for multiprocessor
+ * systems.  A freshly allocated slot will still be of type SDT_SYSNULL for
+ * some time after the GDT is unlocked, so gdt_compact() could attempt to
+ * reclaim it.
+ */
+/* Take the GDT lock (may sleep). */
+static __inline void
+gdt_lock()
+{
+
+	(void) lockmgr(&gdt_lock_store, LK_EXCLUSIVE, NULL);
+}
+
+/* Release the GDT lock. */
+static __inline void
+gdt_unlock()
+{
+
+	(void) lockmgr(&gdt_lock_store, LK_RELEASE, NULL);
+}
+
+/*
+ * setgdt: install a segment descriptor into slot `sel' of every
+ * CPU's GDT.  Under Xen the (read-only) GDT page must be updated via
+ * the hypervisor; TSS descriptors are refused entirely.
+ */
+void
+setgdt(int sel, void *base, size_t limit,
+    int type, int dpl, int def32, int gran)
+{
+	struct segment_descriptor sd;
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+
+	if (type == SDT_SYS386TSS) {
+		/* printk("XXX TSS descriptor not supported in GDT\n"); */
+		return;
+	}
+
+	setsegment(&sd, base, limit, type, dpl, def32, gran);
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		if (ci->ci_gdt != NULL) {
+#ifndef XEN
+			ci->ci_gdt[sel].sd = sd;
+#else
+			/* GDT pages are write-protected under Xen */
+			xen_update_descriptor(&ci->ci_gdt[sel],
+			    (union descriptor *)&sd);
+#endif
+		}
+	}
+}
+
+/*
+ * Initialize the GDT subsystem.  Called from autoconf().
+ */
+/*
+ * gdt_init: allocate the dynamic GDT for the primary CPU, back its
+ * first MINGDTSIZ entries with pages, copy over the bootstrap GDT,
+ * and load it.  Region 0 holds ordinary descriptors; region 1
+ * (the second max_len of VA) is reserved for LDT descriptors.
+ */
+void
+gdt_init()
+{
+	size_t max_len, min_len;
+	union descriptor *old_gdt;
+	struct vm_page *pg;
+	vaddr_t va;
+	struct cpu_info *ci = &cpu_info_primary;
+
+	lockinit(&gdt_lock_store, PZERO, "gdtlck", 0, 0);
+
+	max_len = MAXGDTSIZ * sizeof(gdt[0]);
+	min_len = MINGDTSIZ * sizeof(gdt[0]);
+
+	gdt_size[0] = MINGDTSIZ;
+	gdt_count[0] = NGDT;
+	gdt_next[0] = NGDT;
+	gdt_free[0] = GNULL_SEL;
+
+	/* region 1 starts unbacked; gdt_grow() populates it on demand */
+	gdt_size[1] = 0;
+	gdt_count[1] = MAXGDTSIZ;
+	gdt_next[1] = MAXGDTSIZ;
+	gdt_free[1] = GNULL_SEL;
+
+	old_gdt = gdt;
+	/* VA for both regions; only min_len of region 0 gets pages now */
+	gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len + max_len);
+	for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + min_len; va += PAGE_SIZE) {
+		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+		if (pg == NULL) {
+			panic("gdt_init: no pages");
+		}
+		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+		    VM_PROT_READ | VM_PROT_WRITE);
+	}
+	memcpy(gdt, old_gdt, NGDT * sizeof(gdt[0]));
+	ci->ci_gdt = gdt;
+	/* %fs-accessible descriptor covering this CPU's cpu_info */
+	setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
+	    SDT_MEMRWA, SEL_KPL, 1, 1);
+
+	gdt_init_cpu(ci);
+}
+
+/*
+ * Allocate shadow GDT for a slave CPU.
+ */
+/*
+ * Allocate shadow GDT for a slave CPU: reserve VA for the full-size
+ * GDT, back the minimum size with zeroed pages, and seed it with a
+ * copy of the current in-use entries plus this CPU's GCPU descriptor.
+ */
+void
+gdt_alloc_cpu(struct cpu_info *ci)
+{
+	int max_len = MAXGDTSIZ * sizeof(gdt[0]);
+	int min_len = MINGDTSIZ * sizeof(gdt[0]);
+	struct vm_page *pg;
+	vaddr_t va;
+
+	ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len);
+	for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len;
+	    va += PAGE_SIZE) {
+		while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
+		    == NULL) {
+			uvm_wait("gdt_alloc_cpu");
+		}
+		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+		    VM_PROT_READ | VM_PROT_WRITE);
+	}
+	memset(ci->ci_gdt, 0, min_len);
+	memcpy(ci->ci_gdt, gdt, gdt_count[0] * sizeof(gdt[0]));
+	setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
+	    SDT_MEMRWA, SEL_KPL, 1, 1);
+}
+
+
+/*
+ * Load appropriate gdt descriptor; we better be running on *ci
+ * (for the most part, this is how a CPU knows who it is).
+ */
+/*
+ * Load appropriate gdt descriptor; we better be running on *ci
+ * (for the most part, this is how a CPU knows who it is).
+ *
+ * Under Xen, lgdt is not available to the guest: instead the GDT
+ * pages are made read-only and their machine frame numbers are
+ * handed to the hypervisor via HYPERVISOR_set_gdt().
+ */
+void
+gdt_init_cpu(struct cpu_info *ci)
+{
+#ifndef XEN
+	struct region_descriptor region;
+	size_t max_len;
+
+	max_len = MAXGDTSIZ * sizeof(gdt[0]);
+	setregion(&region, ci->ci_gdt, max_len - 1);
+	lgdt(&region);
+#else
+	size_t len = gdt_size[0] * sizeof(gdt[0]);
+	unsigned long frames[len >> PAGE_SHIFT];
+	vaddr_t va;
+	pt_entry_t *ptp;
+	int f;
+
+	for (va = (vaddr_t)ci->ci_gdt, f = 0;
+	     va < (vaddr_t)ci->ci_gdt + len;
+	     va += PAGE_SIZE, f++) {
+		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
+		ptp = kvtopte(va);
+		/* record the frame and revoke write access to the page */
+		frames[f] = *ptp >> PAGE_SHIFT;
+		PTE_CLEARBITS(ptp, PG_RW);
+	}
+	PTE_UPDATES_FLUSH();
+	/* printk("loading gdt %x, %d entries, %d pages", */
+	    /* frames[0] << PAGE_SHIFT, gdt_size[0], len >> PAGE_SHIFT); */
+	if (HYPERVISOR_set_gdt(frames, gdt_size[0]))
+		panic("HYPERVISOR_set_gdt failed!\n");
+	/* reload the segment registers against the new GDT */
+	lgdt_finish();
+#endif
+}
+
+#ifdef MULTIPROCESSOR
+
+/*
+ * gdt_reload_cpu: reload this CPU's GDT register with its full-size
+ * shadow GDT.  NOTE(review): uses native lgdt with no Xen variant --
+ * confirm this path is unreachable under Xen.
+ */
+void
+gdt_reload_cpu(struct cpu_info *ci)
+{
+	struct region_descriptor region;
+	size_t max_len;
+
+	max_len = MAXGDTSIZ * sizeof(gdt[0]);
+	setregion(&region, ci->ci_gdt, max_len - 1);
+	lgdt(&region);
+}
+
+
+/*
+ * Grow the GDT.
+ */
+/*
+ * Grow the GDT: double the backed size of region `which' by mapping
+ * fresh zeroed pages behind the already-reserved VA.  Region 0 grows
+ * on every CPU's shadow GDT; region 1 (LDT descriptors, offset by
+ * max_len) exists only on the primary CPU's GDT.
+ */
+void
+gdt_grow(int which)
+{
+	size_t old_len, new_len, max_len;
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	struct vm_page *pg;
+	vaddr_t va;
+
+	old_len = gdt_size[which] * sizeof(gdt[0]);
+	gdt_size[which] <<= 1;
+	new_len = old_len << 1;
+
+	if (which != 0) {
+		max_len = MAXGDTSIZ * sizeof(gdt[0]);
+		/* first growth of region 1: start at MINGDTSIZ */
+		if (old_len == 0) {
+			gdt_size[which] = MINGDTSIZ;
+			new_len = gdt_size[which] * sizeof(gdt[0]);
+		}
+		for (va = (vaddr_t)(cpu_info_primary.ci_gdt) + old_len + max_len;
+		     va < (vaddr_t)(cpu_info_primary.ci_gdt) + new_len + max_len;
+		     va += PAGE_SIZE) {
+			while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
+			    NULL) {
+				uvm_wait("gdt_grow");
+			}
+			pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+			    VM_PROT_READ | VM_PROT_WRITE);
+		}
+		return;
+	}
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		for (va = (vaddr_t)(ci->ci_gdt) + old_len;
+		     va < (vaddr_t)(ci->ci_gdt) + new_len;
+		     va += PAGE_SIZE) {
+			while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
+			    NULL) {
+				uvm_wait("gdt_grow");
+			}
+			pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+			    VM_PROT_READ | VM_PROT_WRITE);
+		}
+	}
+}
+
+/*
+ * Allocate a GDT slot as follows:
+ * 1) If there are entries on the free list, use those.
+ * 2) If there are fewer than gdt_size entries in use, there are free slots
+ *    near the end that we can sweep through.
+ * 3) As a last resort, we increase the size of the GDT, and sweep through
+ *    the new slots.
+ */
+/* Allocate a slot from GDT region 0 (ordinary descriptors). */
+int
+gdt_get_slot()
+{
+	return gdt_get_slot1(0);
+}
+
+/*
+ * gdt_get_slot1: allocate a slot from region `which' -- from the
+ * free list if possible, otherwise by sweeping forward, growing the
+ * GDT when the swept region is exhausted.  Returns the slot index.
+ */
+int
+gdt_get_slot1(int which)
+{
+	size_t offset;
+	int slot;
+
+	gdt_lock();
+
+	if (gdt_free[which] != GNULL_SEL) {
+		/* free slots are chained through their gd_selector field */
+		slot = gdt_free[which];
+		gdt_free[which] = gdt[slot].gd.gd_selector;
+	} else {
+		offset = which * MAXGDTSIZ * sizeof(gdt[0]);
+		if (gdt_next[which] != gdt_count[which] + offset)
+			panic("gdt_get_slot botch 1");
+		if (gdt_next[which] - offset >= gdt_size[which]) {
+			if (gdt_size[which] >= MAXGDTSIZ)
+				panic("gdt_get_slot botch 2");
+			gdt_grow(which);
+		}
+		slot = gdt_next[which]++;
+	}
+
+	gdt_count[which]++;
+	gdt_unlock();
+	return (slot);
+}
+
+/*
+ * Deallocate a GDT slot, putting it on the free list.
+ */
+/* Free a slot back to GDT region 0. */
+void
+gdt_put_slot(int slot)
+{
+	gdt_put_slot1(slot, 0);
+}
+
+/*
+ * gdt_put_slot1: mark `slot' null and chain it onto region `which's
+ * free list (linked through gd_selector).
+ */
+void
+gdt_put_slot1(int slot, int which)
+{
+
+	gdt_lock();
+	gdt_count[which]--;
+
+	gdt[slot].gd.gd_type = SDT_SYSNULL;
+	gdt[slot].gd.gd_selector = gdt_free[which];
+	gdt_free[which] = slot;
+
+	gdt_unlock();
+}
+
+/*
+ * tss_alloc: allocate a GDT slot for the PCB's TSS and return the
+ * corresponding kernel selector.  (Note setgdt() rejects
+ * SDT_SYS386TSS under Xen, so the slot is left null there.)
+ */
+int
+tss_alloc(struct pcb *pcb)
+{
+	int slot;
+
+	slot = gdt_get_slot();
+	setgdt(slot, &pcb->pcb_tss, sizeof(struct pcb) - 1,
+	    SDT_SYS386TSS, SEL_KPL, 0, 0);
+	return GSEL(slot, SEL_KPL);
+}
+
+/* Release the GDT slot behind TSS selector `sel'. */
+void
+tss_free(int sel)
+{
+
+	gdt_put_slot(IDXSEL(sel));
+}
+
+/*
+ * Caller must have pmap locked for both of these functions.
+ */
+/*
+ * ldt_alloc: allocate a region-1 GDT slot describing the given LDT
+ * and record its selector in the pmap.  Under Xen the slot stores a
+ * raw base/entry-count pair instead of a native LDT descriptor.
+ * Caller must hold the pmap lock.
+ */
+void
+ldt_alloc(struct pmap *pmap, union descriptor *ldt, size_t len)
+{
+	int slot;
+
+	slot = gdt_get_slot1(1);
+#ifndef XEN
+	setgdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
+#else
+	cpu_info_primary.ci_gdt[slot].ld.ld_base = (uint32_t)ldt;
+	cpu_info_primary.ci_gdt[slot].ld.ld_entries =
+		len / sizeof(union descriptor);
+#endif
+	pmap->pm_ldt_sel = GSEL(slot, SEL_KPL);
+}
+
+/*
+ * ldt_free: release the pmap's LDT slot back to region 1.
+ * Caller must hold the pmap lock.
+ */
+void
+ldt_free(struct pmap *pmap)
+{
+	int slot;
+
+	slot = IDXSEL(pmap->pm_ldt_sel);
+
+	gdt_put_slot1(slot, 1);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/genassym.cf	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,323 @@
+#	$NetBSD: genassym.cf,v 1.1 2004/03/11 21:44:08 cl Exp $
+#	NetBSD: genassym.cf,v 1.40 2004/02/20 17:35:01 yamt Exp 
+
+#
+# Copyright (c) 1998 The NetBSD Foundation, Inc.
+# All rights reserved.
+#
+# This code is derived from software contributed to The NetBSD Foundation
+# by Charles M. Hannum.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#        This product includes software developed by the NetBSD
+#        Foundation, Inc. and its contributors.
+# 4. Neither the name of The NetBSD Foundation nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# Copyright (c) 1982, 1990 The Regents of the University of California.
+# All rights reserved.
+#
+# This code is derived from software contributed to Berkeley by
+# William Jolitz.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)genassym.c	5.11 (Berkeley) 5/10/91
+#
+
+if defined(_KERNEL_OPT)
+include "opt_compat_svr4.h"
+include "opt_compat_freebsd.h"
+include "opt_compat_linux.h"
+include "opt_multiprocessor.h"
+include "opt_xen.h"
+endif
+
+include <sys/param.h>
+include <sys/proc.h>
+include <sys/resourcevar.h>
+include <sys/device.h>
+include <sys/user.h>
+include <sys/mbuf.h>
+include <netinet/in.h>
+include <netinet/in_systm.h>
+include <netinet/ip.h>
+include <netinet/ip6.h>
+include <netinet/ip_var.h>
+
+include <uvm/uvm.h>
+
+include <machine/trap.h>
+include <machine/pmap.h>
+include <machine/vmparam.h>
+include <machine/intr.h>
+include <machine/types.h>
+
+if defined(_KERNEL) && !defined(_LKM) && !defined(XENO)
+include "ioapic.h"
+include "apm.h"
+endif
+if NAPM > 0
+include <machine/bioscall.h>
+include <machine/apmvar.h>
+endif
+
+ifdef COMPAT_SVR4
+include <compat/svr4/svr4_ucontext.h>
+endif
+
+ifdef COMPAT_LINUX
+include <compat/linux/common/linux_signal.h>
+include <compat/linux/common/linux_machdep.h>
+endif
+
+ifdef COMPAT_FREEBSD
+include <machine/freebsd_machdep.h>
+endif
+
+ifdef MULTIPROCESSOR
+include <machine/cpu.h>
+endif
+
+if NIOAPIC > 0
+include <machine/i82093reg.h>
+include <machine/i82093var.h>
+endif
+
+if defined(_KERNEL) && !defined(_LKM) && !defined(XENO)
+include "isa.h"
+endif
+if NISA > 0
+include <machine/isa_machdep.h>
+endif
+
+include <machine/tlog.h>
+
+# Basic VM and scheduler constants used by locore/spl assembly.
+define	PAGE_SIZE		PAGE_SIZE
+
+define	LSRUN			LSRUN
+define	LSONPROC		LSONPROC
+
+# Page-directory slot layout and kernel page-table sizing
+# (see machine/pmap.h).
+define	PDSLOT_PTE		PDSLOT_PTE
+define	PDSLOT_APTE		PDSLOT_APTE
+define	PDSLOT_KERN		PDSLOT_KERN
+define	NKPTP_MIN		NKPTP_MIN
+define	NKPTP_MAX		NKPTP_MAX
+
+define	VM_MAXUSER_ADDRESS	(int)VM_MAXUSER_ADDRESS
+
+define	UVM_PAGE_IDLE_ZERO	offsetof(struct uvm, page_idle_zero)
+
+# struct lwp / struct proc field offsets for cpu_switch and syscall
+# entry code.
+define	L_ADDR			offsetof(struct lwp, l_addr)
+define	L_BACK			offsetof(struct lwp, l_back)
+define	L_FORW			offsetof(struct lwp, l_forw)
+define	L_PRIORITY		offsetof(struct lwp, l_priority)
+define	L_STAT			offsetof(struct lwp, l_stat)
+define	L_WCHAN			offsetof(struct lwp, l_wchan)
+define	L_PROC			offsetof(struct lwp, l_proc)
+define	L_MD_TSS_SEL		offsetof(struct lwp, l_md.md_tss_sel)
+define	L_MD_REGS		offsetof(struct lwp, l_md.md_regs)
+define	L_CPU			offsetof(struct lwp, l_cpu)
+define	P_FLAG			offsetof(struct proc, p_flag)
+define	P_RASLIST		offsetof(struct proc, p_raslist)
+define	P_MD_SYSCALL		offsetof(struct proc, p_md.md_syscall)
+define	P_MD_ASTPENDING		offsetof(struct proc, p_md.md_astpending)
+
+define	P_SYSTEM		P_SYSTEM
+
+# mbuf and IP header offsets (assembly checksum/copy helpers).
+define	M_DATA			offsetof(struct mbuf, m_data)
+define	M_LEN			offsetof(struct mbuf, m_len)
+define	M_NEXT			offsetof(struct mbuf, m_next)
+
+define	IP_SRC			offsetof(struct ip, ip_src)
+define	IP_DST			offsetof(struct ip, ip_dst)
+
+define	IP6_SRC			offsetof(struct ip6_hdr, ip6_src)
+define	IP6_DST			offsetof(struct ip6_hdr, ip6_dst)
+
+define	V_TRAP			offsetof(struct uvmexp, traps)
+define	V_INTR			offsetof(struct uvmexp, intrs)
+
+# struct pcb offsets used by context switch and on-fault handling.
+define	PCB_CR3			offsetof(struct pcb, pcb_cr3)
+define	PCB_EBP			offsetof(struct pcb, pcb_ebp)
+define	PCB_ESP			offsetof(struct pcb, pcb_esp)
+define	PCB_CR0			offsetof(struct pcb, pcb_cr0)
+define	PCB_LDT_SEL		offsetof(struct pcb, pcb_ldt_sel)
+define	PCB_ONFAULT		offsetof(struct pcb, pcb_onfault)
+define	PCB_FPCPU		offsetof(struct pcb, pcb_fpcpu)
+define	PCB_TSS_SS0		offsetof(struct pcb, pcb_tss.tss_ss0)
+define	PCB_TSS_ESP0		offsetof(struct pcb, pcb_tss.tss_esp0)
+
+# Trapframe offsets (vector.S / locore.S).
+define	TF_CS			offsetof(struct trapframe, tf_cs)
+define	TF_EIP			offsetof(struct trapframe, tf_eip)
+define	TF_ERR			offsetof(struct trapframe, tf_err)
+define	TF_TRAPNO		offsetof(struct trapframe, tf_trapno)
+define	TF_EFLAGS		offsetof(struct trapframe, tf_eflags)
+
+define	TF_GS			offsetof(struct trapframe, tf_gs)
+define	TF_FS			offsetof(struct trapframe, tf_fs)
+define	TF_ES			offsetof(struct trapframe, tf_es)
+define	TF_DS			offsetof(struct trapframe, tf_ds)
+define	TF_EDI			offsetof(struct trapframe, tf_edi)
+define	TF_ESI			offsetof(struct trapframe, tf_esi)
+define	TF_EBP			offsetof(struct trapframe, tf_ebp)
+define	TF_EBX			offsetof(struct trapframe, tf_ebx)
+define	TF_EDX			offsetof(struct trapframe, tf_edx)
+define	TF_ECX			offsetof(struct trapframe, tf_ecx)
+define	TF_EAX			offsetof(struct trapframe, tf_eax)
+
+define	TF_PUSHSIZE		offsetof(struct trapframe, tf_trapno)
+
+define	FRAMESIZE		sizeof(struct trapframe)
+
+# Compat signal-frame offsets, only when the option is configured.
+ifdef COMPAT_SVR4
+define	SVR4_SIGF_HANDLER	offsetof(struct svr4_sigframe, sf_handler)
+define	SVR4_SIGF_UC		offsetof(struct svr4_sigframe, sf_uc)
+endif
+
+ifdef COMPAT_LINUX
+define	LINUX_SIGF_HANDLER	offsetof(struct linux_sigframe, sf_handler)
+define	LINUX_SIGF_SC		offsetof(struct linux_sigframe, sf_sc)
+define	LINUX_RT_SIGF_HANDLER	offsetof(struct linux_rt_sigframe, sf_handler)
+define	LINUX_RT_SIGF_UC	offsetof(struct linux_rt_sigframe, sf_uc)
+endif
+
+ifdef COMPAT_FREEBSD
+define	FREEBSD_SIGF_HANDLER	offsetof(struct freebsd_sigframe, sf_handler)
+define	FREEBSD_SIGF_SC		offsetof(struct freebsd_sigframe, sf_sc)
+endif
+
+# Interrupt handler list walked by the spl assembly.
+define	IH_FUN			offsetof(struct intrhand, ih_fun)
+define	IH_ARG			offsetof(struct intrhand, ih_arg)
+define	IH_LEVEL		offsetof(struct intrhand, ih_level)
+define	IH_NEXT			offsetof(struct intrhand, ih_next)
+
+# APM BIOS-call structures (only with apm(4) configured).
+if NAPM > 0
+define	APM_CODE32		offsetof(struct apm_connect_info, apm_code32_seg_base)
+define	APM_CODE16		offsetof(struct apm_connect_info, apm_code16_seg_base)
+define	APM_DATA		offsetof(struct apm_connect_info, apm_data_seg_base)
+define	APM_CODE32_LEN		offsetof(struct apm_connect_info, apm_code32_seg_len)
+define	APM_DATA_LEN		offsetof(struct apm_connect_info, apm_data_seg_len)
+define	APM_ENTRY		offsetof(struct apm_connect_info, apm_entrypt)
+define	APM_DETAIL		offsetof(struct apm_connect_info, apm_detail)
+define	APM_SIZE		sizeof(struct apm_connect_info)
+define	BIOSCALLREG_EAX		offsetof(struct bioscallregs, EAX)
+define	BIOSCALLREG_EBX		offsetof(struct bioscallregs, EBX)
+define	BIOSCALLREG_ECX		offsetof(struct bioscallregs, ECX)
+define	BIOSCALLREG_EDX		offsetof(struct bioscallregs, EDX)
+define	BIOSCALLREG_ESI		offsetof(struct bioscallregs, ESI)
+define	BIOSCALLREG_EDI		offsetof(struct bioscallregs, EDI)
+define	BIOSCALLREG_EFLAGS	offsetof(struct bioscallregs, EFLAGS)
+endif
+
+# Per-CPU cpu_info offsets (curlwp, pending-interrupt state, etc.).
+define	CPU_INFO_SELF		offsetof(struct cpu_info, ci_self)
+define	CPU_INFO_RESCHED	offsetof(struct cpu_info, ci_want_resched)
+define	CPU_INFO_WANT_PMAPLOAD	offsetof(struct cpu_info, ci_want_pmapload)
+define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
+define	TLBSTATE_VALID		TLBSTATE_VALID
+define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
+define	CPU_INFO_CURPCB		offsetof(struct cpu_info, ci_curpcb)
+define	CPU_INFO_IDLE_PCB	offsetof(struct cpu_info, ci_idle_pcb)
+define  CPU_INFO_IDLE_TSS_SEL	offsetof(struct cpu_info, ci_idle_tss_sel)
+define	CPU_INFO_ASTPENDING	offsetof(struct cpu_info, ci_astpending)
+
+define	CPU_INFO_LEVEL		offsetof(struct cpu_info, ci_cpuid_level)
+define	CPU_INFO_VENDOR		offsetof(struct cpu_info, ci_vendor[0])
+define	CPU_INFO_SIGNATURE	offsetof(struct cpu_info, ci_signature)
+define	CPU_INFO_FEATURES	offsetof(struct cpu_info, ci_feature_flags)
+define	CPU_INFO_BRAND		offsetof(struct cpu_info, ci_brand_id)
+
+# Trace-log bookkeeping (machine/tlog.h).
+define	CPU_TLOG_OFFSET		offsetof(struct cpu_info, ci_tlog_offset)
+define	CPU_TLOG_BASE		offsetof(struct cpu_info, ci_tlog_base)
+
+define	CPU_INFO_GDT		offsetof(struct cpu_info, ci_gdt)
+define	CPU_INFO_IPENDING	offsetof(struct cpu_info, ci_ipending)
+define	CPU_INFO_IMASK		offsetof(struct cpu_info, ci_imask)
+define	CPU_INFO_IUNMASK	offsetof(struct cpu_info, ci_iunmask)
+define	CPU_INFO_ILEVEL		offsetof(struct cpu_info, ci_ilevel)
+define	CPU_INFO_IDEPTH		offsetof(struct cpu_info, ci_idepth)
+define	CPU_INFO_ISOURCES	offsetof(struct cpu_info, ci_isources)
+
+if NIOAPIC > 0
+define		IOAPIC_SC_REG		offsetof(struct ioapic_softc, sc_reg)
+define		IOAPIC_SC_DATA		offsetof(struct ioapic_softc, sc_data)
+define		PIC_LOCK		offsetof(struct pic, pic_lock)
+endif
+
+define	SIZEOF_CPU_INFO		sizeof(struct cpu_info)
+
+# Interrupt source structures used by vector.S stubs.
+define        SIZEOF_ISOURCE          sizeof(struct intrsource)
+define        SIZEOF_ISTUB            sizeof(struct intrstub)
+
+define		IS_RECURSE	offsetof(struct intrsource, is_recurse)
+define		IS_RESUME	offsetof(struct intrsource, is_resume)
+define		IS_EVCNTLO	offsetof(struct intrsource, is_evcnt.ev_count)
+define		IS_EVCNTHI	offsetof(struct intrsource, is_evcnt.ev_count)+4
+define		IS_HANDLERS	offsetof(struct intrsource, is_handlers)
+define		IS_PIC		offsetof(struct intrsource, is_pic)
+define		IS_FLAGS	offsetof(struct intrsource, is_flags)
+define		IS_PIN		offsetof(struct intrsource, is_pin)
+define		IS_TYPE		offsetof(struct intrsource, is_type)
+define		IS_MAXLEVEL	offsetof(struct intrsource, is_maxlevel)
+
+# Trace-record layout (machine/tlog.h).
+define	TREC_SP			offsetof(struct trec, tr_sp)
+define	TREC_HPC		offsetof(struct trec, tr_hpc)
+define	TREC_IPC		offsetof(struct trec, tr_ipc)
+define	TREC_TSC		offsetof(struct trec, tr_tsc)
+define	TREC_LBF		offsetof(struct trec, tr_lbf)
+define	TREC_LBT		offsetof(struct trec, tr_lbt)
+define	TREC_IBF		offsetof(struct trec, tr_ibf)
+define	TREC_IBT		offsetof(struct trec, tr_ibt)
+
+define	SIZEOF_TREC		sizeof(struct trec)
+define	SIZEOF_TLOG		sizeof(struct tlog)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/hypervisor.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,189 @@
+/*	$NetBSD: hypervisor.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/******************************************************************************
+ * hypervisor.c
+ * 
+ * Communication to/from hypervisor.
+ * 
+ * Copyright (c) 2002-2003, K A Fraser
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/xen.h>
+#include <machine/hypervisor.h>
+
+/* static */ unsigned long event_mask = 0;
+static unsigned long ev_err_count;
+
+/*
+ * Drain pending Xen events from the shared-info page into the primary
+ * CPU's ci_ipending bitmask.  Returns 1 if any newly-pending event's
+ * handler level is above the current interrupt level (i.e. the caller
+ * should go process interrupts), 0 otherwise.
+ */
+int stipending(void);
+int
+stipending()
+{
+	unsigned long events;
+	struct cpu_info *ci;
+	int num, ret;
+
+	ret = 0;
+	ci = &cpu_info_primary;
+
+/* 	if (HYPERVISOR_shared_info->events) */
+/* 		printf("stipending events %08lx ilevel %d\n", */
+/* 		    HYPERVISOR_shared_info->events, ci->ci_ilevel); */
+
+	do {
+		/*
+		 * we're only called after STIC, so we know that we'll
+		 * have to STI at the end
+		 */
+		__cli();
+
+		/* Atomically snapshot and clear the pending-event word. */
+		events = xchg(&HYPERVISOR_shared_info->events, 0);
+
+		while (events) {
+			/* bsfl: lowest set bit -> num; btrl clears it. */
+			__asm__ __volatile__ (
+				"   bsfl %1,%0		;"
+				"   btrl %0,%1		;"
+				: "=r" (num) : "r" (events));
+			if (num) {
+				/*
+				 * NOTE(review): when num == 0 (event
+				 * channel 0 pending) the bit has already
+				 * been cleared by btrl but is never
+				 * recorded in ci_ipending -- confirm that
+				 * event 0 is intentionally unused.
+				 */
+				ci->ci_ipending |= (1 << num);
+				if (ret == 0 &&
+				    ci->ci_ilevel <
+				    ci->ci_isources[num]->is_handlers->ih_level)
+					ret = 1;
+			}
+		}
+
+		__sti();
+	} while (HYPERVISOR_shared_info->events);
+
+	return (ret);
+}
+
+/*
+ * Event-channel upcall from the Xen hypervisor.  Repeatedly drains
+ * the shared-info pending-event word and dispatches each locally
+ * enabled event to do_event() until no more events are posted.
+ */
+void do_hypervisor_callback(struct pt_regs *regs)
+{
+	unsigned long events, flags;
+	shared_info_t *shared = HYPERVISOR_shared_info;
+	struct cpu_info *ci;
+	int level;
+	extern int once;
+
+	ci = &cpu_info_primary;
+	level = ci->ci_ilevel;
+	if (0 && once == 2)		/* disabled debug hook */
+		printf("hypervisor\n");
+
+	do {
+		/* Specialised local_irq_save(). */
+		/* Atomically clear the master enable bit, remembering
+		 * whether delivery was enabled on entry. */
+		flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, 
+		    &shared->events_mask);
+		barrier();
+
+		/* Snapshot-and-clear pending events; keep only those
+		 * enabled in the local event_mask. */
+		events = xchg(&shared->events, 0);
+		events &= event_mask;
+
+		/* 'events' now contains some pending events to handle. */
+		/*
+		 * Dispatch loop: the push/sub set up a two-word argument
+		 * frame; each iteration finds the lowest set bit (bsfl),
+		 * clears it (btrl), stores the bit number as the first
+		 * argument and calls do_event -- i.e. do_event(num, regs).
+		 */
+		__asm__ __volatile__ (
+			"   push %1                    ;"
+			"   sub  $4,%%esp              ;"
+			"   jmp  2f                    ;"
+			"1: btrl %%eax,%0              ;" /* clear bit     */
+			"   mov  %%eax,(%%esp)         ;"
+			"   call do_event              ;" /* do_event(event) */
+			"2: bsfl %0,%%eax              ;" /* %eax == bit # */
+			"   jnz  1b                    ;"
+			"   add  $8,%%esp              ;"
+			/* we use %ebx because it is callee-saved */
+			: : "b" (events), "r" (regs)
+			/* clobbered by callback function calls */
+			: "eax", "ecx", "edx", "memory" ); 
+
+		/* Specialised local_irq_restore(). */
+		/* Re-enable delivery only if it was enabled on entry. */
+		if (flags)
+			set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask);
+		barrier();
+	}
+	while ( shared->events );
+
+	/* Diagnostic: handlers are expected to restore the spl. */
+	if (level != ci->ci_ilevel)
+		printf("hypervisor done %08lx level %d/%d ipending %08x\n",
+		    HYPERVISOR_shared_info->events_mask, level, ci->ci_ilevel,
+		    ci->ci_ipending);
+	if (0 && once == 2)		/* disabled debug hook */
+		printf("hypervisor done\n");
+}
+
+/*
+ * Unmask event channel 'ev': include it in the local dispatch mask
+ * and in the hypervisor-visible per-event mask.
+ */
+void hypervisor_enable_event(unsigned int ev)
+{
+	set_bit(ev, &event_mask);
+	set_bit(ev, &HYPERVISOR_shared_info->events_mask);
+#if 0
+	/* Disabled: kick the callback if delivery is globally enabled. */
+	if ( test_bit(EVENTS_MASTER_ENABLE_BIT, 
+		 &HYPERVISOR_shared_info->events_mask) )
+		do_hypervisor_callback(NULL);
+#endif
+}
+
+/*
+ * Mask event channel 'ev' both locally and in the shared-info mask.
+ */
+void hypervisor_disable_event(unsigned int ev)
+{
+	clear_bit(ev, &event_mask);
+	clear_bit(ev, &HYPERVISOR_shared_info->events_mask);
+}
+
+/*
+ * Re-arm event channel 'ev' after servicing it.  Acknowledging an
+ * event that is not locally enabled is counted in ev_err_count.
+ */
+void hypervisor_acknowledge_event(unsigned int ev)
+{
+	if ( !(event_mask & (1<<ev)) )
+		atomic_inc((atomic_t *)(void *)&ev_err_count);
+	set_bit(ev, &HYPERVISOR_shared_info->events_mask);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/identcpu.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,1320 @@
+/*	$NetBSD: identcpu.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: identcpu.c,v 1.8 2003/11/20 13:30:29 fvdl Exp 	*/
+
+/*-
+ * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Frank van der Linden,  and by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the NetBSD
+ *      Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include "opt_cputype.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/specialreg.h>
+#include <machine/pio.h>
+#include <machine/cpu.h>
+#include <x86/cacheinfo.h>
+
+/*
+ * CPUID leaf-2 cache/TLB descriptor table.  Each entry appears to be:
+ * info type (CAI_*), descriptor byte, associativity, total size in
+ * bytes (or TLB entry count), line size (or page size), and an
+ * optional free-form description string -- confirm field names
+ * against x86/cacheinfo.h.
+ */
+static const struct x86_cache_info
+intel_cpuid_cache_info[] = {
+	{ CAI_ITLB, 	0x01,	 4, 32, 4 * 1024 },
+	{ CAI_ITLB2, 	0x02, 0xff,  2, 4 * 1024 * 1024 },
+	{ CAI_DTLB, 	0x03,    4, 64, 4 * 1024 },
+	{ CAI_DTLB2,    0x04,    4,  8, 4 * 1024 * 1024 },
+	{ CAI_ITLB,     0x50, 0xff, 64, 4 * 1024, "4K/4M: 64 entries" },
+	{ CAI_ITLB,     0x51, 0xff, 64, 4 * 1024, "4K/4M: 128 entries" },
+	{ CAI_ITLB,     0x52, 0xff, 64, 4 * 1024, "4K/4M: 256 entries" },
+	{ CAI_DTLB,     0x5b, 0xff, 64, 4 * 1024, "4K/4M: 64 entries" },
+	{ CAI_DTLB,     0x5c, 0xff, 64, 4 * 1024, "4K/4M: 128 entries" },
+	{ CAI_DTLB,     0x5d, 0xff, 64, 4 * 1024, "4K/4M: 256 entries" },
+
+	{ CAI_ICACHE,   0x06,  4,        8 * 1024, 32 },
+	{ CAI_ICACHE,   0x08,  4,       16 * 1024, 32 },
+	{ CAI_DCACHE,   0x0a,  2,        8 * 1024, 32 },
+	{ CAI_DCACHE,   0x0c,  4,       16 * 1024, 32 },
+	{ CAI_L2CACHE,  0x40,  0,               0,  0, "not present" },
+	{ CAI_L2CACHE,  0x41,  4,      128 * 1024, 32 },
+	{ CAI_L2CACHE,  0x42,  4,      256 * 1024, 32 },
+	{ CAI_L2CACHE,  0x43,  4,      512 * 1024, 32 },
+	{ CAI_L2CACHE,  0x44,  4, 1 * 1024 * 1024, 32 },
+	{ CAI_L2CACHE,  0x45,  4, 2 * 1024 * 1024, 32 },
+	{ CAI_DCACHE,   0x66,  4,        8 * 1024, 64 },
+	{ CAI_DCACHE,   0x67,  4,       16 * 1024, 64 },
+	{ CAI_DCACHE,   0x68,  4,  	32 * 1024, 64 },
+	{ CAI_ICACHE,   0x70,  8,       12 * 1024, 64, "12K uOp cache"},
+	{ CAI_ICACHE,   0x71,  8,       16 * 1024, 64, "16K uOp cache"},
+	{ CAI_ICACHE,   0x72,  8,       32 * 1024, 64, "32K uOp cache"},
+	{ CAI_L2CACHE,  0x79,  8,      128 * 1024, 64 },
+	{ CAI_L2CACHE,  0x7a,  8,      256 * 1024, 64 },
+	{ CAI_L2CACHE,  0x7b,  8,      512 * 1024, 64 },
+	{ CAI_L2CACHE,  0x7c,  8, 1 * 1024 * 1024, 64 },
+	{ CAI_L2CACHE,  0x82,  8,      256 * 1024, 32 },
+	{ CAI_L2CACHE,  0x83,  8,      512 * 1024, 32 },
+	{ CAI_L2CACHE,  0x84,  8, 1 * 1024 * 1024, 32 },
+	{ CAI_L2CACHE,  0x85,  8, 2 * 1024 * 1024, 32 },
+	{ 0,               0,  0,	        0,  0 },	/* terminator */
+};
+
+/*
+ * Map Brand ID from cpuid instruction to brand name.
+ * Source: Intel Processor Identification and the CPUID Instruction, AP-485
+ */
+static const char * const i386_intel_brand[] = {
+	/* Indexed by the CPUID brand ID (see AP-485, cited above). */
+	"",		    /* Unsupported */
+	"Celeron",	    /* Intel (R) Celeron (TM) processor */
+	"Pentium III",      /* Intel (R) Pentium (R) III processor */
+	"Pentium III Xeon", /* Intel (R) Pentium (R) III Xeon (TM) processor */
+	"Pentium III",      /* Intel (R) Pentium (R) III processor */
+	"",		    /* Reserved */
+	"Mobile Pentium III", /* Mobile Intel (R) Pentium (R) III processor-M */
+	"Mobile Celeron",   /* Mobile Intel (R) Celeron (R) processor */    
+	"Pentium 4",	    /* Intel (R) Pentium (R) 4 processor */
+	"Pentium 4",	    /* Intel (R) Pentium (R) 4 processor */
+	"Celeron",	    /* Intel (R) Celeron (TM) processor */
+	"Xeon",		    /* Intel (R) Xeon (TM) processor */
+	"Xeon MP",	    /* Intel (R) Xeon (TM) processor MP */
+	"Mobile Pentium 4", /* Mobile Intel (R) Pentium (R) 4 processor-M */
+	"Mobile Celeron",   /* Mobile Intel (R) Celeron (R) processor */
+};
+
+/*
+ * AMD processors don't have Brand IDs, so we need these names for probe.
+ */
+static const char * const amd_brand[] = {
+	/* Index 0 means "no brand"; names used by the probe code. */
+	"",
+	"Duron",	/* AMD Duron(tm) */
+	"MP",		/* AMD Athlon(tm) MP */
+	"XP",		/* AMD Athlon(tm) XP */
+	"4"		/* AMD Athlon(tm) 4 */
+};
+
+u_int cpu_serial[3];
+static char amd_brand_name[48];
+
+void cyrix6x86_cpu_setup(struct cpu_info *);
+void winchip_cpu_setup(struct cpu_info *);
+void amd_family5_setup(struct cpu_info *);
+void transmeta_cpu_setup(struct cpu_info *);
+
+static void via_cpu_probe(struct cpu_info *);
+static void amd_family6_probe(struct cpu_info *);
+
+static const char *intel_family6_name(struct cpu_info *);
+
+static void transmeta_cpu_info(struct cpu_info *);
+
+/*
+ * Read a Cyrix configuration register: select it through the index
+ * port (0x22), then fetch the value from the data port (0x23).
+ */
+static __inline u_char
+cyrix_read_reg(u_char reg)
+{
+
+	outb(0x22, reg);
+	return (inb(0x23));
+}
+
+/*
+ * Write a Cyrix configuration register: select it through the index
+ * port (0x22), then store the value via the data port (0x23).
+ */
+static __inline void
+cyrix_write_reg(u_char reg, u_char data)
+{
+
+	outb(0x22, reg);
+	outb(0x23, data);
+}
+
+/*
+ * Info for CTL_HW
+ */
+char	cpu_model[120];
+
+/*
+ * Note: these are just the ones that may not have a cpuid instruction.
+ * We deal with the rest in a different way.
+ */
+const struct cpu_nocpuid_nameclass i386_nocpuid_cpus[] = {
+	/*
+	 * Per entry: vendor id, vendor string, CPU name, CPU class and
+	 * two hook pointers (a setup function plus one more -- confirm
+	 * against cpu_nocpuid_nameclass in the header).  The trailing
+	 * comment gives the CPU_* cputype index.
+	 */
+	{ CPUVENDOR_INTEL, "Intel", "386SX",	CPUCLASS_386,
+		NULL, NULL},			/* CPU_386SX */
+	{ CPUVENDOR_INTEL, "Intel", "386DX",	CPUCLASS_386,
+		NULL, NULL},			/* CPU_386   */
+	{ CPUVENDOR_INTEL, "Intel", "486SX",	CPUCLASS_486,
+		NULL, NULL},			/* CPU_486SX */
+	{ CPUVENDOR_INTEL, "Intel", "486DX",	CPUCLASS_486,
+		NULL, NULL},			/* CPU_486   */
+	{ CPUVENDOR_CYRIX, "Cyrix", "486DLC",	CPUCLASS_486,
+		NULL, NULL},			/* CPU_486DLC */
+	{ CPUVENDOR_CYRIX, "Cyrix", "6x86",	CPUCLASS_486,
+		cyrix6x86_cpu_setup, NULL},	/* CPU_6x86 */
+	{ CPUVENDOR_NEXGEN,"NexGen","586",      CPUCLASS_386,
+		NULL, NULL},			/* CPU_NX586 */
+};
+
+/* CPU class display names, indexed by CPUCLASS_386 .. CPUCLASS_686. */
+const char *classnames[] = {
+	"386",
+	"486",
+	"586",
+	"686"
+};
+
+/*
+ * Processor modifier strings -- presumably indexed by the CPUID
+ * processor "type" field; confirm against the caller.
+ */
+const char *modifiers[] = {
+	"",
+	"OverDrive",
+	"Dual",
+	""
+};
+
+/*
+ * Master identification table for CPUs with the cpuid instruction.
+ * One entry per vendor string; each entry carries four per-family
+ * sub-tables (family 4, 5, 6 and >6) consisting of a CPU class,
+ * sixteen per-model names plus a default (17th slot), and three hook
+ * pointers (setup, probe, cache-info -- layout inferred from the
+ * initializers; confirm against cpu_cpuid_nameclass in the header).
+ */
+const struct cpu_cpuid_nameclass i386_cpuid_cpus[] = {
+	{
+		"GenuineIntel",
+		CPUVENDOR_INTEL,
+		"Intel",
+		/* Family 4 */
+		{ {
+			CPUCLASS_486,
+			{
+				"486DX", "486DX", "486SX", "486DX2", "486SL",
+				"486SX2", 0, "486DX2 W/B Enhanced",
+				"486DX4", 0, 0, 0, 0, 0, 0, 0,
+				"486"		/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 5 */
+		{
+			CPUCLASS_586,
+			{
+				"Pentium (P5 A-step)", "Pentium (P5)",
+				"Pentium (P54C)", "Pentium (P24T)",
+				"Pentium/MMX", "Pentium", 0,
+				"Pentium (P54C)", "Pentium/MMX (Tillamook)",
+				0, 0, 0, 0, 0, 0, 0,
+				"Pentium"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 6 */
+		{
+			CPUCLASS_686,
+			{
+				"Pentium Pro (A-step)", "Pentium Pro", 0,
+				"Pentium II (Klamath)", "Pentium Pro",
+				"Pentium II/Celeron (Deschutes)",
+				"Celeron (Mendocino)",
+				"Pentium III (Katmai)",
+				"Pentium III (Coppermine)",
+				"Pentium M (Banias)", 
+				"Pentium III Xeon (Cascades)",
+				"Pentium III (Tualatin)", 0, 0, 0, 0,
+				"Pentium Pro, II or III"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family > 6 */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium 4"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	},
+	{
+		"AuthenticAMD",
+		CPUVENDOR_AMD,
+		"AMD",
+		/* Family 4 */
+		{ {
+			CPUCLASS_486,
+			{
+				0, 0, 0, "Am486DX2 W/T",
+				0, 0, 0, "Am486DX2 W/B",
+				"Am486DX4 W/T or Am5x86 W/T 150",
+				"Am486DX4 W/B or Am5x86 W/B 150", 0, 0,
+				0, 0, "Am5x86 W/T 133/160",
+				"Am5x86 W/B 133/160",
+				"Am486 or Am5x86"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 5 */
+		{
+			CPUCLASS_586,
+			{
+				"K5", "K5", "K5", "K5", 0, 0, "K6",
+				"K6", "K6-2", "K6-III", 0, 0, 0,
+				"K6-2+/III+", 0, 0,
+				"K5 or K6"		/* Default */
+			},
+			amd_family5_setup,
+			NULL,
+			amd_cpu_cacheinfo,
+		},
+		/* Family 6 */
+		{
+			CPUCLASS_686,
+			{
+				0, "Athlon Model 1", "Athlon Model 2",
+				"Duron", "Athlon Model 4 (Thunderbird)",
+				0, "Athlon", "Duron", "Athlon", 0, 0, 0,
+				0, 0, 0, 0,
+				"K7 (Athlon)"	/* Default */
+			},
+			NULL,
+			amd_family6_probe,
+			amd_cpu_cacheinfo,
+		},
+		/* Family > 6 */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Unknown K7 (Athlon)"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	},
+	{
+		"CyrixInstead",
+		CPUVENDOR_CYRIX,
+		"Cyrix",
+		/* Family 4 */
+		{ {
+			CPUCLASS_486,
+			{
+				0, 0, 0,
+				"MediaGX",
+				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+				"486"		/* Default */
+			},
+			cyrix6x86_cpu_setup, /* XXX ?? */
+			NULL,
+			NULL,
+		},
+		/* Family 5 */
+		{
+			CPUCLASS_586,
+			{
+				0, 0, "6x86", 0,
+				"MMX-enhanced MediaGX (GXm)", /* or Geode? */
+				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+				"6x86"		/* Default */
+			},
+			cyrix6x86_cpu_setup,
+			NULL,
+			NULL,
+		},
+		/* Family 6 */
+		{
+			CPUCLASS_686,
+			{
+				"6x86MX", 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"6x86MX"		/* Default */
+			},
+			cyrix6x86_cpu_setup,
+			NULL,
+			NULL,
+		},
+		/* Family > 6 */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Unknown 6x86MX"		/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	},
+	{	/* MediaGX is now owned by National Semiconductor */
+		"Geode by NSC",
+		CPUVENDOR_CYRIX, /* XXX */
+		"National Semiconductor",
+		/* Family 4, NSC never had any of these */
+		{ {
+			CPUCLASS_486,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"486 compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 5: Geode family, formerly MediaGX */
+		{
+			CPUCLASS_586,
+			{
+				0, 0, 0, 0,
+				"Geode GX1",
+				0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+				"Geode"		/* Default */
+			},
+			cyrix6x86_cpu_setup,
+			NULL,
+			NULL,
+		},
+		/* Family 6, not yet available from NSC */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium Pro compatible" /* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family > 6, not yet available from NSC */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium Pro compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	},
+	{
+		"CentaurHauls",
+		CPUVENDOR_IDT,
+		"IDT",
+		/* Family 4, IDT never had any of these */
+		{ {
+			CPUCLASS_486,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"486 compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 5 */
+		{
+			CPUCLASS_586,
+			{
+				0, 0, 0, 0, "WinChip C6", 0, 0, 0,
+				"WinChip 2", "WinChip 3", 0, 0, 0, 0, 0, 0,
+				"WinChip"		/* Default */
+			},
+			winchip_cpu_setup,
+			NULL,
+			NULL,
+		},
+		/* Family 6, VIA acquired IDT Centaur design subsidiary */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, "C3 Samuel",
+				"C3 Samuel 2/Ezra", "C3 Ezra-T",
+				0, 0, 0, 0, 0, 0, 0,
+				"C3"	/* Default */
+			},
+			NULL,
+			via_cpu_probe,
+			NULL,
+		},
+		/* Family > 6, not yet available from VIA */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium Pro compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	},
+	{
+		"GenuineTMx86",
+		CPUVENDOR_TRANSMETA,
+		"Transmeta",
+		/* Family 4, Transmeta never had any of these */
+		{ {
+			CPUCLASS_486,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"486 compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family 5 */
+		{
+			CPUCLASS_586,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Crusoe"		/* Default */
+			},
+			transmeta_cpu_setup,
+			NULL,
+			transmeta_cpu_info,
+		},
+		/* Family 6, not yet available from Transmeta */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium Pro compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		},
+		/* Family > 6, not yet available from Transmeta */
+		{
+			CPUCLASS_686,
+			{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, 0, 0, 0, 0, 0,
+				"Pentium Pro compatible"	/* Default */
+			},
+			NULL,
+			NULL,
+			NULL,
+		} }
+	}
+};
+
+/*
+ * Vendor setup hook for Cyrix 6x86 / MediaGX-class CPUs.
+ *
+ * Flags the broken i8254 latch on MediaGX/GXm (when the ISA clock is
+ * configured), applies the documented Cyrix configuration-register
+ * workarounds, and disables idle-loop page zeroing.
+ *
+ * ci: per-CPU state; only ci_signature is read here (and only under
+ *     ISA_CLOCK).
+ */
+void
+cyrix6x86_cpu_setup(ci)
+	struct cpu_info *ci;
+{
+	/*
+	 * i8254 latch check routine:
+	 *     National Geode (formerly Cyrix MediaGX) has a serious bug in
+	 *     its built-in i8254-compatible clock module.
+	 *     Set the variable 'clock_broken_latch' to indicate it.
+	 */
+
+#ifdef ISA_CLOCK
+	extern int clock_broken_latch;
+
+	switch (ci->ci_signature) {
+	case 0x440:     /* Cyrix MediaGX */
+	case 0x540:     /* GXm */
+		clock_broken_latch = 1;
+		break;
+	}
+#endif
+
+	/* set up various cyrix registers */
+	/* Enable suspend on halt */
+	cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
+	/* enable access to ccr4/ccr5 */
+	cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) | 0x10);
+	/* cyrix's workaround  for the "coma bug" */
+	cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
+	cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
+	cyrix_write_reg(0x33, cyrix_read_reg(0x33) & ~0xff);
+	cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
+	/* disable access to ccr4/ccr5 */
+	cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10);
+
+	/*
+	 * XXX disable page zero in the idle loop, it seems to
+	 * cause panics on these CPUs.
+	 */
+	vm_page_zero_enable = FALSE;
+}
+
+/*
+ * Vendor setup hook for IDT WinChip CPUs.
+ *
+ * The WinChip C6 (family 5, model 4) has a broken time stamp counter,
+ * so mask CPUID_TSC out of the global feature word and warn the user.
+ */
+void
+winchip_cpu_setup(ci)
+	struct cpu_info *ci;
+{
+#if defined(I586_CPU)
+	switch (CPUID2MODEL(ci->ci_signature)) { /* model */
+	case 4:	/* WinChip C6 */
+		cpu_feature &= ~CPUID_TSC;
+		printf("WARNING: WinChip C6: broken TSC disabled\n");
+		/* no break needed: this is the only case in the switch */
+	}
+#endif
+}
+
+/*
+ * Probe VIA/IDT C3 extended cpuid support and record the extended
+ * feature flags (leaf 0x80000001, %edx) in ci_feature_flags.
+ */
+void
+via_cpu_probe(struct cpu_info *ci)
+{
+	u_int regs[4];
+	u_int lastfunc;
+
+	/* Ask how far the 0x8000xxxx extended function range extends. */
+	CPUID(0x80000000, regs[0], regs[1], regs[2], regs[3]);
+	lastfunc = regs[0];
+
+	/* Nothing more to learn if the feature leaf is absent. */
+	if (lastfunc < 0x80000001)
+		return;
+
+	CPUID(0x80000001, regs[0], regs[1], regs[2], regs[3]);
+	ci->ci_feature_flags = regs[3];
+}
+
+/*
+ * Resolve a more specific marketing name for an Intel family 6 CPU,
+ * keyed on the cpuid model number, the detected L2 cache size and
+ * (for model >= 8) the cpuid brand id.
+ *
+ * Returns the name string, or NULL when no better name than the
+ * table default is known.
+ */
+const char *
+intel_family6_name(struct cpu_info *ci)
+{
+	int model = CPUID2MODEL(ci->ci_signature);
+	const char *ret = NULL;
+	u_int l2cache = ci->ci_cinfo[CAI_L2CACHE].cai_totalsize;
+
+	switch (model) {
+	case 5:
+		/* Pentium II generation: L2 size tells the SKUs apart. */
+		if (l2cache == 0 || l2cache == 128 * 1024)
+			ret = "Celeron (Covington)";
+		else if (l2cache == 256 * 1024)
+			ret = "Mobile Pentium II (Dixon)";
+		else if (l2cache == 512 * 1024)
+			ret = "Pentium II";
+		else if (l2cache == 1 * 1024 * 1024 ||
+		    l2cache == 2 * 1024 * 1024)
+			ret = "Pentium II Xeon";
+		break;
+	case 6:
+		if (l2cache == 256 * 1024 || l2cache == 512 * 1024)
+			ret = "Mobile Pentium II";
+		break;
+	case 7:
+		if (l2cache == 512 * 1024)
+			ret = "Pentium III";
+		else if (l2cache == 1 * 1024 * 1024 ||
+		    l2cache == 2 * 1024 * 1024)
+			ret = "Pentium III Xeon";
+		break;
+	default:
+		if (model < 8)
+			break;
+		/* Model 8 and later carry a brand id in cpuid leaf 1. */
+		if (ci->ci_brand_id && ci->ci_brand_id < 0x10) {
+			switch (ci->ci_brand_id) {
+			case 0x3:
+				if (ci->ci_signature == 0x6B1)
+					ret = "Celeron";
+				break;
+			case 0x08:
+				if (ci->ci_signature >= 0xF13)
+					ret = "genuine processor";
+				break;
+			case 0x0E:
+				if (ci->ci_signature < 0xF13)
+					ret = "Xeon";
+				break;
+			}
+			if (ret == NULL)
+				ret = i386_intel_brand[ci->ci_brand_id];
+		}
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Query the basic cpuid leaves (0-3) and record in *ci: the vendor
+ * string, signature, feature flags, brand id, CLFLUSH line size,
+ * cache descriptors, and (if present and enabled) the processor
+ * serial number.  Each step bails out early when ci_cpuid_level
+ * shows the next leaf is not implemented.
+ */
+static void
+cpu_probe_base_features(struct cpu_info *ci)
+{
+	const struct x86_cache_info *cai;
+	u_int descs[4];
+	int iterations, i, j;
+	u_int8_t desc;
+	u_int32_t dummy1, dummy2, miscbytes;
+
+	/* presumably set to -1 by startup code when cpuid is absent */
+	if (ci->ci_cpuid_level < 0)
+		return;
+
+	/*
+	 * NOTE(review): the 0/2/1 index shuffle assumes CPUID() fills
+	 * its arguments in %eax,%ebx,%ecx,%edx order; the vendor
+	 * string is returned in %ebx,%edx,%ecx order.
+	 */
+	CPUID(0, ci->ci_cpuid_level,
+	    ci->ci_vendor[0],
+	    ci->ci_vendor[2],
+	    ci->ci_vendor[1]);
+	ci->ci_vendor[3] = 0;	/* NUL-terminate the 12-byte vendor string */
+
+	if (ci->ci_cpuid_level < 1)
+		return;
+
+	CPUID(1, ci->ci_signature, miscbytes, dummy1, ci->ci_feature_flags);
+
+	/* Brand is low order 8 bits of ebx */
+	ci->ci_brand_id = miscbytes & 0xff;
+
+	/* CLFLUSH line size is next 8 bits */
+	/* cpuid reports the line size in 8-byte units, hence << 3 */
+	if (ci->ci_feature_flags & CPUID_CFLUSH)
+		ci->ci_cflush_lsize = ((miscbytes >> 8) & 0xff) << 3;
+
+	if (ci->ci_cpuid_level < 2)
+		return;
+
+	/*
+	 * Parse the cache info from `cpuid', if we have it.
+	 * XXX This is kinda ugly, but hey, so is the architecture...
+	 */
+
+	CPUID(2, descs[0], descs[1], descs[2], descs[3]);
+
+	/* low byte of %eax says how many times leaf 2 must be read */
+	iterations = descs[0] & 0xff;
+	while (iterations-- > 0) {
+		for (i = 0; i < 4; i++) {
+			/* a set high bit marks the register as invalid */
+			if (descs[i] & 0x80000000)
+				continue;
+			for (j = 0; j < 4; j++) {
+				/* skip %al: it holds the iteration count */
+				if (i == 0 && j == 0)
+					continue;
+				desc = (descs[i] >> (j * 8)) & 0xff;
+				if (desc == 0)
+					continue;
+				cai = cache_info_lookup(intel_cpuid_cache_info,
+				    desc);
+				if (cai != NULL)
+					ci->ci_cinfo[cai->cai_index] = *cai;
+			}
+		}
+		CPUID(2, descs[0], descs[1], descs[2], descs[3]);
+	}
+
+	if (ci->ci_cpuid_level < 3)
+		return;
+
+	/*
+	 * If the processor serial number misfeature is present and supported,
+	 * extract it here.
+	 */
+	if ((ci->ci_feature_flags & CPUID_PN) != 0)
+	{
+		/* top 32 bits of the serial are the signature itself */
+		ci->ci_cpu_serial[0] = ci->ci_signature;
+		CPUID(3, dummy1, dummy2,
+		    ci->ci_cpu_serial[2],
+		    ci->ci_cpu_serial[1]);
+	}
+}
+
+/*
+ * Look the CPU up in the cpuid vendor table and run the matching
+ * per-family probe routine (if any) to collect extra feature bits.
+ * Does nothing for CPUs without cpuid or with an unknown vendor.
+ */
+void
+cpu_probe_features(struct cpu_info *ci)
+{
+	const struct cpu_cpuid_nameclass *cpup = NULL;
+	int i, max, family;
+
+	cpu_probe_base_features(ci);
+
+	if (ci->ci_cpuid_level < 1)
+		return;
+
+	max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
+	for (i = 0; i < max; i++) {
+		if (!strncmp((char *)ci->ci_vendor,
+		    i386_cpuid_cpus[i].cpu_id, 12)) {
+			cpup = &i386_cpuid_cpus[i];
+			break;
+		}
+	}
+
+	if (cpup == NULL)
+		return;
+
+	family = (ci->ci_signature >> 8) & 0xf;
+
+	/*
+	 * Clamp into the table's supported range.  Without the lower
+	 * bound check a bogus family below CPU_MINFAMILY would index
+	 * cpu_family[] with a negative subscript.
+	 */
+	if (family > CPU_MAXFAMILY)
+		family = CPU_MAXFAMILY;
+	else if (family < CPU_MINFAMILY)
+		return;
+	i = family - CPU_MINFAMILY;
+
+	if (cpup->cpu_family[i].cpu_probe == NULL)
+		return;
+
+	(*cpup->cpu_family[i].cpu_probe)(ci);
+}
+
+/*
+ * Probe AMD family 6 extended cpuid: merge the extended feature
+ * flags (leaf 0x80000001) into ci_feature_flags, then match the
+ * marketing name string (leaves 0x80000002-4) against the amd_brand
+ * table, recording the brand index and saving the matched tail into
+ * amd_brand_name for identifycpu() to print.
+ */
+void
+amd_family6_probe(struct cpu_info *ci)
+{
+	u_int32_t lfunc;
+	u_int32_t descs[4];
+	u_int32_t brand[12];	/* 48 bytes: the cpuid brand string */
+	char *p;
+	int i;
+
+	/* largest supported extended function comes back in %eax */
+	CPUID(0x80000000, lfunc, descs[1], descs[2], descs[3]);
+
+	/*
+	 * Determine the extended feature flags.
+	 */
+	if (lfunc >= 0x80000001) {
+		CPUID(0x80000001, descs[0], descs[1], descs[2], descs[3]);
+		ci->ci_feature_flags |= descs[3];
+	}
+
+	if (lfunc < 0x80000004)
+		return;
+	
+	CPUID(0x80000002, brand[0], brand[1], brand[2], brand[3]);
+	CPUID(0x80000003, brand[4], brand[5], brand[6], brand[7]);
+	CPUID(0x80000004, brand[8], brand[9], brand[10], brand[11]);
+
+	/*
+	 * NOTE(review): strstr/strcpy assume the 48-byte brand string
+	 * is NUL-terminated by the CPU -- confirm for all models.
+	 * Index 0 of amd_brand is intentionally skipped (unnamed slot).
+	 */
+	for (i = 1; i < sizeof(amd_brand) / sizeof(amd_brand[0]); i++)
+		if ((p = strstr((char *)brand, amd_brand[i])) != NULL) {
+			ci->ci_brand_id = i;
+			strcpy(amd_brand_name, p);
+			break;
+		}
+}
+
+/*
+ * Model-specific fixups for AMD K5 family CPUs.
+ */
+void
+amd_family5_setup(struct cpu_info *ci)
+{
+
+	/* Only the AMD-K5 Model 0 needs attention here. */
+	if (CPUID2MODEL(ci->ci_signature) != 0)
+		return;
+
+	/*
+	 * According to the AMD Processor Recognition App Note,
+	 * the AMD-K5 Model 0 uses the wrong bit to indicate
+	 * support for global PTEs, instead using bit 9 (APIC)
+	 * rather than bit 13 (i.e. "0x200" vs. 0x2000".  Oops!).
+	 */
+	if (cpu_feature & CPUID_APIC)
+		cpu_feature = (cpu_feature & ~CPUID_APIC) | CPUID_PGE;
+	/*
+	 * XXX But pmap_pg_g is already initialized -- need to kick
+	 * XXX the pmap somehow.  How does the MP branch do this?
+	 */
+}
+
+/*
+ * Transmeta Crusoe LongRun Support by Tamotsu Hattori.
+ * Port from FreeBSD-current(August, 2001) to NetBSD by tshiozak.
+ */
+
+#define	MSR_TMx86_LONGRUN		0x80868010
+#define	MSR_TMx86_LONGRUN_FLAGS		0x80868011
+
+/* The mode value lives in the low 7 bits of each MSR half; the
+ * remaining bits are reserved and must be preserved on writes. */
+#define	LONGRUN_MODE_MASK(x)		((x) & 0x0000007f)
+#define	LONGRUN_MODE_RESERVED(x)	((x) & 0xffffff80)
+#define	LONGRUN_MODE_WRITE(x, y)	(LONGRUN_MODE_RESERVED(x) | \
+					    LONGRUN_MODE_MASK(y))
+
+#define	LONGRUN_MODE_MINFREQUENCY	0x00
+#define	LONGRUN_MODE_ECONOMY		0x01
+#define	LONGRUN_MODE_PERFORMANCE	0x02
+#define	LONGRUN_MODE_MAXFREQUENCY	0x03
+#define	LONGRUN_MODE_UNKNOWN		0x04
+#define	LONGRUN_MODE_MAX		0x04
+
+/* View a 64-bit MSR value as two 32-bit words; on little-endian
+ * i386, regs[0] is the low half and regs[1] the high half. */
+union msrinfo {
+	u_int64_t	msr;
+	u_int32_t	regs[2];
+};
+
+/* Per-mode MSR settings, indexed by LONGRUN_MODE_* (see above). */
+u_int32_t longrun_modes[LONGRUN_MODE_MAX][3] = {
+	/*  MSR low, MSR high, flags bit0 */
+	{	  0,	  0,		0},	/* LONGRUN_MODE_MINFREQUENCY */
+	{	  0,	100,		0},	/* LONGRUN_MODE_ECONOMY */
+	{	  0,	100,		1},	/* LONGRUN_MODE_PERFORMANCE */
+	{	100,	100,		1},	/* LONGRUN_MODE_MAXFREQUENCY */
+};
+
+/*
+ * Read the LongRun MSRs and translate them back into one of the
+ * LONGRUN_MODE_* constants by matching against longrun_modes[];
+ * returns LONGRUN_MODE_UNKNOWN if no table entry matches.
+ *
+ * Interrupts are disabled around the MSR reads and restored by
+ * writing back the saved eflags.
+ */
+u_int
+tmx86_get_longrun_mode(void)
+{
+	u_long		eflags;
+	union msrinfo	msrinfo;
+	u_int		low, high, flags, mode;
+
+	eflags = read_eflags();
+	disable_intr();
+
+	msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN);
+	low = LONGRUN_MODE_MASK(msrinfo.regs[0]);
+	high = LONGRUN_MODE_MASK(msrinfo.regs[1]);
+	flags = rdmsr(MSR_TMx86_LONGRUN_FLAGS) & 0x01;
+
+	for (mode = 0; mode < LONGRUN_MODE_MAX; mode++) {
+		if (low   == longrun_modes[mode][0] &&
+		    high  == longrun_modes[mode][1] &&
+		    flags == longrun_modes[mode][2]) {
+			goto out;
+		}
+	}
+	mode = LONGRUN_MODE_UNKNOWN;
+out:
+	write_eflags(eflags);	/* restores the interrupt flag */
+	return (mode);
+}
+
+/*
+ * Fetch the current LongRun status from cpuid leaf 0x80860007 into
+ * *frequency, *voltage and *percentage (units as reported by the
+ * CPU; presumably MHz/mV/percent -- see the printf in
+ * transmeta_cpu_info()).  Always returns 1.
+ */
+static u_int
+tmx86_get_longrun_status(u_int *frequency, u_int *voltage, u_int *percentage)
+{
+	u_long		eflags;
+	u_int		eax, ebx, ecx, edx;
+
+	eflags = read_eflags();
+	disable_intr();
+
+	CPUID(0x80860007, eax, ebx, ecx, edx);
+	*frequency = eax;
+	*voltage = ebx;
+	*percentage = ecx;
+
+	write_eflags(eflags);	/* restores the interrupt flag */
+	return (1);
+}
+
+/*
+ * Program the LongRun MSRs for one of the valid LONGRUN_MODE_*
+ * settings from longrun_modes[].  Reserved MSR bits are preserved
+ * via LONGRUN_MODE_WRITE().
+ *
+ * Returns 0 if mode is out of range, 1 on success.
+ */
+u_int
+tmx86_set_longrun_mode(u_int mode)
+{
+	u_long		eflags;
+	union msrinfo	msrinfo;
+
+	if (mode >= LONGRUN_MODE_UNKNOWN) {
+		return (0);
+	}
+
+	eflags = read_eflags();
+	disable_intr();
+
+	/* Write LongRun mode values to Model Specific Register. */
+	msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN);
+	msrinfo.regs[0] = LONGRUN_MODE_WRITE(msrinfo.regs[0],
+	    longrun_modes[mode][0]);
+	msrinfo.regs[1] = LONGRUN_MODE_WRITE(msrinfo.regs[1],
+	    longrun_modes[mode][1]);
+	wrmsr(MSR_TMx86_LONGRUN, msrinfo.msr);
+
+	/* Write LongRun mode flags to Model Specific Register. */
+	msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN_FLAGS);
+	msrinfo.regs[0] = (msrinfo.regs[0] & ~0x01) | longrun_modes[mode][2];
+	wrmsr(MSR_TMx86_LONGRUN_FLAGS, msrinfo.msr);
+
+	write_eflags(eflags);	/* restores the interrupt flag */
+	return (1);
+}
+
+/* Most recently sampled LongRun state. */
+u_int crusoe_longrun;
+u_int crusoe_frequency;
+u_int crusoe_voltage;
+u_int crusoe_percentage;
+
+/*
+ * Refresh the cached frequency/voltage/percentage triple above.
+ * crusoe_longrun itself is only assigned in transmeta_cpu_info()
+ * in this file.
+ */
+void
+tmx86_get_longrun_status_all(void)
+{
+
+	tmx86_get_longrun_status(&crusoe_frequency,
+	    &crusoe_voltage, &crusoe_percentage);
+}
+
+
+/*
+ * Print Transmeta-specific identification: the processor and Code
+ * Morphing Software revisions, the 64-byte information string
+ * (cpuid leaves 0x80860003-6), and, when LongRun is implemented
+ * (leaf 0x80860007), the current LongRun mode and status.
+ */
+static void
+transmeta_cpu_info(struct cpu_info *ci)
+{
+	u_int eax, ebx, ecx, edx, nreg = 0;
+
+	/* leaf 0x80860000 reports the highest Transmeta leaf in %eax */
+	CPUID(0x80860000, eax, ebx, ecx, edx);
+	nreg = eax;
+	if (nreg >= 0x80860001) {
+		CPUID(0x80860001, eax, ebx, ecx, edx);
+		printf("%s: Processor revision %u.%u.%u.%u\n",
+		    ci->ci_dev->dv_xname,
+		    (ebx >> 24) & 0xff,
+		    (ebx >> 16) & 0xff,
+		    (ebx >> 8) & 0xff,
+		    ebx & 0xff);
+	}
+	if (nreg >= 0x80860002) {
+		CPUID(0x80860002, eax, ebx, ecx, edx);
+		printf("%s: Code Morphing Software Rev: %u.%u.%u-%u-%u\n",
+		    ci->ci_dev->dv_xname, (ebx >> 24) & 0xff,
+		    (ebx >> 16) & 0xff,
+		    (ebx >> 8) & 0xff,
+		    ebx & 0xff,
+		    ecx);
+	}
+	if (nreg >= 0x80860006) {
+		/*
+		 * Leaves 0x80860003-6 each fill %eax..%edx with 16
+		 * bytes of ASCII, 64 bytes total; the union lets us
+		 * read them back as one string (text[64] terminates).
+		 */
+		union {
+			char text[65];
+			struct
+			{
+				u_int eax;
+				u_int ebx;
+				u_int ecx;
+				u_int edx;
+			} regs[4];
+		} info;
+		int i;
+
+		for (i=0; i<4; i++) {
+			CPUID(0x80860003 + i,
+			    info.regs[i].eax, info.regs[i].ebx,
+			    info.regs[i].ecx, info.regs[i].edx);
+		}
+		info.text[64] = 0;
+		printf("%s: %s\n", ci->ci_dev->dv_xname, info.text);
+	}
+
+	if (nreg >= 0x80860007) {
+		crusoe_longrun = tmx86_get_longrun_mode();
+		tmx86_get_longrun_status(&crusoe_frequency,
+		    &crusoe_voltage, &crusoe_percentage);
+		printf("%s: LongRun mode: %d  <%dMHz %dmV %d%%>\n",
+		    ci->ci_dev->dv_xname,
+		    crusoe_longrun, crusoe_frequency, crusoe_voltage,
+		    crusoe_percentage);
+	}
+}
+
+/*
+ * Enable LongRun support if this Transmeta CPU implements the
+ * 0x80860007 cpuid leaf.
+ */
+void
+transmeta_cpu_setup(struct cpu_info *ci)
+{
+	u_int maxfunc, unused;
+
+	maxfunc = 0;
+	CPUID(0x80860000, maxfunc, unused, unused, unused);
+	if (maxfunc >= 0x80860007)
+		tmx86_has_longrun = 1;
+}
+
+/*
+ * printf formats used by identifycpu() when the kernel lacks support
+ * for the detected CPU class; marked __unused because whether they
+ * are referenced depends on the configured I*86_CPU options.
+ */
+static const char n_support[] __attribute__((__unused__)) =
+    "NOTICE: this kernel does not support %s CPU class\n";
+static const char n_lower[] __attribute__((__unused__)) =
+    "NOTICE: lowering CPU class to %s\n";
+
+/*
+ * Identify the CPU described by *ci: derive the vendor, class and
+ * model name from the cpuid tables (or i386_nocpuid_cpus when cpuid
+ * is absent), set cpu_model and the globals cpu_class/cpu_feature
+ * consumers rely on, estimate the TSC frequency, print features and
+ * cache info, downgrade cpu_class when the kernel is not configured
+ * for the detected class, and select class-specific routines.
+ */
+void
+identifycpu(struct cpu_info *ci)
+{
+	const char *name, *modifier, *vendorname, *brand = "";
+	int class = CPUCLASS_386, vendor, i, max;
+	int modif, family, model;
+	const struct cpu_cpuid_nameclass *cpup = NULL;
+	const struct cpu_cpuid_family *cpufam;
+	char *cpuname = ci->ci_dev->dv_xname;
+	char buf[1024];
+	/* NOTE(review): these point at string-literal flag-format
+	 * macros; const char * would be more accurate. */
+	char *feature_str[3];
+
+	if (ci->ci_cpuid_level == -1) {
+		/* No cpuid: fall back on the table indexed by `cpu'. */
+#ifdef DIAGNOSTIC
+		if (cpu < 0 || cpu >=
+		    sizeof(i386_nocpuid_cpus) / sizeof(i386_nocpuid_cpus[0]))
+			panic("unknown cpu type %d", cpu);
+#endif
+		name = i386_nocpuid_cpus[cpu].cpu_name;
+		vendor = i386_nocpuid_cpus[cpu].cpu_vendor;
+		vendorname = i386_nocpuid_cpus[cpu].cpu_vendorname;
+		class = i386_nocpuid_cpus[cpu].cpu_class;
+		ci->cpu_setup = i386_nocpuid_cpus[cpu].cpu_setup;
+		ci->ci_info = i386_nocpuid_cpus[cpu].cpu_info;
+		modifier = "";
+	} else {
+		max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
+		modif = (ci->ci_signature >> 12) & 0x3;
+		family = CPUID2FAMILY(ci->ci_signature);
+		if (family < CPU_MINFAMILY)
+			panic("identifycpu: strange family value");
+		model = CPUID2MODEL(ci->ci_signature);
+
+		/* Match the 12-byte vendor string against the table. */
+		for (i = 0; i < max; i++) {
+			if (!strncmp((char *)ci->ci_vendor,
+			    i386_cpuid_cpus[i].cpu_id, 12)) {
+				cpup = &i386_cpuid_cpus[i];
+				break;
+			}
+		}
+
+		if (cpup == NULL) {
+			/* Unknown vendor: synthesize a class from family. */
+			vendor = CPUVENDOR_UNKNOWN;
+			if (ci->ci_vendor[0] != '\0')
+				vendorname = (char *)&ci->ci_vendor[0];
+			else
+				vendorname = "Unknown";
+			if (family > CPU_MAXFAMILY)
+				family = CPU_MAXFAMILY;
+			class = family - 3;
+			modifier = "";
+			name = "";
+			ci->cpu_setup = NULL;
+			ci->ci_info = NULL;
+		} else {
+			vendor = cpup->cpu_vendor;
+			vendorname = cpup->cpu_vendorname;
+			modifier = modifiers[modif];
+			/* Clamp family/model into the table bounds. */
+			if (family > CPU_MAXFAMILY) {
+				family = CPU_MAXFAMILY;
+				model = CPU_DEFMODEL;
+			} else if (model > CPU_MAXMODEL)
+				model = CPU_DEFMODEL;
+			cpufam = &cpup->cpu_family[family - CPU_MINFAMILY];
+			name = cpufam->cpu_models[model];
+			if (name == NULL)
+			    name = cpufam->cpu_models[CPU_DEFMODEL];
+			class = cpufam->cpu_class;
+			ci->cpu_setup = cpufam->cpu_setup;
+			ci->ci_info = cpufam->cpu_info;
+
+			/* Intel family 6: refine name by L2/brand id. */
+			if (vendor == CPUVENDOR_INTEL && family == 6 &&
+			    model >= 5) {
+				const char *tmp = intel_family6_name(ci);
+				if (tmp != NULL)
+					name = tmp;
+			}
+
+			if (vendor == CPUVENDOR_AMD && family == 6 &&
+			    model >= 6) {
+				if (ci->ci_brand_id == 1)
+					/* 
+					 * It's Duron. We override the 
+					 * name, since it might have been 
+					 * misidentified as Athlon.
+					 */
+					name = amd_brand[ci->ci_brand_id];
+				else
+					brand = amd_brand_name;
+			}
+			
+			if (vendor == CPUVENDOR_IDT && family >= 6)
+				vendorname = "VIA";
+		}
+	}
+
+	cpu_class = class;
+	ci->ci_cpu_class = class;
+
+#if defined(I586_CPU) || defined(I686_CPU)
+	/*
+	 * If we have a cycle counter, compute the approximate
+	 * CPU speed in MHz.
+	 * XXX this needs to run on the CPU being probed..
+	 */
+	if (ci->ci_feature_flags & CPUID_TSC) {
+		u_int64_t last_tsc;
+
+		/* cycles over 100ms, times 10, gives cycles/second */
+		last_tsc = rdtsc();
+		delay(100000);
+		ci->ci_tsc_freq = (rdtsc() - last_tsc) * 10;
+#ifndef NO_TSC_TIME
+		microtime_func = cc_microtime;
+#endif
+	}
+	/* XXX end XXX */
+#endif
+
+	snprintf(cpu_model, sizeof(cpu_model), "%s%s%s%s%s%s%s (%s-class)",
+	    vendorname,
+	    *modifier ? " " : "", modifier,
+	    *name ? " " : "", name,
+	    *brand ? " " : "", brand,
+	    classnames[class]);
+	printf("%s: %s", cpuname, cpu_model);
+
+	if (ci->ci_tsc_freq != 0)
+		printf(", %qd.%02qd MHz", (ci->ci_tsc_freq + 4999) / 1000000,
+		    ((ci->ci_tsc_freq + 4999) / 10000) % 100);
+	if (ci->ci_signature != 0)
+		printf(", id 0x%x", ci->ci_signature);
+	printf("\n");
+
+	if (ci->ci_info)
+		(*ci->ci_info)(ci);
+
+	/* Pick the format strings used to decode the feature words. */
+	if (vendor == CPUVENDOR_INTEL) {
+		feature_str[0] = CPUID_FLAGS1;
+		feature_str[1] = CPUID_FLAGS2;
+		feature_str[2] = CPUID_FLAGS3;
+	} else {
+		feature_str[0] = CPUID_FLAGS1;
+		feature_str[1] = CPUID_EXT_FLAGS2;
+		feature_str[2] = CPUID_EXT_FLAGS3;
+	}	
+	
+	if (ci->ci_feature_flags) {
+		if ((ci->ci_feature_flags & CPUID_MASK1) != 0) {
+			bitmask_snprintf(ci->ci_feature_flags,
+			    feature_str[0], buf, sizeof(buf));
+			printf("%s: features %s\n", cpuname, buf);
+		}
+		if ((ci->ci_feature_flags & CPUID_MASK2) != 0) {
+			bitmask_snprintf(ci->ci_feature_flags,
+			    feature_str[1], buf, sizeof(buf));
+			printf("%s: features %s\n", cpuname, buf);
+		}
+		if ((ci->ci_feature_flags & CPUID_MASK3) != 0) {
+			bitmask_snprintf(ci->ci_feature_flags,
+			    feature_str[2], buf, sizeof(buf));
+			printf("%s: features %s\n", cpuname, buf);
+		}
+	}
+
+	x86_print_cacheinfo(ci);
+
+	if (ci->ci_cpuid_level >= 3 && (ci->ci_feature_flags & CPUID_PN)) {
+		printf("%s: serial number %04X-%04X-%04X-%04X-%04X-%04X\n",
+		    cpuname,
+		    ci->ci_cpu_serial[0] / 65536, ci->ci_cpu_serial[0] % 65536,
+		    ci->ci_cpu_serial[1] / 65536, ci->ci_cpu_serial[1] % 65536,
+		    ci->ci_cpu_serial[2] / 65536, ci->ci_cpu_serial[2] % 65536);
+	}
+
+	/*
+	 * Now that we have told the user what they have,
+	 * let them know if that machine type isn't configured.
+	 */
+	/*
+	 * The missing breaks between the #ifndef cases are deliberate:
+	 * each unsupported class falls through to try the next lower
+	 * configured one.
+	 */
+	switch (cpu_class) {
+#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
+#error No CPU classes configured.
+#endif
+#ifndef I686_CPU
+	case CPUCLASS_686:
+		printf(n_support, "Pentium Pro");
+#ifdef I586_CPU
+		printf(n_lower, "i586");
+		cpu_class = CPUCLASS_586;
+		break;
+#endif
+#endif
+#ifndef I586_CPU
+	case CPUCLASS_586:
+		printf(n_support, "Pentium");
+#ifdef I486_CPU
+		printf(n_lower, "i486");
+		cpu_class = CPUCLASS_486;
+		break;
+#endif
+#endif
+#ifndef I486_CPU
+	case CPUCLASS_486:
+		printf(n_support, "i486");
+#ifdef I386_CPU
+		printf(n_lower, "i386");
+		cpu_class = CPUCLASS_386;
+		break;
+#endif
+#endif
+#ifndef I386_CPU
+	case CPUCLASS_386:
+		printf(n_support, "i386");
+		panic("no appropriate CPU class available");
+#endif
+	default:
+		break;
+	}
+
+	/*
+	 * Now plug in optimized versions of various routines we
+	 * might have.
+	 */
+	switch (cpu_class) {
+#if defined(I686_CPU)
+	case CPUCLASS_686:
+		copyout_func = i486_copyout;
+		break;
+#endif
+#if defined(I586_CPU)
+	case CPUCLASS_586:
+		copyout_func = i486_copyout;
+		break;
+#endif
+#if defined(I486_CPU)
+	case CPUCLASS_486:
+		copyout_func = i486_copyout;
+		break;
+#endif
+	default:
+		/* We just inherit the default i386 versions. */
+		break;
+	}
+
+	if (cpu == CPU_486DLC) {
+#ifndef CYRIX_CACHE_WORKS
+		printf("WARNING: CYRIX 486DLC CACHE UNCHANGED.\n");
+#else
+#ifndef CYRIX_CACHE_REALLY_WORKS
+		printf("WARNING: CYRIX 486DLC CACHE ENABLED IN HOLD-FLUSH MODE.\n");
+#else
+		printf("WARNING: CYRIX 486DLC CACHE ENABLED.\n");
+#endif
+#endif
+	}
+
+#if defined(I686_CPU)
+	/*
+	 * If we have FXSAVE/FXRESTOR, use them.
+	 */
+	if (cpu_feature & CPUID_FXSR) {
+		i386_use_fxsave = 1;
+
+		/*
+		 * If we have SSE/SSE2, enable XMM exceptions, and
+		 * notify userland.
+		 */
+		if (cpu_feature & (CPUID_SSE|CPUID_SSE2)) {
+			if (cpu_feature & CPUID_SSE)
+				i386_has_sse = 1;
+			if (cpu_feature & CPUID_SSE2)
+				i386_has_sse2 = 1;
+		}
+	} else
+		i386_use_fxsave = 0;
+#endif /* I686_CPU */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/locore.S	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,2087 @@
+/*	$NetBSD: locore.S,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: locore.S,v 1.24 2004/02/20 17:35:01 yamt Exp 	*/
+
+/*-
+ * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)locore.s	7.3 (Berkeley) 5/13/91
+ */
+
+#include "opt_compat_netbsd.h"
+#include "opt_compat_oldboot.h"
+#include "opt_cputype.h"
+#include "opt_ddb.h"
+#include "opt_ipkdb.h"
+#include "opt_lockdebug.h"
+#include "opt_multiprocessor.h"
+#include "opt_realmem.h"
+#include "opt_user_ldt.h"
+#include "opt_vm86.h"
+#include "opt_xen.h"
+
+#include "npx.h"
+#include "assym.h"
+#include "apm.h"
+#include "lapic.h"
+#include "ioapic.h"
+#include "ksyms.h"
+
+#include <sys/errno.h>
+#include <sys/syscall.h>
+
+#include <machine/cputypes.h>
+#include <machine/param.h>
+#include <machine/pte.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/trap.h>
+#include <machine/bootinfo.h>
+
+#if NLAPIC > 0
+#include <machine/i82489reg.h>
+#endif
+
+/* LINTSTUB: include <sys/types.h> */
+/* LINTSTUB: include <machine/cpu.h> */
+/* LINTSTUB: include <sys/systm.h> */
+
+#include <machine/asm.h>
+
+#if defined(MULTIPROCESSOR)
+	
+/*
+ * Record lwp as current on this CPU and point the lwp back at the
+ * CPU.  "cpu" names a scratch register clobbered by the macro.
+ */
+#define SET_CURLWP(lwp,cpu)				\
+	movl	CPUVAR(SELF),cpu		; 	\
+	movl	lwp,CPUVAR(CURLWP)	;	\
+	movl	cpu,L_CPU(lwp)
+	
+#else
+
+/* UP case: no back-pointer update is needed; the scratch register
+ * argument (tcpu) is unused.
+ * NOTE(review): GET_CURLWP is only defined in the !MULTIPROCESSOR
+ * branch here -- confirm MP users get it from elsewhere. */
+#define SET_CURLWP(lwp,tcpu)		movl	lwp,CPUVAR(CURLWP)
+#define GET_CURLWP(reg)			movl	CPUVAR(CURLWP),reg
+
+#endif
+
+/* Load/store the current pcb pointer kept in the per-CPU area. */
+#define GET_CURPCB(reg)			movl	CPUVAR(CURPCB),reg	
+#define SET_CURPCB(reg)			movl	reg,CPUVAR(CURPCB)
+
+/* Clear the per-CPU reschedule-pending flag (reg supplies the value). */
+#define CLEAR_RESCHED(reg)		movl	reg,CPUVAR(RESCHED)
+
+/* XXX temporary kluge; these should not be here */
+/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
+#include <dev/isa/isareg.h>
+
+
+/* Disallow old names for REALBASEMEM */
+#ifdef BIOSBASEMEM
+#error BIOSBASEMEM option deprecated; use REALBASEMEM only if memory size reported by latest boot block is incorrect
+#endif
+
+/* Disallow old names for REALEXTMEM */
+#ifdef EXTMEM_SIZE
+#error EXTMEM_SIZE option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect
+#endif
+#ifdef BIOSEXTMEM
+#error BIOSEXTMEM option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect
+#endif
+
+#include <machine/frameasm.h>
+
+
+#ifdef MULTIPROCESSOR
+#include <machine/i82489reg.h>
+#endif
+	
+/*
+ * PTmap is recursive pagemap at top of virtual address space.
+ * Within PTmap, the page directory can be found (third indirection).
+ *
+ * XXX 4 == sizeof pde
+ */
+	.set	_C_LABEL(PTmap),(PDSLOT_PTE << PDSHIFT)
+	.set	_C_LABEL(PTD),(_C_LABEL(PTmap) + PDSLOT_PTE * PAGE_SIZE)
+	.set	_C_LABEL(PTDpde),(_C_LABEL(PTD) + PDSLOT_PTE * 4)
+
+/*
+ * APTmap, APTD is the alternate recursive pagemap.
+ * It's used when modifying another process's page tables.
+ *
+ * XXX 4 == sizeof pde
+ */
+	.set	_C_LABEL(APTmap),(PDSLOT_APTE << PDSHIFT)
+	.set	_C_LABEL(APTD),(_C_LABEL(APTmap) + PDSLOT_APTE * PAGE_SIZE)
+	.set	_C_LABEL(APTDpde),(_C_LABEL(PTD) + PDSLOT_APTE * 4)
+
+
+/*
+ * Initialization
+ */
+	.data
+
+	.globl	_C_LABEL(cpu)
+	.globl	_C_LABEL(esym),_C_LABEL(boothowto)
+	.globl	_C_LABEL(bootinfo),_C_LABEL(atdevbase)
+#ifdef COMPAT_OLDBOOT
+	.globl	_C_LABEL(bootdev)
+#endif
+	.globl	_C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
+	.globl	_C_LABEL(biosbasemem),_C_LABEL(biosextmem)
+	.globl	_C_LABEL(gdt)
+#ifdef I586_CPU
+	.globl	_C_LABEL(idt)
+#endif
+	.globl	_C_LABEL(lapic_tpr)	
+	
+#if NLAPIC > 0
+#ifdef __ELF__
+	.align	PAGE_SIZE
+#else
+	.align	12
+#endif
+	.globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
+_C_LABEL(local_apic):
+	.space	LAPIC_ID
+_C_LABEL(lapic_id):	
+	.long	0x00000000
+	.space  LAPIC_TPRI-(LAPIC_ID+4)
+_C_LABEL(lapic_tpr):		
+	.space  LAPIC_PPRI-LAPIC_TPRI
+_C_LABEL(lapic_ppr):		
+	.space	LAPIC_ISR-LAPIC_PPRI
+_C_LABEL(lapic_isr):
+	.space	PAGE_SIZE-LAPIC_ISR
+#else
+_C_LABEL(lapic_tpr):	
+	.long 0
+#endif
+	
+
+_C_LABEL(cpu):		.long	0	# are we 386, 386sx, or 486,
+					#   or Pentium, or..
+_C_LABEL(esym):		.long	0	# ptr to end of syms
+_C_LABEL(atdevbase):	.long	0	# location of start of iomem in virtual
+_C_LABEL(proc0paddr):	.long	0
+_C_LABEL(PTDpaddr):	.long	0	# paddr of PTD, for libkvm
+#ifndef REALBASEMEM
+_C_LABEL(biosbasemem):	.long	0	# base memory reported by BIOS
+#else
+_C_LABEL(biosbasemem):	.long	REALBASEMEM
+#endif
+#ifndef REALEXTMEM
+_C_LABEL(biosextmem):	.long	0	# extended memory reported by BIOS
+#else
+_C_LABEL(biosextmem):	.long	REALEXTMEM
+#endif
+
+#include <machine/xen.h>
+/* XXX local define of the Xen "yield" hypercall number; presumably
+ * missing from the imported interface headers. */
+#define __HYPERVISOR_yield		   8
+
+	.space 512
+/* Temporary bootstrap stack: grows down into the .space above; the
+ * two words at tmpstk form the %ss:%esp pair loaded via lss below. */
+tmpstk:
+	.long tmpstk, __KERNEL_DS
+
+
+/* Under Xen the kernel runs at its linked address, so relocation of
+ * symbols during early boot is the identity mapping. */
+#define	_RELOC(x)	((x))
+#define	RELOC(x)	_RELOC(_C_LABEL(x))
+
+	.text
+	.globl	_C_LABEL(kernel_text)
+	.set	_C_LABEL(kernel_text),KERNTEXTOFF
+
+	.globl	start
+start:
+	cld
+
+	lss	tmpstk,%esp		# bootstrap stack end location
+
+	movl	%esi,%ebx		# save start_info pointer
+
+#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
+	/* Save the symbol locations. */
+/* XXX assym.h */
+#define MOD_START   20
+#define MOD_LEN     24
+	movl	MOD_START(%ebx),%esi
+	addl	MOD_LEN(%ebx),%esi
+	movl	%esi,RELOC(esym)
+#endif
+
+        /* Clear BSS first so that there are no surprises... */
+	xorl	%eax,%eax
+	movl	$RELOC(__bss_start),%edi
+	movl	$RELOC(_end),%ecx
+	subl	%edi,%ecx
+	rep stosb
+
+	/* Copy the necessary stuff from start_info structure. */
+        /* We need to copy shared_info early, so that sti/cli work */
+	movl	%ebx,%esi
+	movl	$RELOC(start_info_union),%edi
+	movl	$128,%ecx
+	rep movsl
+
+    	/* (howto, [bootdev], bootinfo, basemem, extmem). */
+	xorl	%eax,%eax
+	movl	%eax,RELOC(boothowto)
+#ifdef COMPAT_OLDBOOT
+	movl	%eax,RELOC(bootdev)
+#endif
+	movl	$0x20000,%eax
+	movl	%eax,RELOC(boothowto)
+
+	/* First, reset the PSL. */
+	pushl	$PSL_MBO
+	popfl
+
+	/* Clear segment registers; always null in proc0. */
+	xorl	%eax,%eax
+	movw	%ax,%fs
+	movw	%ax,%gs
+	decl	%eax
+	movl	%eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
+
+	xorl	%eax,%eax
+	cpuid
+	movl	%eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
+
+/*
+ * Virtual address space of kernel:
+ *
+ * text | data | bss | [syms] | page dir | proc0 kstack 
+ *			      0          1       2      3
+ */
+#define	PROC0PDIR	((0)              * PAGE_SIZE)
+#define	PROC0STACK	((1)              * PAGE_SIZE)
+#define	SYSMAP		((1+UPAGES)       * PAGE_SIZE)
+#define	TABLESIZE	((1+UPAGES) * PAGE_SIZE) /* + nkpde * PAGE_SIZE */
+
+	/* Find end of kernel image. */
+	movl	$RELOC(end),%edi
+#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
+	/* Save the symbols (if loaded). */
+	movl	RELOC(esym),%eax
+	testl	%eax,%eax
+	jz	1f
+	movl	%eax,%edi
+1:
+#endif
+	/* Calculate where to start the bootstrap tables. */
+	movl	%edi,%esi			# edi = esym ? esym : end
+	addl	$PGOFSET,%esi			# page align up
+	andl	$~PGOFSET,%esi
+
+	/*
+	 * Calculate the size of the kernel page table directory, and
+	 * how many entries it will have.
+	 */
+	movl	RELOC(nkpde),%ecx		# get nkpde
+	cmpl	$NKPTP_MIN,%ecx			# larger than min?
+	jge	1f
+	movl	$NKPTP_MIN,%ecx			# set at min
+	jmp	2f
+1:	cmpl	$NKPTP_MAX,%ecx			# larger than max?
+	jle	2f
+	movl	$NKPTP_MAX,%ecx
+2:
+
+	/* Clear memory for bootstrap tables. */
+	shll	$PGSHIFT,%ecx
+	addl	$TABLESIZE,%ecx
+	addl	%esi,%ecx			# end of tables
+	movl	%ecx,RELOC(gdt)
+	addl	$PAGE_SIZE,%ecx
+	movl	%ecx,RELOC(xpmap_phys_to_machine_mapping)
+	subl	%edi,%ecx			# size of tables
+	shrl	$2,%ecx
+	xorl	%eax,%eax
+	cld
+	rep
+	stosl
+
+/*
+ * fillkpt
+ *	eax = pte (page frame | control | status)
+ *	ebx = page table address
+ *	ecx = number of pages to map
+ */
+#define	fillkpt		\
+1:	movl	%eax,(%ebx)	; \
+	addl	$PAGE_SIZE,%eax	; /* increment physical address */ \
+	addl	$4,%ebx		; /* next pte */ \
+	loop	1b		;
+
+/*
+ * Build initial page tables.
+ */
+	/* Calculate end of text segment, rounded to a page. */
+	leal	(RELOC(etext)+PGOFSET),%edx
+	andl	$~PGOFSET,%edx
+	
+	/* Skip over the first 1MB. */
+	movl	$KERNTEXTOFF,%eax
+	movl	%eax,%ecx
+	subl	$KERNBASE_LOCORE,%ecx
+	shrl	$PGSHIFT,%ecx
+	leal	(SYSMAP)(%esi,%ecx,4),%ebx
+
+	/* Map the kernel text read-only. */
+	movl	%edx,%ecx
+	subl	%eax,%ecx
+	shrl	$PGSHIFT,%ecx
+	orl	$(PG_V|PG_KR),%eax
+	fillkpt
+
+/* XXX assym.h */
+#define NR_PAGES 0
+	/* Map the data, BSS, and bootstrap tables read-write. */
+	movl	$RELOC(start_info_union),%eax
+	movl	NR_PAGES(%eax),%eax
+	shll	$2,%eax
+	movl	RELOC(xpmap_phys_to_machine_mapping),%ecx
+	addl	%eax,%ecx
+	addl	$PGOFSET,%ecx			# page align up
+	andl	$~PGOFSET,%ecx
+						    # end of tables
+	subl	%edx,%ecx				# subtract end of text
+	shrl	$PGSHIFT,%ecx
+	leal	(PG_V|PG_KW)(%edx),%eax
+	fillkpt
+
+	movl	$0xffffffff,(%ebx)
+	addl	$4,%ebx
+
+/*
+ * Construct a page table directory.
+ */
+	/* Map kernel PDEs. */
+	movl	RELOC(nkpde),%ecx			# for this many pde s,
+	leal	(PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx	# kernel pde offset
+	leal	(SYSMAP+PG_V|PG_KW)(%esi),%eax		# pte for KPT in proc 0,
+	fillkpt
+
+	/* Install a PDE recursively mapping page directory as a page table! */
+	leal	(PROC0PDIR+PG_V/*|PG_KW*/)(%esi),%eax	# pte for ptd
+	movl	%eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi)	# recursive PD slot
+
+	/* Save phys. addr of PTD, for libkvm. */
+	movl	%esi,RELOC(PTDpaddr)
+
+    	call	xpmap_init
+
+	movl	$__HYPERVISOR_fpu_taskswitch,%eax
+	TRAP_INSTR
+
+	/* cr0 is 0x8005003b */
+
+	/* Relocate atdevbase. */
+	movl	$_C_LABEL(start_info_union),%edx
+	movl	NR_PAGES(%edx),%edx
+	shll	$2,%edx
+	movl	_C_LABEL(xpmap_phys_to_machine_mapping),%eax
+	addl	%eax,%edx
+	addl	$PGOFSET,%edx			# page align up
+	andl	$~PGOFSET,%edx
+	movl	%edx,_C_LABEL(HYPERVISOR_shared_info)
+	addl	$PAGE_SIZE,%edx			# shared_inf
+	movl	%edx,_C_LABEL(atdevbase)
+
+	/* Set up bootstrap stack. */
+	leal	(PROC0STACK)(%esi),%eax
+	movl	%eax,_C_LABEL(proc0paddr)
+	leal	(USPACE-FRAMESIZE)(%eax),%esp
+	subl	$KERNTEXTOFF,%esi
+	movl	%esi,PCB_CR3(%eax)	# pcb->pcb_cr3
+	xorl	%ebp,%ebp               # mark end of frames
+
+	movl	_C_LABEL(atdevbase),%eax
+	pushl	%eax
+	call	_C_LABEL(init386)	# wire 386 chip for unix operation
+	addl	$4,%esp
+
+#ifdef SAFARI_FIFO_HACK
+	movb	$5,%al
+	movw	$0x37b,%dx
+	outb	%al,%dx
+	movw	$0x37f,%dx
+	inb	%dx,%al
+	movb	%al,%cl
+
+	orb	$1,%cl
+
+	movb	$5,%al
+	movw	$0x37b,%dx
+	outb	%al,%dx
+	movw	$0x37f,%dx
+	movb	%cl,%al
+	outb	%al,%dx
+#endif /* SAFARI_FIFO_HACK */
+
+	call 	_C_LABEL(main)
+
+/*
+ * void proc_trampoline(void);
+ * This is a trampoline function pushed onto the stack of a newly created
+ * process in order to do some additional setup.  The trampoline is entered by
+ * cpu_switch()ing to the process, so we abuse the callee-saved registers used
+ * by cpu_switch() to store the information about the stub to call.
+ * NOTE: This function does not have a normal calling sequence!
+ */
+/* LINTSTUB: Func: void proc_trampoline(void) */
+NENTRY(proc_trampoline)
+#ifdef MULTIPROCESSOR
+	call	_C_LABEL(proc_trampoline_mp)
+#endif
+	movl	$IPL_NONE,CPUVAR(ILEVEL)	# run the stub at IPL_NONE
+	pushl	%ebx			# %ebx = stub argument (see comment above)
+	call	*%esi			# %esi = stub function pointer
+	addl	$4,%esp			# pop stub argument
+	DO_DEFERRED_SWITCH(%eax)
+	INTRFASTEXIT
+	/* NOTREACHED */
+
+/*****************************************************************************/
+#ifdef COMPAT_16
+/*
+ * Signal trampoline; copied to top of user stack.
+ */
+/* LINTSTUB: Var: char sigcode[1], esigcode[1]; */
+NENTRY(sigcode)
+	/*
+	 * Handler has returned here as if we called it.  The sigcontext
+	 * is on the stack after the 3 args "we" pushed.
+	 */
+	leal	12(%esp),%eax		# get pointer to sigcontext
+	movl	%eax,4(%esp)		# put it in the argument slot
+					# fake return address already there
+#if defined(SYS_compat_16___sigreturn14)
+	movl	$SYS_compat_16___sigreturn14,%eax
+#elif defined(SYS___sigreturn14)
+	movl	$SYS___sigreturn14,%eax
+#else
+	#error "no sigreturn14 syscall"
+#endif
+	int	$0x80	 		# enter kernel with args on stack
+	movl	$SYS_exit,%eax
+	int	$0x80			# exit if sigreturn fails
+	.globl	_C_LABEL(esigcode)
+_C_LABEL(esigcode):	# end marker (presumably used to size the copy to the user stack)
+#endif
+
+/*****************************************************************************/
+
+/*
+ * The following primitives are used to fill and copy regions of memory.
+ */
+
+/*
+ * XXX No section 9 man page for fillw.
+ * fillw seems to be very sparsely used (only in pccons it seems.)
+ * One wonders if it couldn't be done without.
+ * -- Perry Metzger, May 7, 2001
+ */
+/*
+ * void fillw(short pattern, void *addr, size_t len);
+ * Write len copies of pattern at addr.
+ */
+/* LINTSTUB: Func: void fillw(short pattern, void *addr, size_t len) */
+ENTRY(fillw)
+	pushl	%edi
+	movl	8(%esp),%eax		# pattern
+	movl	12(%esp),%edi		# addr
+	movw	%ax,%cx
+	rorl	$16,%eax		# replicate pattern into both halves of %eax
+	movw	%cx,%ax
+	cld
+	movl	16(%esp),%ecx		# len (in 16-bit words)
+	shrl	%ecx			# do longwords
+	rep
+	stosl
+	movl	16(%esp),%ecx
+	andl	$1,%ecx			# do remainder
+	rep
+	stosw
+	popl	%edi
+	ret
+
+/*
+ * int kcopy(const void *from, void *to, size_t len);
+ * Copy len bytes, abort on fault.
+ */
+/* LINTSTUB: Func: int kcopy(const void *from, void *to, size_t len) */
+ENTRY(kcopy)
+	pushl	%esi
+	pushl	%edi
+	GET_CURPCB(%eax)		# load curpcb into eax and set on-fault
+	pushl	PCB_ONFAULT(%eax)	# save previous on-fault handler
+	movl	$_C_LABEL(kcopy_fault), PCB_ONFAULT(%eax)
+
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	movl	24(%esp),%ecx
+	movl	%edi,%eax
+	subl	%esi,%eax
+	cmpl	%ecx,%eax		# overlapping?
+	jb	1f
+	cld				# nope, copy forward
+	shrl	$2,%ecx			# copy by 32-bit words
+	rep
+	movsl
+	movl	24(%esp),%ecx
+	andl	$3,%ecx			# any bytes left?
+	rep
+	movsb
+
+	GET_CURPCB(%edx)		# XXX save curpcb?
+	popl	PCB_ONFAULT(%edx)	# restore previous on-fault handler
+	popl	%edi
+	popl	%esi
+	xorl	%eax,%eax		# return 0 (success)
+	ret
+
+	ALIGN_TEXT
+1:	addl	%ecx,%edi		# copy backward
+	addl	%ecx,%esi
+	std				# string ops run downward until cld below
+	andl	$3,%ecx			# any fractional bytes?
+	decl	%edi
+	decl	%esi
+	rep
+	movsb
+	movl	24(%esp),%ecx		# copy remainder by 32-bit words
+	shrl	$2,%ecx
+	subl	$3,%esi
+	subl	$3,%edi
+	rep
+	movsl
+	cld
+
+	GET_CURPCB(%edx)
+	popl	PCB_ONFAULT(%edx)	# restore previous on-fault handler
+	popl	%edi
+	popl	%esi
+	xorl	%eax,%eax		# return 0 (success)
+	ret
+
+/*****************************************************************************/
+
+/*
+ * The following primitives are used to copy data in and out of the user's
+ * address space.
+ */
+
+/*
+ * Default to the lowest-common-denominator.  We will improve it
+ * later.
+ */
+#if defined(I386_CPU)
+#define	DEFAULT_COPYOUT		_C_LABEL(i386_copyout)
+#define	DEFAULT_COPYIN		_C_LABEL(i386_copyin)
+#elif defined(I486_CPU)
+#define	DEFAULT_COPYOUT		_C_LABEL(i486_copyout)
+#define	DEFAULT_COPYIN		_C_LABEL(i386_copyin)
+#elif defined(I586_CPU)
+#define	DEFAULT_COPYOUT		_C_LABEL(i486_copyout)	/* XXX */
+#define	DEFAULT_COPYIN		_C_LABEL(i386_copyin)	/* XXX */
+#elif defined(I686_CPU)
+#define	DEFAULT_COPYOUT		_C_LABEL(i486_copyout)	/* XXX */
+#define	DEFAULT_COPYIN		_C_LABEL(i386_copyin)	/* XXX */
+#endif
+
+	.data
+
+	/* Indirect entry points for copyout/copyin, dispatched through by
+	 * copyout()/copyin() below; presumably re-pointed at a better
+	 * CPU-specific variant once the CPU class is known at runtime. */
+	.globl	_C_LABEL(copyout_func)
+_C_LABEL(copyout_func):
+	.long	DEFAULT_COPYOUT
+
+	.globl	_C_LABEL(copyin_func)
+_C_LABEL(copyin_func):
+	.long	DEFAULT_COPYIN
+
+	.text
+
+/*
+ * int copyout(const void *from, void *to, size_t len);
+ * Copy len bytes into the user's address space.
+ * see copyout(9)
+ */
+/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */
+ENTRY(copyout)
+	DO_DEFERRED_SWITCH(%eax)
+	jmp	*_C_LABEL(copyout_func)	# tail-call CPU-specific variant
+
+#if defined(I386_CPU)
+/* LINTSTUB: Func: int i386_copyout(const void *kaddr, void *uaddr, size_t len) */
+ENTRY(i386_copyout)
+	pushl	%esi
+	pushl	%edi
+	pushl	$0			# dummy saved on-fault value (restored on exit)
+	
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	movl	24(%esp),%eax
+
+	/*
+	 * We check that the end of the destination buffer is not past the end
+	 * of the user's address space.  If it's not, then we only need to
+	 * check that each page is writable.  The 486 will do this for us; the
+	 * 386 will not.  (We assume that pages in user space that are not
+	 * writable by the user are not writable by the kernel either.)
+	 */
+	movl	%edi,%edx
+	addl	%eax,%edx
+	jc	_C_LABEL(copy_efault)	# wrapped past 4GB
+	cmpl	$VM_MAXUSER_ADDRESS,%edx
+	ja	_C_LABEL(copy_efault)
+
+	testl	%eax,%eax		# anything to do?
+	jz	3f
+
+	/*
+	 * We have to check each PTE for (write) permission, since the CPU
+	 * doesn't do it for us.
+	 */
+
+	/* Compute number of pages. */
+	movl	%edi,%ecx
+	andl	$PGOFSET,%ecx
+	addl	%eax,%ecx
+	decl	%ecx
+	shrl	$PGSHIFT,%ecx
+
+	/* Compute PTE offset for start address. */
+	shrl	$PGSHIFT,%edi
+
+	GET_CURPCB(%edx)
+	movl	$2f,PCB_ONFAULT(%edx)
+
+1:	/* Check PTE for each page. */
+	testb	$PG_RW,_C_LABEL(PTmap)(,%edi,4)
+	jz	2f
+	
+4:	incl	%edi
+	decl	%ecx
+	jns	1b
+
+	movl	20(%esp),%edi		# reload destination address
+	movl	24(%esp),%eax		# reload length
+	jmp	3f
+	
+2:	/* Simulate a trap. */
+	pushl	%ecx			# save page count
+	movl	%edi,%eax
+	shll	$PGSHIFT,%eax		# reconstruct the faulting address
+	pushl	%eax
+	call	_C_LABEL(trapwrite)	# trapwrite(addr)
+	addl	$4,%esp			# pop argument
+	popl	%ecx
+	testl	%eax,%eax		# if not ok, return EFAULT
+	jz	4b
+	jmp	_C_LABEL(copy_efault)
+
+3:	GET_CURPCB(%edx)
+	movl	$_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
+
+	/* bcopy(%esi, %edi, %eax); */
+	cld
+	movl	%eax,%ecx
+	shrl	$2,%ecx
+	rep
+	movsl
+	movl	%eax,%ecx
+	andl	$3,%ecx
+	rep
+	movsb
+
+	popl	PCB_ONFAULT(%edx)	# restore on-fault (the dummy 0)
+	popl	%edi
+	popl	%esi
+	xorl	%eax,%eax		# return 0 (success)
+	ret
+#endif /* I386_CPU */
+
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+/* LINTSTUB: Func: int i486_copyout(const void *kaddr, void *uaddr, size_t len) */
+ENTRY(i486_copyout)
+	pushl	%esi
+	pushl	%edi
+	pushl	$0			# dummy saved on-fault value (restored on exit)
+	
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	movl	24(%esp),%eax
+
+	/*
+	 * We check that the end of the destination buffer is not past the end
+	 * of the user's address space.
+	 */
+	movl	%edi,%edx
+	addl	%eax,%edx
+	jc	_C_LABEL(copy_efault)	# wrapped past 4GB
+	cmpl	$VM_MAXUSER_ADDRESS,%edx
+	ja	_C_LABEL(copy_efault)
+
+	GET_CURPCB(%edx)
+	movl	$_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
+
+	/* bcopy(%esi, %edi, %eax); */
+	cld
+	movl	%eax,%ecx
+	shrl	$2,%ecx
+	rep
+	movsl
+	movl	%eax,%ecx
+	andl	$3,%ecx
+	rep
+	movsb
+
+	popl	PCB_ONFAULT(%edx)	# restore on-fault (the dummy 0)
+	popl	%edi
+	popl	%esi
+	xorl	%eax,%eax		# return 0 (success)
+	ret
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+/*
+ * int copyin(const void *from, void *to, size_t len);
+ * Copy len bytes from the user's address space.
+ * see copyin(9)
+ */
+/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */
+ENTRY(copyin)
+	DO_DEFERRED_SWITCH(%eax)
+	jmp	*_C_LABEL(copyin_func)	# tail-call CPU-specific variant
+
+#if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \
+    defined(I686_CPU)
+/* LINTSTUB: Func: int i386_copyin(const void *uaddr, void *kaddr, size_t len) */
+ENTRY(i386_copyin)
+	pushl	%esi
+	pushl	%edi
+	GET_CURPCB(%eax)
+	pushl	$0			# dummy saved on-fault value (restored on exit)
+	movl	$_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
+	
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	movl	24(%esp),%eax
+
+	/*
+	 * We check that the end of the source buffer is not past the end
+	 * of the user's address space.  If it's not, then we only need to
+	 * check that each page is readable, and the CPU will do that for us.
+	 */
+	movl	%esi,%edx
+	addl	%eax,%edx
+	jc	_C_LABEL(copy_efault)	# wrapped past 4GB
+	cmpl	$VM_MAXUSER_ADDRESS,%edx
+	ja	_C_LABEL(copy_efault)
+
+	/* bcopy(%esi, %edi, %eax); */
+	cld
+	movl	%eax,%ecx
+	shrl	$2,%ecx
+	rep
+	movsl
+	movl	%eax,%ecx
+	andl	$3,%ecx
+	rep
+	movsb
+
+	GET_CURPCB(%edx)
+	popl	PCB_ONFAULT(%edx)	# restore on-fault (the dummy 0)
+	popl	%edi
+	popl	%esi
+	xorl	%eax,%eax		# return 0 (success)
+	ret
+#endif /* I386_CPU || I486_CPU || I586_CPU || I686_CPU */
+
+/* LINTSTUB: Ignore */
+NENTRY(copy_efault)
+	movl	$EFAULT,%eax
+	/* FALLTHROUGH into kcopy_fault: both unwind the same three words */
+
+/*
+ * kcopy_fault is used by kcopy and copy_fault is used by copyin/out.
+ *
+ * they're distinguished for lazy pmap switching.  see trap().
+ */
+/* LINTSTUB: Ignore */
+NENTRY(kcopy_fault)
+	GET_CURPCB(%edx)
+	popl	PCB_ONFAULT(%edx)	# restore saved on-fault handler
+	popl	%edi
+	popl	%esi
+	ret
+
+/* LINTSTUB: Ignore */
+NENTRY(copy_fault)
+	GET_CURPCB(%edx)
+	popl	PCB_ONFAULT(%edx)	# restore saved on-fault handler
+	popl	%edi
+	popl	%esi
+	ret
+
+/*
+ * int copyoutstr(const void *from, void *to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long, into the
+ * user's address space.  Return the number of characters copied (including the
+ * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
+ * return 0 or EFAULT.
+ * see copyoutstr(9)
+ */
+/* LINTSTUB: Func: int copyoutstr(const void *kaddr, void *uaddr, size_t len, size_t *done) */
+ENTRY(copyoutstr)
+	pushl	%esi
+	pushl	%edi
+
+	DO_DEFERRED_SWITCH(%eax)
+
+	movl	12(%esp),%esi		# esi = from
+	movl	16(%esp),%edi		# edi = to
+	movl	20(%esp),%edx		# edx = maxlen
+
+#if defined(I386_CPU)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	cmpl	$CPUCLASS_386,_C_LABEL(cpu_class)
+	jne	5f
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+	/* Compute number of bytes in first page. */
+	movl	%edi,%eax
+	andl	$PGOFSET,%eax
+	movl	$PAGE_SIZE,%ecx
+	subl	%eax,%ecx		# ecx = PAGE_SIZE - (src % PAGE_SIZE)
+
+	GET_CURPCB(%eax)
+	movl	$6f,PCB_ONFAULT(%eax)
+
+1:	/*
+	 * Once per page, check that we are still within the bounds of user
+	 * space, and check for a write fault.
+	 */
+	cmpl	$VM_MAXUSER_ADDRESS,%edi
+	jae	_C_LABEL(copystr_efault)
+
+	/* Compute PTE offset. */
+	movl	%edi,%eax
+	shrl	$PGSHIFT,%eax		# calculate pte address
+
+	testb	$PG_RW,_C_LABEL(PTmap)(,%eax,4)
+	jnz	2f
+
+6:	/* Simulate a trap. */
+	pushl	%edx			# save remaining count
+	pushl	%edi			# argument to trapwrite
+	call	_C_LABEL(trapwrite)	# trapwrite(addr)
+	addl	$4,%esp			# clear argument from stack
+	popl	%edx
+	testl	%eax,%eax
+	jnz	_C_LABEL(copystr_efault)
+
+2:	/* Copy up to end of this page. */
+	subl	%ecx,%edx		# predecrement total count
+	jnc	3f
+	addl	%edx,%ecx		# ecx += (edx - ecx) = edx
+	xorl	%edx,%edx
+
+3:	decl	%ecx
+	js	4f
+	lodsb
+	stosb
+	testb	%al,%al
+	jnz	3b
+
+	/* Success -- 0 byte reached. */
+	addl	%ecx,%edx		# add back residual for this page
+	xorl	%eax,%eax
+	jmp	copystr_return
+
+4:	/* Go to next page, if any. */
+	movl	$PAGE_SIZE,%ecx
+	testl	%edx,%edx
+	jnz	1b
+
+	/* edx is zero -- return ENAMETOOLONG. */
+	movl	$ENAMETOOLONG,%eax
+	jmp	copystr_return
+#endif /* I386_CPU */
+
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+5:	GET_CURPCB(%eax)
+	movl	$_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
+	/*
+	 * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
+	 */
+	movl	$VM_MAXUSER_ADDRESS,%eax
+	subl	%edi,%eax
+	cmpl	%edx,%eax
+	jae	1f
+	movl	%eax,%edx		# clamp maxlen to remaining user space
+	movl	%eax,20(%esp)		# and update it for copystr_return
+
+1:	incl	%edx
+	cld
+
+1:	decl	%edx
+	jz	2f
+	lodsb
+	stosb
+	testb	%al,%al
+	jnz	1b
+
+	/* Success -- 0 byte reached. */
+	decl	%edx
+	xorl	%eax,%eax
+	jmp	copystr_return
+
+2:	/* edx is zero -- return EFAULT or ENAMETOOLONG. */
+	cmpl	$VM_MAXUSER_ADDRESS,%edi
+	jae	_C_LABEL(copystr_efault)
+	movl	$ENAMETOOLONG,%eax
+	jmp	copystr_return
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+/*
+ * int copyinstr(const void *from, void *to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long, from the
+ * user's address space.  Return the number of characters copied (including the
+ * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
+ * return 0 or EFAULT.
+ * see copyinstr(9)
+ */
+/* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */
+ENTRY(copyinstr)
+	pushl	%esi
+	pushl	%edi
+
+	DO_DEFERRED_SWITCH(%eax)
+
+	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
+
+	movl	12(%esp),%esi		# %esi = from
+	movl	16(%esp),%edi		# %edi = to
+	movl	20(%esp),%edx		# %edx = maxlen
+
+	/*
+	 * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
+	 */
+	movl	$VM_MAXUSER_ADDRESS,%eax
+	subl	%esi,%eax
+	cmpl	%edx,%eax
+	jae	1f
+	movl	%eax,%edx		# clamp maxlen to remaining user space
+	movl	%eax,20(%esp)		# and update it for copystr_return
+
+1:	incl	%edx
+	cld
+
+1:	decl	%edx
+	jz	2f
+	lodsb
+	stosb
+	testb	%al,%al
+	jnz	1b
+
+	/* Success -- 0 byte reached. */
+	decl	%edx
+	xorl	%eax,%eax
+	jmp	copystr_return
+
+2:	/* edx is zero -- return EFAULT or ENAMETOOLONG. */
+	cmpl	$VM_MAXUSER_ADDRESS,%esi
+	jae	_C_LABEL(copystr_efault)
+	movl	$ENAMETOOLONG,%eax
+	jmp	copystr_return
+
+/* LINTSTUB: Ignore */
+NENTRY(copystr_efault)
+	movl	$EFAULT,%eax
+	/* FALLTHROUGH into copystr_fault/copystr_return */
+
+/* LINTSTUB: Ignore */
+NENTRY(copystr_fault)
+copystr_return:
+	/* Set *lencopied and return %eax. */
+	GET_CURPCB(%ecx)
+	movl	$0,PCB_ONFAULT(%ecx)	# clear on-fault handler
+	movl	20(%esp),%ecx		# maxlen (as possibly clamped above)
+	subl	%edx,%ecx		# chars copied = maxlen - residual
+	movl	24(%esp),%edx		# lencopied pointer
+	testl	%edx,%edx
+	jz	8f			# NULL: caller doesn't want the count
+	movl	%ecx,(%edx)
+
+8:	popl	%edi
+	popl	%esi
+	ret
+
+/*
+ * int copystr(const void *from, void *to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long.  Return the
+ * number of characters copied (including the NUL) in *lencopied.  If the
+ * string is too long, return ENAMETOOLONG; else return 0.
+ * see copystr(9)
+ */
+/* LINTSTUB: Func: int copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done) */
+ENTRY(copystr)
+	pushl	%esi
+	pushl	%edi
+
+	movl	12(%esp),%esi		# esi = from
+	movl	16(%esp),%edi		# edi = to
+	movl	20(%esp),%edx		# edx = maxlen
+	incl	%edx			# bias so the decl below can test for 0
+	cld
+
+1:	decl	%edx
+	jz	4f
+	lodsb
+	stosb
+	testb	%al,%al
+	jnz	1b
+
+	/* Success -- 0 byte reached. */
+	decl	%edx
+	xorl	%eax,%eax
+	jmp	6f
+
+4:	/* edx is zero -- return ENAMETOOLONG. */
+	movl	$ENAMETOOLONG,%eax
+
+6:	/* Set *lencopied and return %eax. */
+	movl	20(%esp),%ecx		# maxlen
+	subl	%edx,%ecx		# chars copied = maxlen - residual
+	movl	24(%esp),%edx		# lencopied pointer
+	testl	%edx,%edx
+	jz	7f			# NULL: caller doesn't want the count
+	movl	%ecx,(%edx)
+
+7:	popl	%edi
+	popl	%esi
+	ret
+
+/*
+ * long fuword(const void *uaddr);
+ * Fetch an int from the user's address space.
+ * see fuword(9)
+ */
+/* LINTSTUB: Func: long fuword(const void *base) */
+ENTRY(fuword)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	# whole longword in user space?
+	ja	_C_LABEL(fusuaddrfault)
+	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+	movl	(%edx),%eax		# may fault -> fusufault
+	movl	$0,PCB_ONFAULT(%ecx)	# clear on-fault handler
+	ret
+	
+/*
+ * int fusword(const void *uaddr);
+ * Fetch a short from the user's address space.
+ * see fusword(9)
+ */
+/* LINTSTUB: Func: int fusword(const void *base) */
+ENTRY(fusword)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	# whole short in user space?
+	ja	_C_LABEL(fusuaddrfault)
+	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+	movzwl	(%edx),%eax		# may fault -> fusufault
+	movl	$0,PCB_ONFAULT(%ecx)	# clear on-fault handler
+	ret
+	
+/*
+ * int fuswintr(const void *uaddr);
+ * Fetch a short from the user's address space.  Can be called during an
+ * interrupt.
+ * see fuswintr(9)
+ */
+/* LINTSTUB: Func: int fuswintr(const void *base) */
+ENTRY(fuswintr)
+	cmpl	$TLBSTATE_VALID, CPUVAR(TLBSTATE)	# fail unless user mappings valid
+	jnz	_C_LABEL(fusuaddrfault)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
+	ja	_C_LABEL(fusuaddrfault)
+	movl	CPUVAR(CURLWP),%ecx
+	movl	L_ADDR(%ecx),%ecx	# %ecx = pcb of current lwp
+	movl	$_C_LABEL(fusubail),PCB_ONFAULT(%ecx)	# bail, don't page in
+	movzwl	(%edx),%eax
+	movl	$0,PCB_ONFAULT(%ecx)
+	ret
+	
+/*
+ * int fubyte(const void *uaddr);
+ * Fetch a byte from the user's address space.
+ * see fubyte(9)
+ */
+/* LINTSTUB: Func: int fubyte(const void *base) */
+ENTRY(fubyte)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	# byte in user space?
+	ja	_C_LABEL(fusuaddrfault)
+	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+	movzbl	(%edx),%eax		# may fault -> fusufault
+	movl	$0,PCB_ONFAULT(%ecx)	# clear on-fault handler
+	ret
+
+/*
+ * Handle faults from [fs]u*().  Clean up and return -1.
+ */
+/* LINTSTUB: Ignore */
+NENTRY(fusufault)
+	movl	$0,PCB_ONFAULT(%ecx)	# %ecx still holds curpcb from the caller
+	movl	$-1,%eax
+	ret
+
+/*
+ * Handle faults from [fs]u*().  Clean up and return -1.  This differs from
+ * fusufault() in that trap() will recognize it and return immediately rather
+ * than trying to page fault.
+ */
+/* LINTSTUB: Ignore */
+NENTRY(fusubail)
+	movl	$0,PCB_ONFAULT(%ecx)	# %ecx still holds the pcb from the caller
+	movl	$-1,%eax
+	ret
+
+/*
+ * Handle earlier faults from [fs]u*(), due to out-of-range addresses.
+ */
+/* LINTSTUB: Ignore */
+NENTRY(fusuaddrfault)
+	movl	$-1,%eax
+	ret
+
+/*
+ * int suword(void *uaddr, long x);
+ * Store an int in the user's address space.
+ * see suword(9)
+ */
+/* LINTSTUB: Func: int suword(void *base, long c) */
+ENTRY(suword)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	# whole longword in user space?
+	ja	_C_LABEL(fusuaddrfault)
+
+#if defined(I386_CPU)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	cmpl	$CPUCLASS_386,_C_LABEL(cpu_class)
+	jne	2f
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+	GET_CURPCB(%eax)
+	movl	$3f,PCB_ONFAULT(%eax)
+
+	movl	%edx,%eax
+	shrl	$PGSHIFT,%eax		# calculate pte address
+	testb	$PG_RW,_C_LABEL(PTmap)(,%eax,4)
+	jnz	1f
+
+3:	/* Simulate a trap. */
+	pushl	%edx			# save %edx
+	pushl	%edx			# argument to trapwrite
+	call	_C_LABEL(trapwrite)	# trapwrite(addr)
+	addl	$4,%esp			# clear parameter from the stack
+	popl	%edx
+	GET_CURPCB(%ecx)
+	testl	%eax,%eax
+	jnz	_C_LABEL(fusufault)
+
+1:	/* XXX also need to check the following 3 bytes for validity! */
+#endif
+
+2:	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+
+	movl	8(%esp),%eax
+	movl	%eax,(%edx)		# may fault -> fusufault
+	xorl	%eax,%eax		# return 0 (success)
+	movl	%eax,PCB_ONFAULT(%ecx)
+	ret
+	
+/*
+ * int susword(void *uaddr, short x);
+ * Store a short in the user's address space.
+ * see susword(9)
+ */
+/* LINTSTUB: Func: int susword(void *base, short c) */
+ENTRY(susword)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	# whole short in user space?
+	ja	_C_LABEL(fusuaddrfault)
+
+#if defined(I386_CPU)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	cmpl	$CPUCLASS_386,_C_LABEL(cpu_class)
+	jne	2f
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+	GET_CURPCB(%eax)
+	movl	$3f,PCB_ONFAULT(%eax)
+
+	movl	%edx,%eax
+	shrl	$PGSHIFT,%eax		# calculate pte address
+	testb	$PG_RW,_C_LABEL(PTmap)(,%eax,4)
+	jnz	1f
+
+3:	/* Simulate a trap. */
+	pushl	%edx			# save %edx
+	pushl	%edx			# argument to trapwrite
+	call	_C_LABEL(trapwrite)	# trapwrite(addr)
+	addl	$4,%esp			# clear parameter from the stack
+	popl	%edx
+	GET_CURPCB(%ecx)
+	testl	%eax,%eax
+	jnz	_C_LABEL(fusufault)
+
+1:	/* XXX also need to check the following byte for validity! */
+#endif
+
+2:	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+
+	movl	8(%esp),%eax
+	movw	%ax,(%edx)		# may fault -> fusufault
+	xorl	%eax,%eax		# return 0 (success)
+	movl	%eax,PCB_ONFAULT(%ecx)
+	ret
+
+/*
+ * int suswintr(void *uaddr, short x);
+ * Store a short in the user's address space.  Can be called during an
+ * interrupt.
+ * see suswintr(9)
+ */
+/* LINTSTUB: Func: int suswintr(void *base, short c) */
+ENTRY(suswintr)
+	cmpl	$TLBSTATE_VALID, CPUVAR(TLBSTATE)	# fail unless user mappings valid
+	jnz	_C_LABEL(fusuaddrfault)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
+	ja	_C_LABEL(fusuaddrfault)
+	movl	CPUVAR(CURLWP),%ecx
+	movl	L_ADDR(%ecx),%ecx	# %ecx = pcb of current lwp
+	movl	$_C_LABEL(fusubail),PCB_ONFAULT(%ecx)	# bail, don't page in
+
+#if defined(I386_CPU)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	cmpl	$CPUCLASS_386,_C_LABEL(cpu_class)
+	jne	2f
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+	movl	%edx,%eax
+	shrl	$PGSHIFT,%eax		# calculate pte address
+	testb	$PG_RW,_C_LABEL(PTmap)(,%eax,4)
+	jnz	1f
+
+	/* Simulate a trap. */
+	jmp	_C_LABEL(fusubail)	# can't call trapwrite at interrupt time
+
+1:	/* XXX also need to check the following byte for validity! */
+#endif
+
+2:	movl	8(%esp),%eax
+	movw	%ax,(%edx)		# may fault -> fusubail
+	xorl	%eax,%eax		# return 0 (success)
+	movl	%eax,PCB_ONFAULT(%ecx)
+	ret
+
+/*
+ * int subyte(void *uaddr, char x);
+ * Store a byte in the user's address space.
+ * see subyte(9)
+ */
+/* LINTSTUB: Func: int subyte(void *base, int c) */
+ENTRY(subyte)
+	DO_DEFERRED_SWITCH(%eax)
+	movl	4(%esp),%edx
+	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	# byte in user space?
+	ja	_C_LABEL(fusuaddrfault)
+
+#if defined(I386_CPU)
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+	cmpl	$CPUCLASS_386,_C_LABEL(cpu_class)
+	jne	2f
+#endif /* I486_CPU || I586_CPU || I686_CPU */
+
+	GET_CURPCB(%eax)	
+	movl	$3f,PCB_ONFAULT(%eax)
+
+	movl	%edx,%eax
+	shrl	$PGSHIFT,%eax		# calculate pte address
+	testb	$PG_RW,_C_LABEL(PTmap)(,%eax,4)
+	jnz	1f
+
+3:	/* Simulate a trap. */
+	pushl	%edx			# save %edx
+	pushl	%edx			# argument to trapwrite
+	call	_C_LABEL(trapwrite)	# trapwrite(addr)
+	addl	$4,%esp			# clear parameter from the stack
+	popl	%edx
+	GET_CURPCB(%ecx)
+	testl	%eax,%eax
+	jnz	_C_LABEL(fusufault)
+
+1:
+#endif
+
+2:	GET_CURPCB(%ecx)
+	movl	$_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
+
+	movb	8(%esp),%al
+	movb	%al,(%edx)		# may fault -> fusufault
+	xorl	%eax,%eax		# return 0 (success)
+	movl	%eax,PCB_ONFAULT(%ecx)
+	ret
+
+/*****************************************************************************/
+
+/*
+ * The following is i386-specific nonsense.
+ */
+
+/*
+ * void lgdt_finish(void);
+ * Finish load a new GDT pointer (do any necessary cleanup).
+ * XXX It's somewhat questionable whether reloading all the segment registers
+ * is necessary, since the actual descriptor data is not changed except by
+ * process creation and exit, both of which clean up via task switches.  OTOH,
+ * this only happens at run time when the GDT is resized.
+ */
+/* LINTSTUB: Func: void lgdt_finish(void) */
+NENTRY(lgdt_finish)
+	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
+	movw	%ax,%ds
+	movw	%ax,%es
+	movw	%ax,%gs
+	movw	%ax,%ss
+	movl	$GSEL(GCPU_SEL, SEL_KPL),%eax	# %fs gets the per-CPU segment
+	movw	%ax,%fs
+	/* Reload code selector by doing intersegment return. */
+	popl	%eax			# return address
+	pushl	$GSEL(GCODE_SEL, SEL_KPL)
+	pushl	%eax
+	lret				# far return reloads %cs
+
+/*****************************************************************************/
+
+/*
+ * These functions are primarily used by DDB.
+ */
+
+/* LINTSTUB: Func: int setjmp (label_t *l) */
+ENTRY(setjmp)
+	movl	4(%esp),%eax		# %eax = label_t pointer
+	movl	%ebx,(%eax)		# save ebx
+	movl	%esp,4(%eax)		# save esp
+	movl	%ebp,8(%eax)		# save ebp
+	movl	%esi,12(%eax)		# save esi
+	movl	%edi,16(%eax)		# save edi
+	movl	(%esp),%edx		# get rta
+	movl	%edx,20(%eax)		# save eip
+	xorl	%eax,%eax		# return (0);
+	ret
+
+/* LINTSTUB: Func: void longjmp (label_t *l) */
+ENTRY(longjmp)
+	movl	4(%esp),%eax		# %eax = label_t pointer
+	movl	(%eax),%ebx		# restore ebx
+	movl	4(%eax),%esp		# restore esp
+	movl	8(%eax),%ebp		# restore ebp
+	movl	12(%eax),%esi		# restore esi
+	movl	16(%eax),%edi		# restore edi
+	movl	20(%eax),%edx		# get rta
+	movl	%edx,(%esp)		# put in return frame
+	xorl	%eax,%eax		# return (1);
+	incl	%eax
+	ret
+
+/*****************************************************************************/
+
+	.globl	_C_LABEL(sched_whichqs),_C_LABEL(sched_qs)
+	.globl	_C_LABEL(uvmexp),_C_LABEL(panic)
+
+#ifdef DIAGNOSTIC
+NENTRY(switch_error)
+	pushl	$1f			# panic message
+3:	call	_C_LABEL(panic)
+	/* NOTREACHED */
+1:	.asciz	"cpu_switch"
+#endif /* DIAGNOSTIC */
+
+/*
+ * void cpu_switch(struct lwp *)
+ * Find a runnable process and switch to it.  Wait if necessary.  If the new
+ * process is the same as the old one, we short-circuit the context save and
+ * restore.
+ *	
+ * Note that the stack frame layout is known to "struct switchframe"
+ * in <machine/frame.h> and to the code in cpu_fork() which initializes 
+ * it for a new lwp.
+ */
+ENTRY(cpu_switch)
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+#ifdef DEBUG
+	cmpl	$IPL_SCHED,CPUVAR(ILEVEL)
+	jae	1f
+	pushl	$2f
+	call	_C_LABEL(panic)
+	/* NOTREACHED */
+2:	.asciz	"not splsched() in cpu_switch!"
+1:	
+#endif /* DEBUG */
+
+	movl	16(%esp),%esi		# current
+
+	/*
+	 * Clear curlwp so that we don't accumulate system time while idle.
+	 * This also insures that schedcpu() will move the old lwp to
+	 * the correct queue if it happens to get called from the spllower()
+	 * below and changes the priority.  (See corresponding comment in
+	 * userret()).
+	 */
+	movl	$0,CPUVAR(CURLWP)
+	/*
+	 * First phase: find new lwp.
+	 *
+	 * Registers:
+	 *   %eax - queue head, scratch, then zero
+	 *   %ebx - queue number
+	 *   %ecx - cached value of whichqs
+	 *   %edx - next lwp in queue
+	 *   %esi - old lwp
+	 *   %edi - new lwp
+	 */
+
+	/* Look for new lwp. */
+	CLI(%ecx)			# splhigh doesn't do a cli
+	movl	_C_LABEL(sched_whichqs),%ecx
+	bsfl	%ecx,%ebx		# find a full q
+	jnz	switch_dequeue
+
+	/*
+	 * idling:	save old context.
+	 *
+	 * Registers:
+	 *   %eax, %ecx - scratch
+	 *   %esi - old lwp, then old pcb
+	 *   %edi - idle pcb
+	 */
+
+	pushl	%esi
+	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate(oldproc)
+	addl	$4,%esp
+
+	movl	L_ADDR(%esi),%esi
+
+	/* Save stack pointers. */
+	movl	%esp,PCB_ESP(%esi)
+	movl	%ebp,PCB_EBP(%esi)
+
+	/* Find idle PCB for this CPU */
+#ifndef MULTIPROCESSOR
+	movl	$_C_LABEL(lwp0),%ebx
+	movl	L_ADDR(%ebx),%edi
+	movl	L_MD_TSS_SEL(%ebx),%edx
+#else
+	movl	CPUVAR(IDLE_PCB),%edi
+	movl	CPUVAR(IDLE_TSS_SEL),%edx
+#endif
+	movl	$0,CPUVAR(CURLWP)		/* In case we fault... */
+
+	/* Restore the idle context (avoid interrupts) */
+	CLI(%ecx)
+
+	/* Restore stack pointers. */
+	movl	PCB_ESP(%edi),%esp
+	movl	PCB_EBP(%edi),%ebp
+
+#ifdef XENDEBUG_LOW
+	movl	_C_LABEL(xen_once),%ecx
+	testl	%ecx,%ecx
+	jz	1f
+	movl	$2,%ecx
+	movl	%ecx,_C_LABEL(xen_once)
+1:
+	call	_C_LABEL(xen_dbg2)
+#endif
+
+#ifdef XENDEBUG_LOW
+	#call	_C_LABEL(xen_dbg2)
+	movl	%ds,%ecx
+	movl	%cs,%ebx
+	pushl	%ecx
+	pushl	%ebx
+	call	_C_LABEL(xen_dbg3)
+	addl	$8,%esp
+#endif
+
+	/* Switch TSS. */
+	movl	PCB_TSS_ESP0(%edi),%ecx
+	movl	PCB_TSS_SS0(%edi),%ebx
+	movl	$__HYPERVISOR_stack_switch,%eax
+	TRAP_INSTR			# hypercall: switch kernel stack (ss0=%ebx, esp0=%ecx)
+
+#ifdef XENDEBUG_LOW
+	pushl	PCB_TSS_SS0(%edi)
+	pushl	PCB_TSS_ESP0(%edi)
+	call	_C_LABEL(xen_dbg1)
+	addl	$8,%esp
+#endif
+
+	/* Reset FPU state if necessary. */
+	movl	PCB_CR0(%edi),%ecx
+	testl	$CR0_TS,%ecx
+	jz	1f
+	andl	$~CR0_TS,%ecx
+	movl	%ecx,PCB_CR0(%edi)
+	movl	$__HYPERVISOR_fpu_taskswitch,%eax
+	TRAP_INSTR			# NOTE(review): presumably toggles virtual CR0.TS for lazy FPU -- confirm
+1:
+
+	/* Record new pcb. */
+	SET_CURPCB(%edi)
+
+	xorl	%esi,%esi		# no old lwp any more
+	STI(%eax)
+idle_unlock:	
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)	
+	call	_C_LABEL(sched_unlock_idle)
+#endif
+	/* Interrupts are okay again. */
+	pushl	$IPL_NONE		# spl0()
+	call	_C_LABEL(Xspllower)	# process pending interrupts
+	addl	$4,%esp
+	jmp	idle_start
+idle_zero:		
+	STIC(%eax)			# NOTE(review): appears to unmask events and flag pending work -- confirm macro
+    	jz	4f
+	call	_C_LABEL(stipending)
+	testl	%eax,%eax
+	jz	4f
+	pushl	$IPL_NONE
+	call	_C_LABEL(Xspllower)
+	addl	$4,%esp
+4:
+	call	_C_LABEL(uvm_pageidlezero)
+	CLI(%eax)
+	cmpl	$0,_C_LABEL(sched_whichqs)
+	jnz	idle_exit
+idle_loop:
+	/* Try to zero some pages. */
+	movl	_C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx
+	testl	%ecx,%ecx
+	jnz	idle_zero
+	STIC(%eax)
+    	jz	4f
+	call	_C_LABEL(stipending)
+	testl	%eax,%eax
+	jz	4f
+	pushl	$IPL_NONE
+	call	_C_LABEL(Xspllower)
+	addl	$4,%esp
+	jmp	idle_start
+4:
+	movl	$__HYPERVISOR_yield,%eax
+	TRAP_INSTR			# hypercall: yield the (virtual) CPU to Xen
+NENTRY(mpidle)
+idle_start:	
+	CLI(%eax)
+	cmpl	$0,_C_LABEL(sched_whichqs)
+	jz	idle_loop
+idle_exit:	
+	movl	$IPL_HIGH,CPUVAR(ILEVEL)		# splhigh
+	STI(%eax)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)	
+	call	_C_LABEL(sched_lock_idle)
+#endif
+	movl	_C_LABEL(sched_whichqs),%ecx
+	bsfl	%ecx,%ebx
+	jz	idle_unlock
+
+#ifdef XENDEBUG_LOW
+	pushl	%ecx
+	call	_C_LABEL(xen_dbg1)
+	xorl	%ecx,%ecx
+	movl	%ecx,_C_LABEL(xen_once)
+	popl	%ecx
+#endif
+switch_dequeue:		
+	/* 
+	 * we're running at splhigh(), but it's otherwise okay to take
+	 * interrupts here. 
+	 */
+	STI(%edi)
+	leal	_C_LABEL(sched_qs)(,%ebx,8),%eax # select q
+
+	movl	L_FORW(%eax),%edi	# unlink from front of process q
+#ifdef	DIAGNOSTIC
+	cmpl	%edi,%eax		# linked to self (i.e. nothing queued)?
+	je	_C_LABEL(switch_error)	# not possible
+#endif /* DIAGNOSTIC */
+	movl	L_FORW(%edi),%edx
+	movl	%edx,L_FORW(%eax)
+	movl	%eax,L_BACK(%edx)
+
+	cmpl	%edx,%eax		# q empty?
+	jne	3f
+
+	btrl	%ebx,%ecx		# yes, clear to indicate empty
+	movl	%ecx,_C_LABEL(sched_whichqs) # update q status
+
+3:	/* We just did it. */
+	xorl	%eax,%eax
+	CLEAR_RESCHED(%eax)
+
+switch_resume:
+#ifdef	DIAGNOSTIC
+	cmpl	%eax,L_WCHAN(%edi)	# Waiting for something?
+	jne	_C_LABEL(switch_error)	# Yes; shouldn't be queued.
+	cmpb	$LSRUN,L_STAT(%edi)	# In run state?
+	jne	_C_LABEL(switch_error)	# No; shouldn't be queued.
+#endif /* DIAGNOSTIC */
+
+	/* Isolate lwp.  XXX Is this necessary? */
+	movl	%eax,L_BACK(%edi)
+
+	/* Record new lwp. */
+	movb	$LSONPROC,L_STAT(%edi)	# l->l_stat = LSONPROC
+	SET_CURLWP(%edi,%ecx)
+
+	/* Skip context switch if same lwp. */
+	xorl	%ebx,%ebx
+	cmpl	%edi,%esi
+	je	switch_return
+
+	/* If old lwp exited, don't bother. */
+	testl	%esi,%esi
+	jz	switch_exited
+
+	/*
+	 * Second phase: save old context.
+	 *
+	 * Registers:
+	 *   %eax, %ecx - scratch
+	 *   %esi - old lwp, then old pcb
+	 *   %edi - new lwp
+	 */
+
+	pushl	%esi
+	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate(oldproc)
+	addl	$4,%esp
+
+	movl	L_ADDR(%esi),%esi
+
+	/* Save stack pointers. */
+	movl	%esp,PCB_ESP(%esi)
+	movl	%ebp,PCB_EBP(%esi)
+
+switch_exited:
+	/*
+	 * Third phase: restore saved context.
+	 *
+	 * Registers:
+	 *   %eax, %ebx, %ecx, %edx - scratch
+	 *   %esi - new pcb
+	 *   %edi - new lwp
+	 */
+
+	/* No interrupts while loading new state. */
+	CLI(%eax)
+	movl	L_ADDR(%edi),%esi
+
+	/* Restore stack pointers. */
+	movl	PCB_ESP(%esi),%esp
+	movl	PCB_EBP(%esi),%ebp
+
+#if 0
+	/* Don't bother with the rest if switching to a system process. */
+	testl	$P_SYSTEM,L_FLAG(%edi);	XXX NJWLWP lwp's don't have P_SYSTEM!
+	jnz	switch_restored
+#endif
+
+	/* Switch TSS. */
+	movl	PCB_TSS_ESP0(%esi),%ecx
+	movl	PCB_TSS_SS0(%esi),%ebx
+	movl	$__HYPERVISOR_stack_switch,%eax
+	TRAP_INSTR			# hypercall: switch kernel stack (ss0=%ebx, esp0=%ecx)
+
+	pushl	%edi
+	call	_C_LABEL(pmap_activate)		# pmap_activate(p)
+	addl	$4,%esp
+
+#if 0
+switch_restored:
+#endif
+	/* Reset FPU state if necessary. */
+	movl	PCB_CR0(%esi),%ecx
+	testl	$CR0_TS,%ecx
+	jz	1f
+	andl	$~CR0_TS,%ecx
+	movl	%ecx,PCB_CR0(%esi)
+	movl	$__HYPERVISOR_fpu_taskswitch,%eax
+	TRAP_INSTR
+1:
+	/* Record new pcb. */
+	SET_CURPCB(%esi)
+
+	/* Interrupts are okay again. */
+	STI(%edi)
+
+/*
+ *  Check for restartable atomic sequences (RAS)
+ */
+	movl	CPUVAR(CURLWP),%edi
+	movl	L_PROC(%edi),%esi
+	cmpl	$0,P_RASLIST(%esi)
+	jne	2f
+1:
+	movl	$1,%ebx			# return value when a switch occurred
+
+switch_return:
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)     
+	call    _C_LABEL(sched_unlock_idle)
+#endif
+	pushl	$IPL_NONE		# spl0()
+	call	_C_LABEL(Xspllower)	# process pending interrupts
+	addl	$4,%esp
+	movl	$IPL_HIGH,CPUVAR(ILEVEL)	# splhigh()
+
+	movl	%ebx,%eax
+
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	ret
+
+2:					# check RAS list
+	movl	L_MD_REGS(%edi),%ebx
+	movl	TF_EIP(%ebx),%eax
+	pushl	%eax
+	pushl	%esi
+	call	_C_LABEL(ras_lookup)
+	addl	$8,%esp
+	cmpl	$-1,%eax		# -1: no RAS match
+	je	1b
+	movl	%eax,TF_EIP(%ebx)	# restart the atomic sequence
+	jmp	1b
+
+/*
+ * void cpu_switchto(struct lwp *current, struct lwp *next)
+ * Switch to the specified next LWP.
+ *
+ * Pushes the same callee-saved registers that cpu_switch does
+ * (%ebx/%esi/%edi, popped again in switch_return), then jumps into
+ * cpu_switch's switch_resume path, which expects:
+ *	%esi - old (current) lwp
+ *	%edi - new (next) lwp
+ */
+ENTRY(cpu_switchto)
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+#ifdef DEBUG
+	/* Must be entered at splsched() or above. */
+	cmpl	$IPL_SCHED,CPUVAR(ILEVEL)
+	jae	1f
+	pushl	$2f
+	call	_C_LABEL(panic)
+	/* NOTREACHED */
+2:	.asciz	"not splsched() in cpu_switchto!"
+1:
+#endif /* DEBUG */
+
+	movl	16(%esp),%esi		# current
+	movl	20(%esp),%edi		# next
+
+	/*
+	 * Clear curlwp so that we don't accumulate system time while idle.
+	 * This also insures that schedcpu() will move the old process to
+	 * the correct queue if it happens to get called from the spllower()
+	 * below and changes the priority.  (See corresponding comment in
+	 * usrret()).
+	 *
+	 * XXX Is this necessary?  We know we won't go idle.
+	 */
+	movl	$0,CPUVAR(CURLWP)
+
+	/*
+	 * We're running at splhigh(), but it's otherwise okay to take
+	 * interrupts here.
+	 */
+	STI(%eax)		# %eax is just a scratch reg for the Xen STI macro
+
+	/* Jump into the middle of cpu_switch */
+	xorl	%eax,%eax
+	jmp	switch_resume
+
+/*
+ * void cpu_exit(struct lwp *l)
+ * Switch to the appropriate idle context (lwp0's if uniprocessor; the CPU's 
+ * if multiprocessor) and deallocate the address space and kernel stack for p. 
+ * Then jump into cpu_switch(), as if we were in the idle proc all along.
+ */
+#ifndef MULTIPROCESSOR
+	.globl	_C_LABEL(lwp0)
+#endif
+	.globl  _C_LABEL(uvmspace_free),_C_LABEL(kernel_map)
+	.globl	_C_LABEL(uvm_km_free),_C_LABEL(tss_free)
+/* LINTSTUB: Func: void cpu_exit(struct lwp *l) */
+ENTRY(cpu_exit)
+	movl	4(%esp),%edi		# old process
+#ifndef MULTIPROCESSOR
+	movl	$_C_LABEL(lwp0),%ebx
+	movl	L_ADDR(%ebx),%esi
+	movl	L_MD_TSS_SEL(%ebx),%edx
+#else
+	movl	CPUVAR(IDLE_PCB),%esi
+	movl	CPUVAR(IDLE_TSS_SEL),%edx
+#endif
+	/*
+	 * NOTE(review): %edx (the idle TSS selector) is loaded above but
+	 * never used below -- the Xen path switches stacks via hypercall
+	 * rather than ltr.  Leftover from the native i386 code; confirm.
+	 */
+	/* In case we fault... */
+	movl	$0,CPUVAR(CURLWP)
+
+	/* Restore the idle context. */
+	CLI(%eax)
+
+	/* Restore stack pointers. */
+	movl	PCB_ESP(%esi),%esp
+	movl	PCB_EBP(%esi),%ebp
+
+	/* Switch TSS.  (Xen: ring-0 stack set by hypercall, not ltr.) */
+    	movl	PCB_TSS_ESP0(%esi),%ecx
+	movl	PCB_TSS_SS0(%esi),%ebx
+	movl	$__HYPERVISOR_stack_switch,%eax
+	TRAP_INSTR
+
+	/* Reset FPU state if necessary. */
+	movl	PCB_CR0(%esi),%ecx
+	testl	$CR0_TS,%ecx
+	jz	1f
+	andl	$~CR0_TS,%ecx
+	movl	%ecx,PCB_CR0(%esi)
+	movl	$__HYPERVISOR_fpu_taskswitch,%eax
+	TRAP_INSTR
+1:
+
+	/* Record new pcb. */
+	SET_CURPCB(%esi)
+
+	/* Interrupts are okay again. */
+	STI(%eax)
+
+	/*
+	 * Schedule the dead LWP's stack to be freed.
+	 * Safe now: %esp was switched to the idle pcb's stack above.
+	 */
+	pushl	%edi
+	call	_C_LABEL(lwp_exit2)
+	addl	$4,%esp
+
+	/* Jump into cpu_switch() with the right state. */
+	xorl	%esi,%esi
+	movl	%esi,CPUVAR(CURLWP)
+	jmp	idle_start
+
+/*
+ * void savectx(struct pcb *pcb);
+ * Update pcb, saving current processor state.
+ * Note: only the kernel stack pointers (%esp/%ebp) are recorded here;
+ * no other register state is touched.
+ */
+/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
+ENTRY(savectx)
+	movl	4(%esp),%edx		# edx = p->p_addr
+  
+	/* Save stack pointers. */
+	movl	%esp,PCB_ESP(%edx)
+	movl	%ebp,PCB_EBP(%edx)
+
+	ret
+
+/*
+ * Old call gate entry for syscall
+ */
+/* LINTSTUB: Var: char Xosyscall[1]; */
+IDTVEC(osyscall)
+	/* Set eflags in trap frame. */
+	pushfl
+	popl	8(%esp)
+	pushl	$7		# size of instruction for restart
+				# (an `lcall $sel,$off' is 7 bytes)
+	jmp	syscall1
+
+/*
+ * Trap gate entry for syscall
+ *
+ * Builds a trap frame (with T_ASTFLT pre-loaded in tf_trapno so a
+ * pending AST can be handled by trap() on the way out), dispatches to
+ * the emulation's syscall handler, then loops checking for ASTs and a
+ * deferred pmap switch before returning to user mode.
+ */
+/* LINTSTUB: Var: char Xsyscall[1]; */
+IDTVEC(syscall)
+	pushl	$2		# size of instruction for restart
+syscall1:
+	pushl	$T_ASTFLT	# trap # for doing ASTs
+	INTRENTRY
+
+#ifdef DIAGNOSTIC
+	/* Warn if we arrive with a pending pmap load or at raised SPL. */
+	cmpl    $0, CPUVAR(WANT_PMAPLOAD)
+	jz	1f
+	pushl	$6f
+	call	_C_LABEL(printf)
+	addl	$4, %esp
+1:
+	movl	CPUVAR(ILEVEL),%ebx
+	testl	%ebx,%ebx
+	jz	1f
+	pushl	$5f
+	call	_C_LABEL(printf)
+	addl	$4,%esp
+#ifdef DDB
+	int	$3
+#endif
+1:	
+#endif /* DIAGNOSTIC */
+	movl	CPUVAR(CURLWP),%edx
+	movl	%esp,L_MD_REGS(%edx)	# save pointer to frame
+	movl	L_PROC(%edx),%edx
+	pushl	%esp
+	call	*P_MD_SYSCALL(%edx)	# get pointer to syscall() function
+	addl	$4,%esp
+syscall_checkast:
+	/* Check for ASTs on exit to user mode. */
+	CLI(%eax)
+	CHECK_ASTPENDING(%eax)
+	je	1f
+	/* Always returning to user mode here. */
+	CLEAR_ASTPENDING(%eax)
+	STI(%eax)
+	/* Pushed T_ASTFLT into tf_trapno on entry. */
+	pushl	%esp
+	call	_C_LABEL(trap)
+	addl	$4,%esp
+	jmp	syscall_checkast
+1:	STI(%eax)
+	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
+#ifndef DIAGNOSTIC
+	INTRFASTEXIT
+#else /* DIAGNOSTIC */
+	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
+	jne	3f
+	INTRFASTEXIT
+3:	pushl	$4f
+	call	_C_LABEL(printf)
+	addl	$4,%esp
+#ifdef DDB
+	int	$3
+#endif /* DDB */
+	movl	$IPL_NONE,CPUVAR(ILEVEL)
+	/*
+	 * Was `jmp 2b': the AST-check label this referred to was renamed
+	 * syscall_checkast in this port, so `2b' would resolve to an
+	 * unrelated earlier `2:' label.  Jump to the named label instead.
+	 */
+	jmp	syscall_checkast
+4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n"
+5:	.asciz	"WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n"	
+6:	.asciz	"WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n"     
+#endif /* DIAGNOSTIC */
+9:	call    _C_LABEL(pmap_load)
+	jmp     syscall_checkast        /* re-check ASTs */
+
+#if NNPX > 0
+/*
+ * Special interrupt handlers.  Someday intr0-intr15 will be used to count
+ * interrupts.  We'll still need a special exception 16 handler.  The busy
+ * latch stuff in probintr() can be moved to npxprobe().
+ */
+
+/*
+ * Interrupt handler installed while probing for the FPU: count the
+ * interrupt, EOI both legacy PICs, and clear the BUSY# latch.
+ */
+/* LINTSTUB: Func: void probeintr(void) */
+NENTRY(probeintr)
+	ss			# ss-override prefix for the incl below
+	incl	_C_LABEL(npx_intrs_while_probing)
+	pushl	%eax
+	movb	$0x20,%al	# EOI (asm in strings loses cpp features)
+	outb	%al,$0xa0	# IO_ICU2
+	outb	%al,$0x20	# IO_ICU1
+	movb	$0,%al
+	outb	%al,$0xf0	# clear BUSY# latch
+	popl	%eax
+	iret
+
+/*
+ * Trap handler installed while probing: count the trap and clear the
+ * FPU exception state so the probe can continue.
+ */
+/* LINTSTUB: Func: void probetrap(void) */
+NENTRY(probetrap)
+	ss			# ss-override prefix for the incl below
+	incl	_C_LABEL(npx_traps_while_probing)
+	fnclex
+	iret
+
+/*
+ * Compute x - (x/y)*y in the FPU; a nonzero result for suitable
+ * operands indicates the Pentium FDIV bug.
+ */
+/* LINTSTUB: Func: int npx586bug1(int a, int b) */
+NENTRY(npx586bug1)
+	fildl	4(%esp)		# x
+	fildl	8(%esp)		# y
+	fld	%st(1)
+	fdiv	%st(1),%st	# x/y
+	fmulp	%st,%st(1)	# (x/y)*y
+	fsubrp	%st,%st(1)	# x-(x/y)*y
+	pushl	$0
+	fistpl	(%esp)
+	popl	%eax
+	ret
+#endif /* NNPX > 0 */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/machdep.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,2551 @@
+/*	$NetBSD: machdep.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: machdep.c,v 1.550 2004/03/05 11:34:17 junyoung Exp 	*/
+
+/*-
+ * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
+ * Simulation Facility, NASA Ames Research Center.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include "opt_beep.h"
+#include "opt_compat_ibcs2.h"
+#include "opt_compat_mach.h"	/* need to get the right segment def */
+#include "opt_compat_netbsd.h"
+#include "opt_compat_svr4.h"
+#include "opt_cpureset_delay.h"
+#include "opt_cputype.h"
+#include "opt_ddb.h"
+#include "opt_ipkdb.h"
+#include "opt_kgdb.h"
+#include "opt_mtrr.h"
+#include "opt_multiprocessor.h"
+#include "opt_realmem.h"
+#include "opt_user_ldt.h"
+#include "opt_vm86.h"
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/signal.h>
+#include <sys/signalvar.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/exec.h>
+#include <sys/buf.h>
+#include <sys/reboot.h>
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/msgbuf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/extent.h>
+#include <sys/syscallargs.h>
+#include <sys/core.h>
+#include <sys/kcore.h>
+#include <sys/ucontext.h>
+#include <machine/kcore.h>
+#include <sys/ras.h>
+#include <sys/sa.h>
+#include <sys/savar.h>
+#include <sys/ksyms.h>
+
+#ifdef IPKDB
+#include <ipkdb/ipkdb.h>
+#endif
+
+#ifdef KGDB
+#include <sys/kgdb.h>
+#endif
+
+#include <dev/cons.h>
+
+#include <uvm/uvm_extern.h>
+#include <uvm/uvm_page.h>
+
+#include <sys/sysctl.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/gdt.h>
+#include <machine/pio.h>
+#include <machine/psl.h>
+#include <machine/reg.h>
+#include <machine/specialreg.h>
+#include <machine/bootinfo.h>
+#include <machine/mtrr.h>
+
+#include <dev/isa/isareg.h>
+#include <machine/isa_machdep.h>
+#include <dev/ic/i8042reg.h>
+
+#ifdef DDB
+#include <machine/db_machdep.h>
+#include <ddb/db_extern.h>
+#endif
+
+#ifdef VM86
+#include <machine/vm86.h>
+#endif
+
+#include "acpi.h"
+#include "apm.h"
+#include "bioscall.h"
+
+#if NBIOSCALL > 0
+#include <machine/bioscall.h>
+#endif
+
+#if NACPI > 0
+#include <dev/acpi/acpivar.h>
+#define ACPI_MACHDEP_PRIVATE
+#include <machine/acpi_machdep.h>
+#endif
+
+#if NAPM > 0
+#include <machine/apmvar.h>
+#endif
+
+#include "isa.h"
+#include "isadma.h"
+#include "npx.h"
+#include "ksyms.h"
+
+#include "mca.h"
+#if NMCA > 0
+#include <machine/mca_machdep.h>	/* for mca_busprobe() */
+#endif
+
+#ifdef MULTIPROCESSOR		/* XXX */
+#include <machine/mpbiosvar.h>	/* XXX */
+#endif				/* XXX */
+
+#include <machine/xen.h>
+#include <machine/hypervisor.h>
+
+#if defined(DDB) || defined(KGDB)
+#include <ddb/db_interface.h>
+#include <ddb/db_output.h>
+
+void ddb_trap_hook(int);
+#endif
+
+/* #define	XENDEBUG */
+/* #define	XENDEBUG_LOW */
+
+#ifdef XENDEBUG
+#define	XENPRINTF(x) printf x
+#define	XENPRINTK(x) printk x
+#else
+#define	XENPRINTF(x)
+#define	XENPRINTK(x)
+#endif
+#define	PRINTK(x) printf x
+
+#ifdef XENDEBUG_LOW
+void xen_dbglow_init(void);
+#endif
+
+#ifndef BEEP_ONHALT_COUNT
+#define BEEP_ONHALT_COUNT 3
+#endif
+#ifndef BEEP_ONHALT_PITCH
+#define BEEP_ONHALT_PITCH 1500
+#endif
+#ifndef BEEP_ONHALT_PERIOD
+#define BEEP_ONHALT_PERIOD 250
+#endif
+
+/* the following is used externally (sysctl_hw) */
+char machine[] = "i386";		/* CPU "architecture" */
+char machine_arch[] = "i386";		/* machine == machine_arch */
+
+char bootinfo[BOOTINFO_MAXSIZE];
+
+struct bi_devmatch *i386_alldisks = NULL;
+int i386_ndisks = 0;
+
+#ifdef CPURESET_DELAY
+int	cpureset_delay = CPURESET_DELAY;
+#else
+int     cpureset_delay = 2000; /* default to 2s */
+#endif
+
+#ifdef MTRR
+struct mtrr_funcs *mtrr_funcs;
+#endif
+
+#ifdef COMPAT_NOMID
+static int exec_nomid  (struct proc *, struct exec_package *);
+#endif
+
+int	physmem;
+int	dumpmem_low;
+int	dumpmem_high;
+unsigned int cpu_feature;
+int	cpu_class;
+int	i386_fpu_present;
+int	i386_fpu_exception;
+int	i386_fpu_fdivbug;
+
+int	i386_use_fxsave;
+int	i386_has_sse;
+int	i386_has_sse2;
+
+int	tmx86_has_longrun;
+
+vaddr_t	msgbuf_vaddr;
+paddr_t msgbuf_paddr;
+
+vaddr_t	idt_vaddr;
+paddr_t	idt_paddr;
+
+#ifdef I586_CPU
+vaddr_t	pentium_idt_vaddr;
+#endif
+
+struct vm_map *exec_map = NULL;
+struct vm_map *mb_map = NULL;
+struct vm_map *phys_map = NULL;
+
+extern	paddr_t avail_start, avail_end;
+
+#ifdef ISA_CLOCK
+void (*delay_func)(int) = i8254_delay;
+void (*microtime_func)(struct timeval *) = i8254_microtime;
+void (*initclock_func)(void) = i8254_initclocks;
+#else
+void (*delay_func)(int) = xen_delay;
+void (*microtime_func)(struct timeval *) = xen_microtime;
+void (*initclock_func)(void) = xen_initclocks;
+#endif
+
+void hypervisor_callback(void);
+void failsafe_callback(void);
+
+/*
+ * Size of memory segments, before any memory is stolen.
+ */
+phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
+int	mem_cluster_cnt;
+
+int	cpu_dump(void);
+int	cpu_dumpsize(void);
+u_long	cpu_dump_mempagecnt(void);
+void	dumpsys(void);
+void	init386(paddr_t);
+void	initgdt(void);
+
+#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
+void	add_mem_cluster(u_int64_t, u_int64_t, u_int32_t);
+#endif /* !defined(REALBASEMEM) && !defined(REALEXTMEM) */
+
+extern int time_adjusted;
+
+/*
+ * Machine-dependent startup code: map the kernel message buffer,
+ * announce memory sizes, and carve the exec/physio/mbuf submaps out
+ * of kernel_map.
+ */
+void
+cpu_startup()
+{
+	int x;
+	vaddr_t minaddr, maxaddr;
+	char pbuf[9];
+
+	/*
+	 * Initialize error message buffer (at end of core).
+	 */
+	msgbuf_vaddr = uvm_km_valloc(kernel_map, x86_round_page(MSGBUFSIZE));
+	if (msgbuf_vaddr == 0)
+		panic("failed to valloc msgbuf_vaddr");
+
+	/* msgbuf_paddr was init'd in pmap */
+	for (x = 0; x < btoc(MSGBUFSIZE); x++)
+		pmap_kenter_pa((vaddr_t)msgbuf_vaddr + x * PAGE_SIZE,
+		    msgbuf_paddr + x * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE);
+	pmap_update(pmap_kernel());
+
+	initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE));
+
+	printf("%s", version);
+
+#ifdef TRAPLOG
+	/*
+	 * Enable recording of branch from/to in MSR's
+	 */
+	wrmsr(MSR_DEBUGCTLMSR, 0x1);
+#endif
+
+	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
+	printf("total memory = %s\n", pbuf);
+
+	minaddr = 0;		/* let uvm_km_suballoc pick the addresses */
+
+	/*
+	 * Allocate a submap for exec arguments.  This map effectively
+	 * limits the number of processes exec'ing at any time.
+	 */
+	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
+				   16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
+
+	/*
+	 * Allocate a submap for physio
+	 */
+	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
+				   VM_PHYS_SIZE, 0, FALSE, NULL);
+
+	/*
+	 * Finally, allocate mbuf cluster submap.
+	 */
+	mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
+	    nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL);
+
+	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
+	printf("avail memory = %s\n", pbuf);
+
+	/* Safe for i/o port / memory space allocation to use malloc now. */
+	x86_bus_space_mallocok();
+}
+
+/*
+ * Set up proc0's TSS and LDT.
+ * On Xen, the ring-0 stack is registered with the hypervisor via
+ * HYPERVISOR_stack_switch() instead of loading a task register.
+ */
+void
+i386_proc0_tss_ldt_init()
+{
+	struct pcb *pcb;
+	int x;
+
+	gdt_init();
+
+	cpu_info_primary.ci_curpcb = pcb = &lwp0.l_addr->u_pcb;
+
+	/* I/O bitmap follows the TSS; offset encoded in the high word. */
+	pcb->pcb_tss.tss_ioopt =
+	    ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
+
+	/* All bits set in the I/O permission bitmap: no user port access. */
+	for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
+		pcb->pcb_iomap[x] = 0xffffffff;
+
+	pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
+	pcb->pcb_cr0 = rcr0();
+	pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+	pcb->pcb_tss.tss_esp0 = (int)lwp0.l_addr + USPACE - 16;
+	/* Trapframe sits just below the kernel stack top. */
+	lwp0.l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
+	lwp0.l_md.md_tss_sel = tss_alloc(pcb);
+
+#ifndef XEN
+	ltr(lwp0.l_md.md_tss_sel);
+	lldt(pcb->pcb_ldt_sel);
+#else
+	XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
+		      (void *)pcb->pcb_tss.tss_esp0,
+		      pcb->pcb_tss.tss_ss0, IDXSEL(pcb->pcb_tss.tss_ss0)));
+	HYPERVISOR_stack_switch(pcb->pcb_tss.tss_ss0, pcb->pcb_tss.tss_esp0);
+#endif
+}
+
+/*
+ * Set up TSS and LDT for a new PCB.
+ * Used for a CPU's idle pcb (ci->ci_idle_pcb); mirrors the proc0 setup
+ * above but allocates the TSS into ci->ci_idle_tss_sel.
+ */
+
+void
+i386_init_pcb_tss_ldt(ci)
+	struct cpu_info *ci;
+{
+	int x;
+	struct pcb *pcb = ci->ci_idle_pcb;
+
+	/* I/O bitmap follows the TSS; offset encoded in the high word. */
+	pcb->pcb_tss.tss_ioopt =
+	    ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
+	/* All bits set in the I/O permission bitmap: no user port access. */
+	for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
+		pcb->pcb_iomap[x] = 0xffffffff;
+
+	pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
+	pcb->pcb_cr0 = rcr0();
+
+	ci->ci_idle_tss_sel = tss_alloc(pcb);
+}
+
+/*
+ * sysctl helper routine for machdep.tm* nodes.
+ * Reads report the current Transmeta LongRun mode/frequency/voltage/
+ * percentage; writes are honored only for CPU_TMLR_MODE.
+ * Returns EOPNOTSUPP when the CPU has no LongRun support.
+ */
+static int
+sysctl_machdep_tm_longrun(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	int io, error;
+
+	if (!tmx86_has_longrun)
+		return (EOPNOTSUPP);
+
+	node = *rnode;
+	node.sysctl_data = &io;
+
+	/* Snapshot the requested value into `io' for sysctl_lookup. */
+	switch (rnode->sysctl_num) {
+	case CPU_TMLR_MODE:
+		io = (int)(crusoe_longrun = tmx86_get_longrun_mode());
+		break;
+	case CPU_TMLR_FREQUENCY:
+		tmx86_get_longrun_status_all();
+		io = crusoe_frequency;
+		break;
+	case CPU_TMLR_VOLTAGE:
+		tmx86_get_longrun_status_all();
+		io = crusoe_voltage;
+		break;
+	case CPU_TMLR_PERCENTAGE:
+		tmx86_get_longrun_status_all();
+		io = crusoe_percentage;
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp == NULL)
+		return (error);
+
+	/* Write path: only the mode node is settable. */
+	if (rnode->sysctl_num == CPU_TMLR_MODE) {
+		if (tmx86_set_longrun_mode(io))
+			crusoe_longrun = (u_int)io;
+		else
+			return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * sysctl helper routine for machdep.booted_kernel:
+ * export the boot path recorded by the bootloader, or ENOENT if the
+ * bootinfo record is absent.
+ */
+static int
+sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
+{
+	struct btinfo_bootpath *bibp;
+	struct sysctlnode node;
+
+	bibp = lookup_bootinfo(BTINFO_BOOTPATH);
+	if(!bibp)
+		return(ENOENT); /* ??? */
+
+	node = *rnode;
+	node.sysctl_data = bibp->bootpath;
+	node.sysctl_size = sizeof(bibp->bootpath);
+	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
+}
+
+/*
+ * sysctl helper routine for machdep.diskinfo:
+ * export the BIOS disk-geometry list gathered at boot.
+ * Returns EOPNOTSUPP when no disk info was supplied (i386_alldisks
+ * stays NULL) -- without this check sysctl_lookup would be handed a
+ * NULL data pointer and an underflowed size ((i386_ndisks - 1) < 0).
+ */
+static int
+sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+
+	if (i386_alldisks == NULL)
+		return (EOPNOTSUPP);
+
+	node = *rnode;
+	node.sysctl_data = i386_alldisks;
+	node.sysctl_size = sizeof(struct disklist) +
+	    (i386_ndisks - 1) * sizeof(struct nativedisk_info);
+	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
+}
+
+/*
+ * machine dependent system variables.
+ * Registers the CTL_MACHDEP sysctl subtree.  All nodes are read-only
+ * except tm_longrun_mode (SYSCTL_READWRITE); the tm_longrun_* nodes
+ * share the sysctl_machdep_tm_longrun helper above.
+ */
+SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
+{
+
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_NODE, "machdep", NULL,
+		       NULL, 0, NULL, 0,
+		       CTL_MACHDEP, CTL_EOL);
+
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_STRUCT, "console_device", NULL,
+		       sysctl_consdev, 0, NULL, sizeof(dev_t),
+		       CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "biosbasemem", NULL,
+		       NULL, 0, &biosbasemem, 0,
+		       CTL_MACHDEP, CPU_BIOSBASEMEM, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "biosextmem", NULL,
+		       NULL, 0, &biosextmem, 0,
+		       CTL_MACHDEP, CPU_BIOSEXTMEM, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "nkpde", NULL,
+		       NULL, 0, &nkpde, 0,
+		       CTL_MACHDEP, CPU_NKPDE, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_STRING, "booted_kernel", NULL,
+		       sysctl_machdep_booted_kernel, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_STRUCT, "diskinfo", NULL,
+		       sysctl_machdep_diskinfo, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "fpu_present", NULL,
+		       NULL, 0, &i386_fpu_present, 0,
+		       CTL_MACHDEP, CPU_FPU_PRESENT, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "osfxsr", NULL,
+		       NULL, 0, &i386_use_fxsave, 0,
+		       CTL_MACHDEP, CPU_OSFXSR, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "sse", NULL,
+		       NULL, 0, &i386_has_sse, 0,
+		       CTL_MACHDEP, CPU_SSE, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "sse2", NULL,
+		       NULL, 0, &i386_has_sse2, 0,
+		       CTL_MACHDEP, CPU_SSE2, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT|SYSCTL_READWRITE,
+		       CTLTYPE_INT, "tm_longrun_mode", NULL,
+		       sysctl_machdep_tm_longrun, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_TMLR_MODE, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "tm_longrun_frequency", NULL,
+		       sysctl_machdep_tm_longrun, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_TMLR_FREQUENCY, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "tm_longrun_voltage", NULL,
+		       sysctl_machdep_tm_longrun, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_TMLR_VOLTAGE, CTL_EOL);
+	sysctl_createv(SYSCTL_PERMANENT,
+		       CTLTYPE_INT, "tm_longrun_percentage", NULL,
+		       sysctl_machdep_tm_longrun, 0, NULL, 0,
+		       CTL_MACHDEP, CPU_TMLR_PERCENTAGE, CTL_EOL);
+}
+
+/*
+ * getframe: pick the user stack address where a signal frame for
+ * `sig' should be built.  Sets *onstack nonzero iff the alternate
+ * signal stack will be used (sigaltstack enabled and SA_ONSTACK set).
+ */
+void *
+getframe(struct lwp *l, int sig, int *onstack)
+{
+	struct proc *p = l->l_proc;
+	struct sigctx *ctx = &p->p_sigctx;
+	struct trapframe *tf = l->l_md.md_regs;
+
+	/* Do we need to jump onto the signal stack? */
+	*onstack = (ctx->ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
+	    && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
+	if (*onstack)
+		return (char *)ctx->ps_sigstk.ss_sp + ctx->ps_sigstk.ss_size;
+#ifdef VM86
+	/* vm86: linear stack address is ss<<4 + esp. */
+	if (tf->tf_eflags & PSL_VM)
+		return (void *)(tf->tf_esp + (tf->tf_ss << 4));
+	else
+#endif
+		return (void *)tf->tf_esp;
+}
+
+/*
+ * Build context to run handler in.  We invoke the handler
+ * directly, only returning via the trampoline.  Note the
+ * trampoline version numbers are coordinated with machine-
+ * dependent code in libc.
+ *
+ * `sel' is the user code segment selector (GUCODE_SEL or
+ * GUCODEBIG_SEL, chosen by the caller); `fp' becomes the new
+ * user stack pointer.
+ */
+void
+buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
+{
+	struct trapframe *tf = l->l_md.md_regs;
+
+	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_eip = (int)catcher;
+	tf->tf_cs = GSEL(sel, SEL_UPL);
+	/* Clear trace/vm86/alignment-check flags for the handler. */
+	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
+	tf->tf_esp = (int)fp;
+	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+}
+
+/*
+ * sendsig_siginfo: deliver a signal using the siginfo (version 2)
+ * trampoline ABI.  Builds a sigframe_siginfo on the chosen stack,
+ * copies it out, and redirects the trapframe at the handler.
+ * Versions 0/1 are handled by sendsig_sigcontext (see sendsig()).
+ */
+static void
+sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
+{
+	struct lwp *l = curlwp;
+	struct proc *p = l->l_proc;
+	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
+	int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
+	    GUCODEBIG_SEL : GUCODE_SEL;
+	struct sigacts *ps = p->p_sigacts;
+	int onstack;
+	int sig = ksi->ksi_signo;
+	struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
+	sig_t catcher = SIGACTION(p, sig).sa_handler;
+	struct trapframe *tf = l->l_md.md_regs;
+
+	fp--;	/* reserve room for the frame below the stack pointer */
+
+	/* Build stack frame for signal trampoline. */
+	switch (ps->sa_sigdesc[sig].sd_vers) {
+	case 0:		/* handled by sendsig_sigcontext */
+	case 1:		/* handled by sendsig_sigcontext */
+	default:	/* unknown version */
+		printf("nsendsig: bad version %d\n",
+		    ps->sa_sigdesc[sig].sd_vers);
+		sigexit(l, SIGILL);
+	case 2:
+		break;
+	}
+
+	frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
+	frame.sf_signum = sig;
+	frame.sf_sip = &fp->sf_si;
+	frame.sf_ucp = &fp->sf_uc;
+	frame.sf_si._info = ksi->ksi_info;
+	frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
+	frame.sf_uc.uc_sigmask = *mask;
+	frame.sf_uc.uc_link = NULL;
+	frame.sf_uc.uc_flags |= (p->p_sigctx.ps_sigstk.ss_flags & SS_ONSTACK)
+	    ? _UC_SETSTACK : _UC_CLRSTACK;
+	memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
+	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
+
+	if (tf->tf_eflags & PSL_VM)
+		(*p->p_emul->e_syscall_intern)(p);
+
+	if (copyout(&frame, fp, sizeof(frame)) != 0) {
+		/*
+		 * Process has trashed its stack; give it an illegal
+		 * instruction to halt it in its tracks.
+		 */
+		sigexit(l, SIGILL);
+		/* NOTREACHED */
+	}
+
+	buildcontext(l, sel, catcher, fp);
+
+	/* Remember that we're now on the signal stack. */
+	if (onstack)
+		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
+}
+
+/*
+ * sendsig: dispatch signal delivery by trampoline version --
+ * pre-siginfo (versions < 2) go through the COMPAT_16 sigcontext
+ * path, everything else through sendsig_siginfo.
+ */
+void
+sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
+{
+#ifdef COMPAT_16
+	if (curproc->p_sigacts->sa_sigdesc[ksi->ksi_signo].sd_vers < 2)
+		sendsig_sigcontext(ksi, mask);
+	else
+#endif
+		sendsig_siginfo(ksi, mask);
+}
+
+/*
+ * cpu_upcall: build a scheduler-activations upcall frame on the user
+ * stack at `sp' and aim the trapframe at `upcall', so the LWP resumes
+ * in the userland upcall handler.
+ */
+void
+cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted, void *sas,
+    void *ap, void *sp, sa_upcall_t upcall)
+{
+	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
+	struct saframe *sf, frame;
+	struct trapframe *tf;
+
+	tf = l->l_md.md_regs;
+
+	/* Finally, copy out the rest of the frame. */
+	frame.sa_type = type;
+	frame.sa_sas = sas;
+	frame.sa_events = nevents;
+	frame.sa_interrupted = ninterrupted;
+	frame.sa_arg = ap;
+	frame.sa_ra = 0;
+
+	sf = (struct saframe *)sp - 1;
+	if (copyout(&frame, sf, sizeof(frame)) != 0) {
+		/* Copying onto the stack didn't work. Die. */
+		sigexit(l, SIGILL);
+		/* NOTREACHED */
+	}
+
+	tf->tf_eip = (int) upcall;
+	tf->tf_esp = (int) sf;
+	tf->tf_ebp = 0; /* indicate call-frame-top to debuggers */
+	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
+	/* Big code segment if the process maps code above the default limit. */
+	tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
+	    GSEL(GUCODEBIG_SEL, SEL_UPL) : GSEL(GUCODE_SEL, SEL_UPL);
+	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
+}
+
+int	waittime = -1;
+struct pcb dumppcb;
+
+/*
+ * cpu_reboot: machine-dependent reboot/halt/powerdown.
+ * Unless cold, syncs filesystems (and the TODR if the clock was
+ * adjusted), then optionally dumps, runs shutdown hooks, powers down
+ * or halts, and finally resets the CPU.
+ */
+void
+cpu_reboot(howto, bootstr)
+	int howto;
+	char *bootstr;
+{
+
+	/* If still cold, skip sync/dump entirely and just halt. */
+	if (cold) {
+		howto |= RB_HALT;
+		goto haltsys;
+	}
+
+	boothowto = howto;
+	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
+		waittime = 0;
+		vfs_shutdown();
+		/*
+		 * If we've been adjusting the clock, the todr
+		 * will be out of synch; adjust it now.
+		 */
+		if (time_adjusted != 0)
+			resettodr();
+	}
+
+	/* Disable interrupts. */
+	splhigh();
+
+	/* Do a dump if requested. */
+	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
+		dumpsys();
+
+haltsys:
+	doshutdownhooks();
+
+#ifdef MULTIPROCESSOR
+	x86_broadcast_ipi(X86_IPI_HALT);
+#endif
+
+	if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
+#if NACPI > 0
+		/* Try ACPI S5 first; falls through on failure. */
+		if (acpi_softc != NULL) {
+			delay(500000);
+			acpi_enter_sleep_state(acpi_softc, ACPI_STATE_S5);
+			printf("WARNING: ACPI powerdown failed!\n");
+		}
+#endif
+#if NAPM > 0 && !defined(APM_NO_POWEROFF)
+		/* turn off, if we can.  But try to turn disk off and
+		 * wait a bit first--some disk drives are slow to clean up
+		 * and users have reported disk corruption.
+		 */
+		delay(500000);
+		apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
+		delay(500000);
+		apm_set_powstate(APM_DEV_ALLDEVS, APM_SYS_OFF);
+		printf("WARNING: APM powerdown failed!\n");
+		/*
+		 * RB_POWERDOWN implies RB_HALT... fall into it...
+		 */
+#endif
+	}
+
+	if (howto & RB_HALT) {
+		printf("\n");
+		printf("The operating system has halted.\n");
+		printf("Please press any key to reboot.\n\n");
+
+#ifdef BEEP_ONHALT
+		{
+			int c;
+			for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
+				sysbeep(BEEP_ONHALT_PITCH,
+				        BEEP_ONHALT_PERIOD * hz / 1000);
+				delay(BEEP_ONHALT_PERIOD * 1000);
+				sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
+				delay(BEEP_ONHALT_PERIOD * 1000);
+			}
+		}
+#endif
+
+		cnpollc(1);	/* for proper keyboard command handling */
+		if (cngetc() == 0) {
+			/* no console attached, so just hlt */
+			for(;;) {
+				__asm __volatile("hlt");
+			}
+		}
+		cnpollc(0);
+	}
+
+	printf("rebooting...\n");
+	if (cpureset_delay > 0)
+		delay(cpureset_delay * 1000);
+	cpu_reset();
+	for(;;) ;
+	/*NOTREACHED*/
+}
+
+/*
+ * These variables are needed by /sbin/savecore
+ */
+u_int32_t dumpmag = 0x8fca0101;	/* magic number */
+int 	dumpsize = 0;		/* pages */
+long	dumplo = 0; 		/* blocks */
+
+/*
+ * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers.
+ * Returns the header size in disk blocks (always 1), or -1 if the
+ * headers do not fit within a single disk block.
+ */
+int
+cpu_dumpsize()
+{
+	int size;
+
+	size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
+	    ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
+	if (roundup(size, dbtob(1)) != dbtob(1))
+		return (-1);
+
+	return (1);
+}
+
+/*
+ * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped.
+ * Sums the page counts of all entries in mem_clusters[].
+ */
+u_long
+cpu_dump_mempagecnt()
+{
+	u_long i, n;
+
+	n = 0;
+	for (i = 0; i < mem_cluster_cnt; i++)
+		n += atop(mem_clusters[i].size);
+	return (n);
+}
+
+/*
+ * cpu_dump: dump the machine-dependent kernel core dump headers.
+ * Writes one disk block to dumpdev at dumplo containing, in order,
+ * the kcore segment header, the cpu_kcore_hdr, and one phys_ram_seg_t
+ * per memory cluster.  Returns 0 or an errno from the device dump op.
+ */
+int
+cpu_dump()
+{
+	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
+	char buf[dbtob(1)];	/* one disk block; fits per cpu_dumpsize() */
+	kcore_seg_t *segp;
+	cpu_kcore_hdr_t *cpuhdrp;
+	phys_ram_seg_t *memsegp;
+	const struct bdevsw *bdev;
+	int i;
+
+	bdev = bdevsw_lookup(dumpdev);
+	if (bdev == NULL)
+		return (ENXIO);
+	dump = bdev->d_dump;
+
+	/* Lay the three headers out back to back in the block. */
+	memset(buf, 0, sizeof buf);
+	segp = (kcore_seg_t *)buf;
+	cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))];
+	memsegp = (phys_ram_seg_t *)&buf[ ALIGN(sizeof(*segp)) +
+	    ALIGN(sizeof(*cpuhdrp))];
+
+	/*
+	 * Generate a segment header.
+	 */
+	CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
+	segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
+
+	/*
+	 * Add the machine-dependent header info.
+	 */
+	cpuhdrp->ptdpaddr = PTDpaddr;
+	cpuhdrp->nmemsegs = mem_cluster_cnt;
+
+	/*
+	 * Fill in the memory segment descriptors.
+	 */
+	for (i = 0; i < mem_cluster_cnt; i++) {
+		memsegp[i].start = mem_clusters[i].start;
+		memsegp[i].size = mem_clusters[i].size;
+	}
+
+	return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
+}
+
+/*
+ * This is called by main to set dumplo and dumpsize.
+ * Dumps always skip the first PAGE_SIZE of disk space
+ * in case there might be a disk label stored there.
+ * If there is extra space, put dump at the end to
+ * reduce the chance that swapping trashes it.
+ */
+void
+cpu_dumpconf()
+{
+	const struct bdevsw *bdev;
+	int nblks, dumpblks;	/* size of dump area */
+
+	if (dumpdev == NODEV)
+		goto bad;
+	bdev = bdevsw_lookup(dumpdev);
+	if (bdev == NULL)
+		panic("dumpconf: bad dumpdev=0x%x", dumpdev);
+	if (bdev->d_psize == NULL)
+		goto bad;
+	nblks = (*bdev->d_psize)(dumpdev);
+	if (nblks <= ctod(1))
+		goto bad;
+
+	/* Header blocks plus the full memory image, in disk blocks. */
+	dumpblks = cpu_dumpsize();
+	if (dumpblks < 0)
+		goto bad;
+	dumpblks += ctod(cpu_dump_mempagecnt());
+
+	/* If dump won't fit (incl. room for possible label), punt. */
+	if (dumpblks > (nblks - ctod(1)))
+		goto bad;
+
+	/* Put dump at end of partition */
+	dumplo = nblks - dumpblks;
+
+	/* dumpsize is in page units, and doesn't include headers. */
+	dumpsize = cpu_dump_mempagecnt();
+	return;
+
+ bad:
+	dumpsize = 0;	/* dumpsize == 0 marks dumps as disabled */
+}
+
+/*
+ * Doadump comes here after turning off memory management and
+ * getting on the dump stack, either when called above, or by
+ * the auto-restart code.
+ */
+#define BYTES_PER_DUMP  PAGE_SIZE /* must be a multiple of pagesize XXX small */
+static vaddr_t dumpspace;
+
+/*
+ * reserve_dumppages: claim BYTES_PER_DUMP bytes of KVA at `p' for
+ * dumpsys() to map each chunk of physical memory into; returns the
+ * first address past the reserved window.
+ */
+vaddr_t
+reserve_dumppages(p)
+	vaddr_t p;
+{
+
+	dumpspace = p;
+	return (p + BYTES_PER_DUMP);
+}
+
+void
+dumpsys()
+{
+	u_long totalbytesleft, bytes, i, n, memseg;
+	u_long maddr;
+	int psize;
+	daddr_t blkno;
+	const struct bdevsw *bdev;
+	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
+	int error;
+
+	/* Save registers. */
+	savectx(&dumppcb);
+
+	if (dumpdev == NODEV)
+		return;
+
+	bdev = bdevsw_lookup(dumpdev);
+	if (bdev == NULL || bdev->d_psize == NULL)
+		return;
+
+	/*
+	 * For dumps during autoconfiguration,
+	 * if dump device has already configured...
+	 */
+	if (dumpsize == 0)
+		cpu_dumpconf();
+	if (dumplo <= 0 || dumpsize == 0) {
+		printf("\ndump to dev %u,%u not possible\n", major(dumpdev),
+		    minor(dumpdev));
+		return;
+	}
+	printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev),
+	    minor(dumpdev), dumplo);
+
+	psize = (*bdev->d_psize)(dumpdev);
+	printf("dump ");
+	if (psize == -1) {
+		printf("area unavailable\n");
+		return;
+	}
+
+#if 0	/* XXX this doesn't work.  grr. */
+        /* toss any characters present prior to dump */
+	while (sget() != NULL); /*syscons and pccons differ */
+#endif
+
+	/* Write the kernel-core headers first; bail if that fails. */
+	if ((error = cpu_dump()) != 0)
+		goto err;
+
+	totalbytesleft = ptoa(cpu_dump_mempagecnt());
+	blkno = dumplo + cpu_dumpsize();
+	dump = bdev->d_dump;
+	error = 0;
+
+	/*
+	 * Walk each physical memory cluster, mapping it into dumpspace
+	 * BYTES_PER_DUMP bytes at a time and writing it to the device.
+	 */
+	for (memseg = 0; memseg < mem_cluster_cnt; memseg++) {
+		maddr = mem_clusters[memseg].start;
+		bytes = mem_clusters[memseg].size;
+
+		for (i = 0; i < bytes; i += n, totalbytesleft -= n) {
+			/* Print out how many MBs we have left to go. */
+			if ((totalbytesleft % (1024*1024)) == 0)
+				printf("%ld ", totalbytesleft / (1024 * 1024));
+
+			/* Limit size for next transfer. */
+			n = bytes - i;
+			if (n > BYTES_PER_DUMP)
+				n = BYTES_PER_DUMP;
+
+			(void) pmap_map(dumpspace, maddr, maddr + n,
+			    VM_PROT_READ);
+
+			error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, n);
+			if (error)
+				goto err;
+			maddr += n;
+			blkno += btodb(n);		/* XXX? */
+
+#if 0	/* XXX this doesn't work.  grr. */
+			/* operator aborting dump? */
+			if (sget() != NULL) {
+				error = EINTR;
+				break;
+			}
+#endif
+		}
+	}
+
+ err:
+	/* Report the outcome in human-readable form. */
+	switch (error) {
+
+	case ENXIO:
+		printf("device bad\n");
+		break;
+
+	case EFAULT:
+		printf("device not ready\n");
+		break;
+
+	case EINVAL:
+		printf("area improper\n");
+		break;
+
+	case EIO:
+		printf("i/o error\n");
+		break;
+
+	case EINTR:
+		printf("aborted from console\n");
+		break;
+
+	case 0:
+		printf("succeeded\n");
+		break;
+
+	default:
+		printf("error %d\n", error);
+		break;
+	}
+	printf("\n\n");
+	delay(5000000);		/* 5 seconds */
+}
+
+/*
+ * Clear registers on exec
+ */
+void
+setregs(l, pack, stack)
+	struct lwp *l;
+	struct exec_package *pack;
+	u_long stack;
+{
+	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	struct trapframe *tf;
+
+#if NNPX > 0
+	/* If we were using the FPU, forget about it. */
+	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
+		npxsave_lwp(l, 0);
+#endif
+
+#ifdef USER_LDT
+	pmap_ldt_cleanup(l);
+#endif
+
+	/* Reset the FPU to the default control word for the new image. */
+	l->l_md.md_flags &= ~MDL_USEDFPU;
+	if (i386_use_fxsave) {
+		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__;
+		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
+	} else
+		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__;
+
+	/*
+	 * Build the initial user register state: flat LDT data segments,
+	 * all general registers zeroed except %ebx (ps_strings pointer,
+	 * per the i386 ABI startup convention used here), entry point in
+	 * %eip and the argument stack in %esp.
+	 */
+	tf = l->l_md.md_regs;
+	tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
+	tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
+	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
+	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
+	tf->tf_edi = 0;
+	tf->tf_esi = 0;
+	tf->tf_ebp = 0;
+	tf->tf_ebx = (int)l->l_proc->p_psstr;
+	tf->tf_edx = 0;
+	tf->tf_ecx = 0;
+	tf->tf_eax = 0;
+	tf->tf_eip = pack->ep_entry;
+	/* Pick the code selector whose limit covers the exec image. */
+	tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
+	    LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
+	tf->tf_eflags = PSL_USERSET;
+	tf->tf_esp = stack;
+	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
+}
+
+/*
+ * Initialize segments and descriptor tables
+ */
+
+union	descriptor *gdt, *ldt;
+struct gate_descriptor *idt;
+char idt_allocmap[NIDT];
+struct simplelock idt_lock = SIMPLELOCK_INITIALIZER;
+#ifdef I586_CPU
+union	descriptor *pentium_idt;
+#endif
+extern  struct user *proc0paddr;
+
+/*
+ * setgate: fill in an i386 gate descriptor -- handler address split
+ * into low/high halves, target code selector, argument copy count,
+ * gate type and privilege level -- and mark it present.
+ */
+void
+setgate(gd, func, args, type, dpl, sel)
+	struct gate_descriptor *gd;
+	void *func;
+	int args, type, dpl, sel;
+{
+
+	gd->gd_looffset = (int)func;
+	gd->gd_selector = sel;
+	gd->gd_stkcpy = args;
+	gd->gd_xx = 0;
+	gd->gd_type = type;
+	gd->gd_dpl = dpl;
+	gd->gd_p = 1;			/* present */
+	gd->gd_hioffset = (int)func >> 16;
+}
+
+/*
+ * unsetgate: zero every field of a gate descriptor, leaving it
+ * marked not-present (gd_p == 0).
+ */
+void
+unsetgate(gd)
+	struct gate_descriptor *gd;
+{
+	gd->gd_looffset = 0;
+	gd->gd_hioffset = 0;
+	gd->gd_selector = 0;
+	gd->gd_stkcpy = 0;
+	gd->gd_xx = 0;
+	gd->gd_type = 0;
+	gd->gd_dpl = 0;
+	gd->gd_p = 0;
+}
+
+
+/*
+ * setregion: build a region descriptor (limit and linear base) of the
+ * form consumed by lgdt()/lidt().
+ */
+void
+setregion(rd, base, limit)
+	struct region_descriptor *rd;
+	void *base;
+	size_t limit;
+{
+
+	rd->rd_limit = (int)limit;
+	rd->rd_base = (int)base;
+}
+
+/*
+ * setsegment: fill in an i386 memory segment descriptor -- base and
+ * limit split across their low/high fields, segment type, privilege
+ * level, default operand size and granularity -- and mark it present.
+ */
+void
+setsegment(sd, base, limit, type, dpl, def32, gran)
+	struct segment_descriptor *sd;
+	void *base;
+	size_t limit;
+	int type, dpl, def32, gran;
+{
+
+	sd->sd_lolimit = (int)limit;
+	sd->sd_lobase = (int)base;
+	sd->sd_type = type;
+	sd->sd_dpl = dpl;
+	sd->sd_p = 1;			/* present */
+	sd->sd_hilimit = (int)limit >> 16;
+	sd->sd_xx = 0;
+	sd->sd_def32 = def32;
+	sd->sd_gran = gran;
+	sd->sd_hibase = (int)base >> 24;
+}
+
+#define	IDTVEC(name)	__CONCAT(X, name)
+typedef void (vector)(void);
+extern vector IDTVEC(syscall);
+extern vector IDTVEC(osyscall);
+extern vector *IDTVEC(exceptions)[];
+#ifdef COMPAT_SVR4
+extern vector IDTVEC(svr4_fasttrap);
+#endif /* COMPAT_SVR4 */
+#ifdef COMPAT_MACH
+extern vector IDTVEC(mach_trap);
+#endif
+#define MAX_XEN_IDT 128
+trap_info_t xen_idt[MAX_XEN_IDT];
+int xen_idt_idx;
+
+#define	KBTOB(x)	((size_t)(x) * 1024UL)
+
+void cpu_init_idt()
+{
+	struct region_descriptor region;
+
+	/*
+	 * Stub on this port: init386() installs the exception vectors via
+	 * HYPERVISOR_set_trap_table() instead of lidt, so reaching this
+	 * function is a bug.  The code below the panic is unreachable.
+	 */
+	panic("cpu_init_idt");
+#ifdef I586_CPU
+	setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1);
+#else
+	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
+#endif
+        lidt(&region);
+}
+
+#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
+/*
+ * add_mem_cluster: record one BIOS memory-map segment in mem_clusters[].
+ * Skips segments above 4GB, duplicates, and non-RAM entries; reserves
+ * the range in the iomem extent; page-rounds the bounds; and updates
+ * avail_end and physmem accounting.
+ */
+void
+add_mem_cluster(seg_start, seg_end, type)
+	u_int64_t seg_start, seg_end;
+	u_int32_t type;
+{
+	extern struct extent *iomem_ex;
+	int i;
+
+	if (seg_end > 0x100000000ULL) {
+		printf("WARNING: skipping large "
+		    "memory map entry: "
+		    "0x%qx/0x%qx/0x%x\n",
+		    seg_start,
+		    (seg_end - seg_start),
+		    type);
+		return;
+	}
+
+	/*
+	 * XXX Chop the last page off the size so that
+	 * XXX it can fit in avail_end.
+	 */
+	if (seg_end == 0x100000000ULL)
+		seg_end -= PAGE_SIZE;
+
+	if (seg_end <= seg_start)
+		return;
+
+	/* Ignore a segment we have already recorded. */
+	for (i = 0; i < mem_cluster_cnt; i++) {
+		if ((mem_clusters[i].start == round_page(seg_start))
+		    && (mem_clusters[i].size
+			    == trunc_page(seg_end) - mem_clusters[i].start)) {
+#ifdef DEBUG_MEMLOAD
+			printf("WARNING: skipping duplicate segment entry\n");
+#endif
+			return;
+		}
+	}
+
+	/*
+	 * Allocate the physical addresses used by RAM
+	 * from the iomem extent map.  This is done before
+	 * the addresses are page rounded just to make
+	 * sure we get them all.
+	 */
+	if (extent_alloc_region(iomem_ex, seg_start,
+	    seg_end - seg_start, EX_NOWAIT)) {
+		/* XXX What should we do? */
+		printf("WARNING: CAN'T ALLOCATE "
+		    "MEMORY SEGMENT "
+		    "(0x%qx/0x%qx/0x%x) FROM "
+		    "IOMEM EXTENT MAP!\n",
+		    seg_start, seg_end - seg_start, type);
+		return;
+	}
+
+	/*
+	 * If it's not free memory, skip it.
+	 */
+	if (type != BIM_Memory)
+		return;
+
+	/* XXX XXX XXX */
+	if (mem_cluster_cnt >= VM_PHYSSEG_MAX)
+		panic("init386: too many memory segments");
+
+	seg_start = round_page(seg_start);
+	seg_end = trunc_page(seg_end);
+
+	if (seg_start == seg_end)
+		return;
+
+	mem_clusters[mem_cluster_cnt].start = seg_start;
+	mem_clusters[mem_cluster_cnt].size =
+	    seg_end - seg_start;
+
+	if (avail_end < seg_end)
+		avail_end = seg_end;
+	physmem += atop(mem_clusters[mem_cluster_cnt].size);
+	mem_cluster_cnt++;
+}
+#endif /* !defined(REALBASEMEM) && !defined(REALEXTMEM) */
+
+/*
+ * initgdt: build the initial GDT entries (kernel/user code and data,
+ * per-CPU segment, optional BIOS-call and Mach-trap entries) and
+ * activate the table -- via lgdt() natively, or by registering the
+ * GDT page with the hypervisor on Xen.
+ */
+void
+initgdt()
+{
+#if !defined(XEN)
+	struct region_descriptor region;
+#else
+	paddr_t frames[16];
+#endif
+
+#if !defined(XEN)
+	/*
+	 * NOTE(review): tgdt is not declared at file scope here (it is a
+	 * local of init386()), so this !XEN branch looks like it would
+	 * not compile -- confirm before enabling a non-Xen build.
+	 */
+	gdt = tgdt;
+	memset(gdt, 0, NGDT*sizeof(*gdt));
+#endif
+	/* make gdt gates and memory segments */
+	setsegment(&gdt[GCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 1, 1);
+	setsegment(&gdt[GDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 1, 1);
+	setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
+	    SDT_MEMERA, SEL_UPL, 1, 1);
+	setsegment(&gdt[GUCODEBIG_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
+	    SDT_MEMERA, SEL_UPL, 1, 1);
+	setsegment(&gdt[GUDATA_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
+	    SDT_MEMRWA, SEL_UPL, 1, 1);
+#ifdef COMPAT_MACH
+	setgate(&gdt[GMACHCALLS_SEL].gd, &IDTVEC(mach_trap), 1,
+	    SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+#if NBIOSCALL > 0
+	/* bios trampoline GDT entries */
+	setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 0,
+	    0);
+	setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 0,
+	    0);
+#endif
+	setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary,
+	    sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1);
+
+#if !defined(XEN)
+	setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
+	lgdt(&region);
+#else
+	/*
+	 * Xen: translate the GDT page to its machine frame number, remap
+	 * it read-only, and hand it to the hypervisor to install.
+	 */
+	frames[0] = xpmap_ptom((uint32_t)gdt - KERNTEXTOFF) >> PAGE_SHIFT;
+	/* pmap_kremove((vaddr_t)gdt, PAGE_SIZE); */
+	pmap_kenter_pa((vaddr_t)gdt, (uint32_t)gdt - KERNTEXTOFF,
+	    VM_PROT_READ);
+	XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT,
+	    LAST_RESERVED_GDT_ENTRY + 1));
+	if (HYPERVISOR_set_gdt(frames, LAST_RESERVED_GDT_ENTRY + 1))
+		panic("HYPERVISOR_set_gdt failed!\n");
+	lgdt_finish();
+#endif
+}
+
+/*
+ * init386: early machine-dependent initialization, called before main().
+ * Sets up memory clusters and UVM page lists, reserves real-mode pages
+ * (BIOS call / MP / ACPI trampolines), steals the message buffer,
+ * builds the GDT/LDT and either the native IDT or the Xen trap table,
+ * and initializes console, symbols, debuggers and software interrupts.
+ */
+void
+init386(first_avail)
+	paddr_t first_avail;
+{
+#if !defined(XEN)
+	union descriptor *tgdt;
+#endif
+	extern void consinit(void);
+#if !defined(XEN)
+	extern struct extent *iomem_ex;
+#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
+	struct btinfo_memmap *bim;
+#endif
+	struct region_descriptor region;
+#endif
+	int x;
+#if !defined(XEN)
+	int first16q;
+	u_int64_t seg_start, seg_end;
+	u_int64_t seg_start1, seg_end1;
+#endif
+	paddr_t realmode_reserved_start;
+	psize_t realmode_reserved_size;
+	int needs_earlier_install_pte0;
+#if NBIOSCALL > 0
+	extern int biostramp_image_size;
+	extern u_char biostramp_image[];
+#endif
+
+	XENPRINTK(("HYPERVISOR_shared_info %p\n", HYPERVISOR_shared_info));
+#ifdef XENDEBUG_LOW
+	xen_dbglow_init();
+#endif
+
+	cpu_probe_features(&cpu_info_primary);
+	cpu_feature = cpu_info_primary.ci_feature_flags;
+
+	/* not on Xen... */
+	cpu_feature &= ~(CPUID_PGE|CPUID_PSE|CPUID_MTRR|CPUID_FXSR);
+
+	lwp0.l_addr = proc0paddr;
+	cpu_info_primary.ci_curpcb = &lwp0.l_addr->u_pcb;
+
+	XENPRINTK(("proc0paddr %p pcb %p first_avail %p\n",
+	    proc0paddr, cpu_info_primary.ci_curpcb, (void *)first_avail));
+	XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PTDpaddr,
+		      (void *)atdevbase));
+
+	x86_bus_space_init();
+	consinit();	/* XXX SHOULD NOT BE DONE HERE */
+	/*
+	 * Initialize PAGE_SIZE-dependent variables.
+	 */
+	uvm_setpagesize();
+
+	/*
+	 * Saving SSE registers won't work if the save area isn't
+	 * 16-byte aligned.
+	 */
+	if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
+		panic("init386: pcb_savefpu not 16-byte aligned");
+
+	/*
+	 * Start with 2 color bins -- this is just a guess to get us
+	 * started.  We'll recolor when we determine the largest cache
+	 * sizes on the system.
+	 */
+	uvmexp.ncolors = 2;
+
+#if !defined(XEN)
+	/*
+	 * BIOS leaves data in physical page 0
+	 * Even if it didn't, our VM system doesn't like using zero as a
+	 * physical page number.
+	 * We may also need pages in low memory (one each) for secondary CPU
+	 * startup, for BIOS calls, and for ACPI, plus a page table page to map
+	 * them into the first few pages of the kernel's pmap.
+	 */
+	avail_start = PAGE_SIZE;
+#else
+	/* Make sure the end of the space used by the kernel is rounded. */
+	first_avail = round_page(first_avail);
+	avail_start = first_avail - KERNTEXTOFF;
+	avail_end = ptoa(xen_start_info.nr_pages);
+	mem_clusters[0].start = avail_start;
+	mem_clusters[0].size = avail_end - avail_start;
+	mem_cluster_cnt++;
+	physmem += atop(mem_clusters[0].size);
+#endif
+
+	/*
+	 * reserve memory for real-mode call
+	 */
+	needs_earlier_install_pte0 = 0;
+	realmode_reserved_start = 0;
+	realmode_reserved_size = 0;
+#if NBIOSCALL > 0
+	/* save us a page for trampoline code */
+	realmode_reserved_size += PAGE_SIZE;
+	needs_earlier_install_pte0 = 1;
+#endif
+#ifdef MULTIPROCESSOR						 /* XXX */
+#if !defined(XEN)
+	KASSERT(avail_start == PAGE_SIZE);			 /* XXX */
+#endif
+	if (realmode_reserved_size < MP_TRAMPOLINE)		 /* XXX */
+		realmode_reserved_size = MP_TRAMPOLINE;		 /* XXX */
+	needs_earlier_install_pte0 = 1;				 /* XXX */
+#endif								 /* XXX */
+#if NACPI > 0
+	/* trampoline code for wake handler */
+	realmode_reserved_size += ptoa(acpi_md_get_npages_of_wakecode()+1);
+	needs_earlier_install_pte0 = 1;
+#endif
+	if (needs_earlier_install_pte0) {
+		/* page table for directory entry 0 */
+		realmode_reserved_size += PAGE_SIZE;
+	}
+	if (realmode_reserved_size>0) {
+		realmode_reserved_start = avail_start;
+		avail_start += realmode_reserved_size;
+	}
+
+#ifdef DEBUG_MEMLOAD
+	printf("mem_cluster_count: %d\n", mem_cluster_cnt);
+#endif
+
+	/*
+	 * Call pmap initialization to make new kernel address space.
+	 * We must do this before loading pages into the VM system.
+	 */
+	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
+
+#if !defined(XEN)
+#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
+	/*
+	 * Check to see if we have a memory map from the BIOS (passed
+	 * to us by the boot program.
+	 */
+	bim = lookup_bootinfo(BTINFO_MEMMAP);
+	if (bim != NULL && bim->num > 0) {
+#ifdef DEBUG_MEMLOAD
+		printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num);
+#endif
+		for (x = 0; x < bim->num; x++) {
+#ifdef DEBUG_MEMLOAD
+			printf("    addr 0x%qx  size 0x%qx  type 0x%x\n",
+			    bim->entry[x].addr,
+			    bim->entry[x].size,
+			    bim->entry[x].type);
+#endif
+
+			/*
+			 * If the segment is not memory, skip it.
+			 */
+			switch (bim->entry[x].type) {
+			case BIM_Memory:
+			case BIM_ACPI:
+			case BIM_NVS:
+				break;
+			default:
+				continue;
+			}
+
+			/*
+			 * Sanity check the entry.
+			 * XXX Need to handle uint64_t in extent code
+			 * XXX and 64-bit physical addresses in i386
+			 * XXX port.
+			 */
+			seg_start = bim->entry[x].addr;
+			seg_end = bim->entry[x].addr + bim->entry[x].size;
+
+			/*
+			 *   Avoid Compatibility Holes.
+			 * XXX  Holes within memory space that allow access
+			 * XXX to be directed to the PC-compatible frame buffer
+			 * XXX (0xa0000-0xbffff),to adapter ROM space
+			 * XXX (0xc0000-0xdffff), and to system BIOS space
+			 * XXX (0xe0000-0xfffff).
+			 * XXX  Some laptop(for example,Toshiba Satellite2550X)
+			 * XXX report this area and occurred problems,
+			 * XXX so we avoid this area.
+			 */
+			if (seg_start < 0x100000 && seg_end > 0xa0000) {
+				printf("WARNING: memory map entry overlaps "
+				    "with ``Compatibility Holes'': "
+				    "0x%qx/0x%qx/0x%x\n", seg_start,
+				    seg_end - seg_start, bim->entry[x].type);
+				add_mem_cluster(seg_start, 0xa0000,
+				    bim->entry[x].type);
+				add_mem_cluster(0x100000, seg_end,
+				    bim->entry[x].type);
+			} else
+				add_mem_cluster(seg_start, seg_end,
+				    bim->entry[x].type);
+		}
+	}
+#endif /* ! REALBASEMEM && ! REALEXTMEM */
+	/*
+	 * If the loop above didn't find any valid segment, fall back to
+	 * former code.
+	 */
+	if (mem_cluster_cnt == 0) {
+		/*
+		 * Allocate the physical addresses used by RAM from the iomem
+		 * extent map.  This is done before the addresses are
+		 * page rounded just to make sure we get them all.
+		 */
+		if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
+		    EX_NOWAIT)) {
+			/* XXX What should we do? */
+			printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
+			    "IOMEM EXTENT MAP!\n");
+		}
+		mem_clusters[0].start = 0;
+		mem_clusters[0].size = trunc_page(KBTOB(biosbasemem));
+		physmem += atop(mem_clusters[0].size);
+		if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
+		    EX_NOWAIT)) {
+			/* XXX What should we do? */
+			printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
+			    "IOMEM EXTENT MAP!\n");
+		}
+#if NISADMA > 0
+		/*
+		 * Some motherboards/BIOSes remap the 384K of RAM that would
+		 * normally be covered by the ISA hole to the end of memory
+		 * so that it can be used.  However, on a 16M system, this
+		 * would cause bounce buffers to be allocated and used.
+		 * This is not desirable behaviour, as more than 384K of
+		 * bounce buffers might be allocated.  As a work-around,
+		 * we round memory down to the nearest 1M boundary if
+		 * we're using any isadma devices and the remapped memory
+		 * is what puts us over 16M.
+		 */
+		if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
+			char pbuf[9];
+
+			format_bytes(pbuf, sizeof(pbuf),
+			    biosextmem - (15*1024));
+			printf("Warning: ignoring %s of remapped memory\n",
+			    pbuf);
+			biosextmem = (15*1024);
+		}
+#endif
+		mem_clusters[1].start = IOM_END;
+		mem_clusters[1].size = trunc_page(KBTOB(biosextmem));
+		physmem += atop(mem_clusters[1].size);
+
+		mem_cluster_cnt = 2;
+
+		avail_end = IOM_END + trunc_page(KBTOB(biosextmem));
+	}
+	/*
+	 * If we have 16M of RAM or less, just put it all on
+	 * the default free list.  Otherwise, put the first
+	 * 16M of RAM on a lower priority free list (so that
+	 * all of the ISA DMA'able memory won't be eaten up
+	 * first-off).
+	 */
+	if (avail_end <= (16 * 1024 * 1024))
+		first16q = VM_FREELIST_DEFAULT;
+	else
+		first16q = VM_FREELIST_FIRST16;
+
+	/* Make sure the end of the space used by the kernel is rounded. */
+	first_avail = round_page(first_avail);
+#endif
+
+	/*
+	 * NOTE(review): this physload references xen_start_info
+	 * unconditionally, and the !XEN code below would load the
+	 * clusters a second time -- confirm this file is only ever
+	 * built with XEN defined.
+	 */
+	XENPRINTK(("load the memory cluster %p(%d) - %p(%ld)\n",
+	    (void *)avail_start, (int)atop(avail_start),
+	    (void *)ptoa(xen_start_info.nr_pages), xen_start_info.nr_pages));
+	uvm_page_physload(atop(avail_start), xen_start_info.nr_pages,
+	    atop(avail_start), xen_start_info.nr_pages,
+	    VM_FREELIST_DEFAULT);
+
+#if !defined(XEN)
+
+	/*
+	 * Now, load the memory clusters (which have already been
+	 * rounded and truncated) into the VM system.
+	 *
+	 * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
+	 * IS LOADED AT IOM_END (1M).
+	 */
+	for (x = 0; x < mem_cluster_cnt; x++) {
+		seg_start = mem_clusters[x].start;
+		seg_end = mem_clusters[x].start + mem_clusters[x].size;
+		seg_start1 = 0;
+		seg_end1 = 0;
+
+		/*
+		 * Skip memory before our available starting point.
+		 */
+		if (seg_end <= avail_start)
+			continue;
+
+		if (avail_start >= seg_start && avail_start < seg_end) {
+			if (seg_start != 0)
+				panic("init386: memory doesn't start at 0");
+			seg_start = avail_start;
+			if (seg_start == seg_end)
+				continue;
+		}
+
+		/*
+		 * If this segment contains the kernel, split it
+		 * in two, around the kernel.
+		 */
+		if (seg_start <= IOM_END && first_avail <= seg_end) {
+			seg_start1 = first_avail;
+			seg_end1 = seg_end;
+			seg_end = IOM_END;
+		}
+
+		/* First hunk */
+		if (seg_start != seg_end) {
+			if (seg_start < (16 * 1024 * 1024) &&
+			    first16q != VM_FREELIST_DEFAULT) {
+				u_int64_t tmp;
+
+				if (seg_end > (16 * 1024 * 1024))
+					tmp = (16 * 1024 * 1024);
+				else
+					tmp = seg_end;
+
+				if (tmp != seg_start) {
+#ifdef DEBUG_MEMLOAD
+					printf("loading 0x%qx-0x%qx "
+					    "(0x%lx-0x%lx)\n",
+				    	    seg_start, tmp,
+				  	    atop(seg_start), atop(tmp));
+#endif
+					uvm_page_physload(atop(seg_start),
+				    	    atop(tmp), atop(seg_start),
+				    	    atop(tmp), first16q);
+				}
+				seg_start = tmp;
+			}
+
+			if (seg_start != seg_end) {
+#ifdef DEBUG_MEMLOAD
+				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
+				    seg_start, seg_end,
+				    atop(seg_start), atop(seg_end));
+#endif
+				uvm_page_physload(atop(seg_start),
+				    atop(seg_end), atop(seg_start),
+				    atop(seg_end), VM_FREELIST_DEFAULT);
+			}
+		}
+
+		/* Second hunk */
+		if (seg_start1 != seg_end1) {
+			if (seg_start1 < (16 * 1024 * 1024) &&
+			    first16q != VM_FREELIST_DEFAULT) {
+				u_int64_t tmp;
+
+				if (seg_end1 > (16 * 1024 * 1024))
+					tmp = (16 * 1024 * 1024);
+				else
+					tmp = seg_end1;
+
+				if (tmp != seg_start1) {
+#ifdef DEBUG_MEMLOAD
+					printf("loading 0x%qx-0x%qx "
+					    "(0x%lx-0x%lx)\n",
+				    	    seg_start1, tmp,
+				    	    atop(seg_start1), atop(tmp));
+#endif
+					uvm_page_physload(atop(seg_start1),
+				    	    atop(tmp), atop(seg_start1),
+				    	    atop(tmp), first16q);
+				}
+				seg_start1 = tmp;
+			}
+
+			if (seg_start1 != seg_end1) {
+#ifdef DEBUG_MEMLOAD
+				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
+				    seg_start1, seg_end1,
+				    atop(seg_start1), atop(seg_end1));
+#endif
+				uvm_page_physload(atop(seg_start1),
+				    atop(seg_end1), atop(seg_start1),
+				    atop(seg_end1), VM_FREELIST_DEFAULT);
+			}
+		}
+	}
+#endif
+
+	/*
+	 * Steal memory for the message buffer (at end of core).
+	 */
+	{
+		struct vm_physseg *vps;
+		psize_t sz = round_page(MSGBUFSIZE);
+		psize_t reqsz = sz;
+
+		for (x = 0; x < vm_nphysseg; x++) {
+			vps = &vm_physmem[x];
+			if (ptoa(vps->avail_end) == avail_end)
+				goto found;
+		}
+		panic("init386: can't find end of memory");
+
+	found:
+		/* Shrink so it'll fit in the last segment. */
+		if ((vps->avail_end - vps->avail_start) < atop(sz))
+			sz = ptoa(vps->avail_end - vps->avail_start);
+
+		vps->avail_end -= atop(sz);
+		vps->end -= atop(sz);
+		msgbuf_paddr = ptoa(vps->avail_end);
+
+		/* Remove the last segment if it now has no pages. */
+		if (vps->start == vps->end) {
+			for (vm_nphysseg--; x < vm_nphysseg; x++)
+				vm_physmem[x] = vm_physmem[x + 1];
+		}
+
+		/* Now find where the new avail_end is. */
+		for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
+			if (vm_physmem[x].avail_end > avail_end)
+				avail_end = vm_physmem[x].avail_end;
+		avail_end = ptoa(avail_end);
+
+		/* Warn if the message buffer had to be shrunk. */
+		if (sz != reqsz)
+			printf("WARNING: %ld bytes not available for msgbuf "
+			    "in last cluster (%ld used)\n", reqsz, sz);
+	}
+
+	/*
+	 * install PT page for the first 4M if needed.
+	 */
+	if (needs_earlier_install_pte0) {
+		paddr_t paddr;
+#ifdef DIAGNOSTIC
+		if (realmode_reserved_size < PAGE_SIZE) {
+			panic("cannot steal memory for first 4M PT page.");
+		}
+#endif
+		paddr=realmode_reserved_start+realmode_reserved_size-PAGE_SIZE;
+		pmap_enter(pmap_kernel(), (vaddr_t)vtopte(0), paddr,
+			   VM_PROT_READ|VM_PROT_WRITE,
+			   PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE);
+		pmap_update(pmap_kernel());
+		/* make sure it is clean before using */
+		memset(vtopte(0), 0, PAGE_SIZE);
+		realmode_reserved_size -= PAGE_SIZE;
+	}
+
+#if NBIOSCALL > 0
+	/*
+	 * this should be caught at kernel build time, but put it here
+	 * in case someone tries to fake it out...
+	 */
+#ifdef DIAGNOSTIC
+	if (realmode_reserved_start > BIOSTRAMP_BASE ||
+	    (realmode_reserved_start+realmode_reserved_size) < (BIOSTRAMP_BASE+
+							       PAGE_SIZE)) {
+	    panic("cannot steal memory for PT page of bioscall.");
+	}
+	if (biostramp_image_size > PAGE_SIZE)
+	    panic("biostramp_image_size too big: %x vs. %x",
+		  biostramp_image_size, PAGE_SIZE);
+#endif
+	pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE,	/* virtual */
+		       (paddr_t)BIOSTRAMP_BASE,	/* physical */
+		       VM_PROT_ALL);		/* protection */
+	pmap_update(pmap_kernel());
+	memcpy((caddr_t)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
+#ifdef DEBUG_BIOSCALL
+	printf("biostramp installed @ %x\n", BIOSTRAMP_BASE);
+#endif
+	realmode_reserved_size  -= PAGE_SIZE;
+	realmode_reserved_start += PAGE_SIZE;
+#endif
+
+#if NACPI > 0
+	/*
+	 * Steal memory for the acpi wake code
+	 */
+	{
+		paddr_t paddr, p;
+		psize_t sz;
+		int npg;
+
+		paddr = realmode_reserved_start;
+		npg = acpi_md_get_npages_of_wakecode();
+		sz = ptoa(npg);
+#ifdef DIAGNOSTIC
+		if (realmode_reserved_size < sz) {
+			panic("cannot steal memory for ACPI wake code.");
+		}
+#endif
+
+		/* identical mapping */
+		p = paddr;
+		for (x=0; x<npg; x++) {
+			printf("kenter: 0x%08X\n", (unsigned)p);
+			pmap_kenter_pa((vaddr_t)p, p, VM_PROT_ALL);
+			p += PAGE_SIZE;
+		}
+		pmap_update(pmap_kernel());
+
+		acpi_md_install_wakecode(paddr);
+
+		realmode_reserved_size  -= sz;
+		realmode_reserved_start += sz;
+	}
+#endif
+
+	/* Map and zero the page that will hold the IDT (or Xen LDT). */
+	pmap_enter(pmap_kernel(), idt_vaddr, idt_paddr,
+	    VM_PROT_READ|VM_PROT_WRITE, PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE);
+	pmap_update(pmap_kernel());
+	memset((void *)idt_vaddr, 0, PAGE_SIZE);
+
+#if !defined(XEN)
+	idt = (struct gate_descriptor *)idt_vaddr;
+#ifdef I586_CPU
+	pmap_enter(pmap_kernel(), pentium_idt_vaddr, idt_paddr,
+	    VM_PROT_READ, PMAP_WIRED|VM_PROT_READ);
+	pentium_idt = (union descriptor *)pentium_idt_vaddr;
+#endif
+#endif
+	pmap_update(pmap_kernel());
+
+	initgdt();
+
+	/* Register the hypervisor event and failsafe callback entries. */
+	HYPERVISOR_set_callbacks(
+		GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
+		GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
+
+#if !defined(XEN)
+	tgdt = gdt;
+	gdt = (union descriptor *)
+		    ((char *)idt + NIDT * sizeof (struct gate_descriptor));
+	ldt = gdt + NGDT;
+
+	memcpy(gdt, tgdt, NGDT*sizeof(*gdt));
+
+	setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1,
+	    SDT_SYSLDT, SEL_KPL, 0, 0);
+#else
+	ldt = (union descriptor *)idt_vaddr;
+#endif
+
+	/* make ldt gates and memory segments */
+	setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1,
+	    SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	ldt[LUCODE_SEL] = gdt[GUCODE_SEL];
+	ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL];
+	ldt[LUDATA_SEL] = gdt[GUDATA_SEL];
+	ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
+
+#if !defined(XEN)
+	/* exceptions */
+	for (x = 0; x < 32; x++) {
+		setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386TGT,
+		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
+		    GSEL(GCODE_SEL, SEL_KPL));
+		idt_allocmap[x] = 1;
+	}
+
+	/* new-style interrupt gate for syscalls */
+	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	idt_allocmap[128] = 1;
+#ifdef COMPAT_SVR4
+	setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386TGT,
+	    SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
+	idt_allocmap[0xd2] = 1;
+#endif /* COMPAT_SVR4 */
+#endif
+
+	/*
+	 * Build the Xen trap table: one entry per exception vector,
+	 * plus the syscall (and optional SVR4) software-interrupt gates
+	 * that a native kernel would place in the IDT.
+	 */
+	memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT);
+	xen_idt_idx = 0;
+	for (x = 0; x < 32; x++) {
+		KASSERT(xen_idt_idx < MAX_XEN_IDT);
+		xen_idt[xen_idt_idx].vector = x;
+		xen_idt[xen_idt_idx].flags =
+			(x == 3 || x == 4) ? SEL_UPL : SEL_XEN;
+		xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
+		xen_idt[xen_idt_idx].address =
+			(uint32_t)IDTVEC(exceptions)[x];
+		xen_idt_idx++;
+	}
+	KASSERT(xen_idt_idx < MAX_XEN_IDT);
+	xen_idt[xen_idt_idx].vector = 128;
+	xen_idt[xen_idt_idx].flags = SEL_UPL;
+	xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
+	xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall);
+	xen_idt_idx++;
+#ifdef COMPAT_SVR4
+	KASSERT(xen_idt_idx < MAX_XEN_IDT);
+	xen_idt[xen_idt_idx].vector = 0xd2;
+	xen_idt[xen_idt_idx].flags = SEL_UPL;
+	xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
+	xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap);
+	xen_idt_idx++;
+#endif /* COMPAT_SVR4 */
+
+#if !defined(XEN)
+	setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
+	lgdt(&region);
+#else
+	lldt(GSEL(GLDT_SEL, SEL_KPL));
+#endif
+
+#if !defined(XEN)
+	cpu_init_idt();
+#else
+	db_trap_callback = ddb_trap_hook;
+
+	/* Hand the assembled trap table to the hypervisor. */
+	XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt));
+	if (HYPERVISOR_set_trap_table(xen_idt))
+		panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt);
+#endif
+
+#if NKSYMS || defined(DDB) || defined(LKM)
+	{
+		extern int end;
+		extern int *esym;
+		struct btinfo_symtab *symtab;
+
+#ifdef DDB
+		db_machine_init();
+#endif
+
+		symtab = lookup_bootinfo(BTINFO_SYMTAB);
+
+		if (symtab) {
+			symtab->ssym += KERNBASE;
+			symtab->esym += KERNBASE;
+			ksyms_init(symtab->nsym, (int *)symtab->ssym,
+			    (int *)symtab->esym);
+		}
+		else
+			ksyms_init(*(int *)&end, ((int *)&end) + 1, esym);
+	}
+#endif
+#ifdef DDB
+	if (boothowto & RB_KDB)
+		Debugger();
+#endif
+#ifdef IPKDB
+	ipkdb_init();
+	if (boothowto & RB_KDB)
+		ipkdb_connect(0);
+#endif
+#ifdef KGDB
+	kgdb_port_init();
+	if (boothowto & RB_KDB) {
+		kgdb_debug_init = 1;
+		kgdb_connect(1);
+	}
+#endif
+
+#if NMCA > 0
+	/* check for MCA bus, needed to be done before ISA stuff - if
+	 * MCA is detected, ISA needs to use level triggered interrupts
+	 * by default */
+	mca_busprobe();
+#endif
+
+#if !defined(XEN)
+	intr_default_setup();
+#endif
+
+	/* Initialize software interrupts. */
+	softintr_init();
+
+	splraise(IPL_IPI);
+	enable_intr();
+
+	if (physmem < btoc(2 * 1024 * 1024)) {
+		printf("warning: too little memory available; "
+		       "have %lu bytes, want %lu bytes\n"
+		       "running in degraded mode\n"
+		       "press a key to confirm\n\n",
+		       ptoa(physmem), 2*1024*1024UL);
+		cngetc();
+	}
+
+#ifdef __HAVE_CPU_MAXPROC
+	/* Make sure maxproc is sane */
+	if (maxproc > cpu_maxproc())
+		maxproc = cpu_maxproc();
+#endif
+}
+
+#ifdef COMPAT_NOMID
+/*
+ * exec_nomid: recognize a.out images whose header lacks a machine id
+ * (treated as MID_ZERO): 386BSD ZMAGIC and BSDI Q/N/OMAGIC formats.
+ * Returns 0 on success, ENOEXEC for an unknown magic, or the error
+ * from the matching exec_aout_prep_*() routine.
+ */
+static int
+exec_nomid(p, epp)
+	struct proc *p;
+	struct exec_package *epp;
+{
+	int error;
+	u_long midmag, magic;
+	u_short mid;
+	struct exec *execp = epp->ep_hdr;
+
+	/* check on validity of epp->ep_hdr performed by exec_out_makecmds */
+
+	midmag = ntohl(execp->a_midmag);
+	mid = (midmag >> 16) & 0xffff;
+	magic = midmag & 0xffff;
+
+	/* No machine id in the header at all: assume MID_ZERO. */
+	if (magic == 0) {
+		magic = (execp->a_midmag & 0xffff);
+		mid = MID_ZERO;
+	}
+
+	midmag = mid << 16 | magic;
+
+	switch (midmag) {
+	case (MID_ZERO << 16) | ZMAGIC:
+		/*
+		 * 386BSD's ZMAGIC format:
+		 */
+		error = exec_aout_prep_oldzmagic(p, epp);
+		break;
+
+	case (MID_ZERO << 16) | QMAGIC:
+		/*
+		 * BSDI's QMAGIC format:
+		 * same as new ZMAGIC format, but with different magic number
+		 */
+		error = exec_aout_prep_zmagic(p, epp);
+		break;
+
+	case (MID_ZERO << 16) | NMAGIC:
+		/*
+		 * BSDI's NMAGIC format:
+		 * same as NMAGIC format, but with different magic number
+		 * and with text starting at 0.
+		 */
+		error = exec_aout_prep_oldnmagic(p, epp);
+		break;
+
+	case (MID_ZERO << 16) | OMAGIC:
+		/*
+		 * BSDI's OMAGIC format:
+		 * same as OMAGIC format, but with different magic number
+		 * and with text starting at 0.
+		 */
+		error = exec_aout_prep_oldomagic(p, epp);
+		break;
+
+	default:
+		error = ENOEXEC;
+	}
+
+	return error;
+}
+#endif
+
+/*
+ * cpu_exec_aout_makecmds():
+ *	CPU-dependent a.out format hook for execve().
+ *
+ * Determine if the given exec package refers to something which we
+ * understand and, if so, set up the vmcmds for it.
+ *
+ * On the i386, old (386bsd) ZMAGIC binaries and BSDI QMAGIC binaries
+ * are accepted if COMPAT_NOMID is given as a kernel option.
+ */
+int
+cpu_exec_aout_makecmds(p, epp)
+	struct proc *p;
+	struct exec_package *epp;
+{
+	int error = ENOEXEC;
+
+#ifdef COMPAT_NOMID
+	/* exec_nomid() returns 0 on success; propagate it directly. */
+	if ((error = exec_nomid(p, epp)) == 0)
+		return error;
+#endif /* COMPAT_NOMID */
+
+	return error;
+}
+
+/*
+ * lookup_bootinfo:
+ *	Walk the bootloader-supplied bootinfo list and return a pointer
+ *	to the first entry of the given type, or 0 if none is present.
+ */
+void *
+lookup_bootinfo(type)
+int type;
+{
+	struct btinfo_common *bt;
+	int nentries;
+
+	/* The bootinfo area starts with an entry count. */
+	nentries = *(int *)bootinfo;
+	bt = (struct btinfo_common *)(bootinfo + sizeof(int));
+	for (; nentries > 0; nentries--) {
+		if (bt->type == type)
+			return (bt);
+		/* Entries are variable length; advance by bt->len bytes. */
+		bt = (struct btinfo_common *)((char *)bt + bt->len);
+	}
+	return (0);
+}
+
+#include <dev/ic/mc146818reg.h>		/* for NVRAM POST */
+#include <i386/isa/nvram.h>		/* for NVRAM POST */
+
+/*
+ * cpu_reset:
+ *	Attempt a machine reset by every means available, in order:
+ *	NVRAM reset byte, keyboard-controller RESET pulse, then a forced
+ *	triple fault; spin forever if all of them fail.
+ *	NOTE(review): these are bare-metal mechanisms; under Xen a
+ *	hypervisor shutdown call would normally be expected — confirm.
+ */
+void
+cpu_reset()
+{
+
+	disable_intr();
+
+	/*
+	 * Ensure the NVRAM reset byte contains something vaguely sane.
+	 */
+
+	outb(IO_RTC, NVRAM_RESET);
+	outb(IO_RTC+1, NVRAM_RESET_RST);
+
+	/*
+	 * The keyboard controller has 4 random output pins, one of which is
+	 * connected to the RESET pin on the CPU in many PCs.  We tell the
+	 * keyboard controller to pulse this line a couple of times.
+	 */
+	outb(IO_KBD + KBCMDP, KBC_PULSE0);
+	delay(100000);
+	outb(IO_KBD + KBCMDP, KBC_PULSE0);
+	delay(100000);
+
+	/*
+	 * Try to cause a triple fault and watchdog reset by making the IDT
+	 * invalid and causing a fault.
+	 */
+	memset((caddr_t)idt, 0, NIDT * sizeof(idt[0]));
+	__asm __volatile("divl %0,%1" : : "q" (0), "a" (0));
+
+#if 0
+	/*
+	 * Try to cause a triple fault and watchdog reset by unmapping the
+	 * entire address space and doing a TLB flush.
+	 */
+	memset((caddr_t)PTD, 0, PAGE_SIZE);
+	tlbflush();
+#endif
+
+	for (;;);
+}
+
+/*
+ * cpu_getmcontext:
+ *	Capture the machine context of LWP "l" into "mcp": general
+ *	registers always, FPU state only if the LWP has used the FPU.
+ *	Bits (_UC_CPU, _UC_FPU, _UC_FXSAVE) are OR'ed into *flags to
+ *	record which parts were saved.
+ */
+void
+cpu_getmcontext(l, mcp, flags)
+	struct lwp *l;
+	mcontext_t *mcp;
+	unsigned int *flags;
+{
+	const struct trapframe *tf = l->l_md.md_regs;
+	__greg_t *gr = mcp->__gregs;
+	__greg_t ras_eip;
+
+	/* Save register context. */
+#ifdef VM86
+	if (tf->tf_eflags & PSL_VM) {
+		gr[_REG_GS]  = tf->tf_vm86_gs;
+		gr[_REG_FS]  = tf->tf_vm86_fs;
+		gr[_REG_ES]  = tf->tf_vm86_es;
+		gr[_REG_DS]  = tf->tf_vm86_ds;
+		gr[_REG_EFL] = get_vflags(l);
+	} else
+#endif
+	{
+		gr[_REG_GS]  = tf->tf_gs;
+		gr[_REG_FS]  = tf->tf_fs;
+		gr[_REG_ES]  = tf->tf_es;
+		gr[_REG_DS]  = tf->tf_ds;
+		gr[_REG_EFL] = tf->tf_eflags;
+	}
+	gr[_REG_EDI]    = tf->tf_edi;
+	gr[_REG_ESI]    = tf->tf_esi;
+	gr[_REG_EBP]    = tf->tf_ebp;
+	gr[_REG_EBX]    = tf->tf_ebx;
+	gr[_REG_EDX]    = tf->tf_edx;
+	gr[_REG_ECX]    = tf->tf_ecx;
+	gr[_REG_EAX]    = tf->tf_eax;
+	gr[_REG_EIP]    = tf->tf_eip;
+	gr[_REG_CS]     = tf->tf_cs;
+	gr[_REG_ESP]    = tf->tf_esp;
+	gr[_REG_UESP]   = tf->tf_esp;
+	gr[_REG_SS]     = tf->tf_ss;
+	gr[_REG_TRAPNO] = tf->tf_trapno;
+	gr[_REG_ERR]    = tf->tf_err;
+
+	/*
+	 * If the PC lies within a registered restartable atomic
+	 * sequence, report the sequence's restart address instead.
+	 */
+	if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
+	    (caddr_t) gr[_REG_EIP])) != -1)
+		gr[_REG_EIP] = ras_eip;
+
+	*flags |= _UC_CPU;
+
+	/* Save floating point register context, if any. */
+	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
+#if NNPX > 0
+		/*
+		 * If this process is the current FP owner, dump its
+		 * context to the PCB first.
+		 * XXX npxsave() also clears the FPU state; depending on the
+		 * XXX application this might be a penalty.
+		 */
+		if (l->l_addr->u_pcb.pcb_fpcpu) {
+			npxsave_lwp(l, 1);
+		}
+#endif
+		if (i386_use_fxsave) {
+			memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
+			    &l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
+			    sizeof (mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm));
+			*flags |= _UC_FXSAVE;
+		} else {
+			memcpy(&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
+			    &l->l_addr->u_pcb.pcb_savefpu.sv_87,
+			    sizeof (mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state));
+		}
+#if 0
+		/* Apparently nothing ever touches this. */
+		ucp->mcp.mc_fp.fp_emcsts = l->l_addr->u_pcb.pcb_saveemc;
+#endif
+		*flags |= _UC_FPU;
+	}
+}
+
+/*
+ * cpu_setmcontext:
+ *	Install the machine context "mcp" into LWP "l".  Only the parts
+ *	selected by "flags" (_UC_CPU, _UC_FPU, stack flags) are restored.
+ *	Returns EINVAL if the supplied user state would violate kernel
+ *	protection, 0 otherwise.
+ */
+int
+cpu_setmcontext(l, mcp, flags)
+	struct lwp *l;
+	const mcontext_t *mcp;
+	unsigned int flags;
+{
+	struct trapframe *tf = l->l_md.md_regs;
+	const __greg_t *gr = mcp->__gregs;	/* read-only; mcp is const */
+
+	/* Restore register context, if any. */
+	if ((flags & _UC_CPU) != 0) {
+#ifdef VM86
+		if (gr[_REG_EFL] & PSL_VM) {
+			tf->tf_vm86_gs = gr[_REG_GS];
+			tf->tf_vm86_fs = gr[_REG_FS];
+			tf->tf_vm86_es = gr[_REG_ES];
+			tf->tf_vm86_ds = gr[_REG_DS];
+			set_vflags(l, gr[_REG_EFL]);
+			if (flags & _UC_VM) {
+				void syscall_vm86(struct trapframe *);
+				l->l_proc->p_md.md_syscall = syscall_vm86;
+			}
+		} else
+#endif
+		{
+			/*
+			 * Check for security violations.  If we're returning
+			 * to protected mode, the CPU will validate the segment
+			 * registers automatically and generate a trap on
+			 * violations.  We handle the trap, rather than doing
+			 * all of the checking here.
+			 */
+			if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
+			    !USERMODE(gr[_REG_CS], gr[_REG_EFL])) {
+				printf("cpu_setmcontext error: uc EFL: 0x%08x"
+				    " tf EFL: 0x%08x uc CS: 0x%x\n",
+				    gr[_REG_EFL], tf->tf_eflags, gr[_REG_CS]);
+				return (EINVAL);
+			}
+			tf->tf_gs = gr[_REG_GS];
+			tf->tf_fs = gr[_REG_FS];
+			tf->tf_es = gr[_REG_ES];
+			tf->tf_ds = gr[_REG_DS];
+			/* Only change the user-alterable part of eflags */
+			tf->tf_eflags &= ~PSL_USER;
+			tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
+		}
+		tf->tf_edi    = gr[_REG_EDI];
+		tf->tf_esi    = gr[_REG_ESI];
+		tf->tf_ebp    = gr[_REG_EBP];
+		tf->tf_ebx    = gr[_REG_EBX];
+		tf->tf_edx    = gr[_REG_EDX];
+		tf->tf_ecx    = gr[_REG_ECX];
+		tf->tf_eax    = gr[_REG_EAX];
+		tf->tf_eip    = gr[_REG_EIP];
+		tf->tf_cs     = gr[_REG_CS];
+		tf->tf_esp    = gr[_REG_UESP];
+		tf->tf_ss     = gr[_REG_SS];
+	}
+
+	/* Restore floating point register context, if any. */
+	if ((flags & _UC_FPU) != 0) {
+#if NNPX > 0
+		/*
+		 * If we were using the FPU, forget that we were.
+		 */
+		if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
+			npxsave_lwp(l, 0);
+#endif
+		if (flags & _UC_FXSAVE) {
+			if (i386_use_fxsave) {
+				/*
+				 * Copy the full FXSAVE area.  (Fixed: the
+				 * size was previously sizeof of a pointer,
+				 * which truncated the restored state.)
+				 */
+				memcpy(
+					&l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
+					&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
+					sizeof (l->l_addr->u_pcb.pcb_savefpu.sv_xmm));
+			} else {
+				/* This is a weird corner case */
+				process_xmm_to_s87((struct savexmm *)
+				    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
+				    &l->l_addr->u_pcb.pcb_savefpu.sv_87);
+			}
+		} else {
+			if (i386_use_fxsave) {
+				process_s87_to_xmm((struct save87 *)
+				    &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
+				    &l->l_addr->u_pcb.pcb_savefpu.sv_xmm);
+			} else {
+				memcpy(&l->l_addr->u_pcb.pcb_savefpu.sv_87,
+				    &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
+				    sizeof (l->l_addr->u_pcb.pcb_savefpu.sv_87));
+			}
+		}
+		/* If not set already. */
+		l->l_md.md_flags |= MDL_USEDFPU;
+#if 0
+		/* Apparently unused. */
+		l->l_addr->u_pcb.pcb_saveemc = mcp->mc_fp.fp_emcsts;
+#endif
+	}
+	if (flags & _UC_SETSTACK)
+		l->l_proc->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
+	if (flags & _UC_CLRSTACK)
+		l->l_proc->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
+	return (0);
+}
+
+/*
+ * cpu_initclocks:
+ *	Start the system clocks by dispatching through the function
+ *	pointer selected earlier (initclock_func).
+ */
+void
+cpu_initclocks()
+{
+	(*initclock_func)();
+}
+
+#ifdef MULTIPROCESSOR
+/*
+ * need_resched:
+ *	Mark CPU "ci" as wanting a reschedule.  If an LWP is running
+ *	there, post an AST to its process; otherwise, if "ci" is a
+ *	remote CPU, kick it with an IPI so it notices.
+ */
+void
+need_resched(struct cpu_info *ci)
+{
+
+	/* Already requested: nothing to do. */
+	if (ci->ci_want_resched)
+		return;
+
+	ci->ci_want_resched = 1;
+	if ((ci)->ci_curlwp != NULL)
+		aston((ci)->ci_curlwp->l_proc);
+	else if (ci != curcpu())
+		x86_send_ipi(ci, 0);
+}
+#endif
+
+/*
+ * Allocate an IDT vector slot within the given range.
+ * Returns the allocated vector number, or 0 if the range is full.
+ * XXX needs locking to avoid MP allocation races.
+ */
+
+int
+idt_vec_alloc(low, high)
+	int low;
+	int high;
+{
+	int slot;
+
+	simple_lock(&idt_lock);
+	for (slot = low; slot <= high; slot++) {
+		if (idt_allocmap[slot] != 0)
+			continue;
+		/* Found a free slot: claim it while still holding the lock. */
+		idt_allocmap[slot] = 1;
+		simple_unlock(&idt_lock);
+		return (slot);
+	}
+	simple_unlock(&idt_lock);
+	return (0);
+}
+
+/*
+ * idt_vec_set:
+ *	Point allocated IDT vector "vec" at "function" by installing an
+ *	interrupt gate in the kernel code segment.
+ */
+void
+idt_vec_set(vec, function)
+	int vec;
+	void (*function)(void);
+{
+	/*
+	 * Vector should be allocated, so no locking needed.
+	 */
+	KASSERT(idt_allocmap[vec] == 1);
+	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+}
+
+/*
+ * idt_vec_free:
+ *	Release IDT vector "vec": clear its gate and mark the slot free
+ *	in the allocation map.
+ */
+void
+idt_vec_free(vec)
+	int vec;
+{
+	simple_lock(&idt_lock);
+	unsetgate(&idt[vec]);
+	idt_allocmap[vec] = 0;
+	simple_unlock(&idt_lock);
+}
+
+/*
+ * Number of processes is limited by number of available GDT slots.
+ */
+int
+cpu_maxproc(void)
+{
+#ifdef USER_LDT
+	/* With USER_LDT each process may consume two GDT slots. */
+	return ((MAXGDTSIZ - NGDT) / 2);
+#else
+	return (MAXGDTSIZ - NGDT);
+#endif
+}
+
+#if defined(DDB) || defined(KGDB)
+
+/* 
+ * Callback to output a backtrace when entering ddb.  Runs only on the
+ * first trap entry (where == 1); subsequent entries are silent.
+ */
+void
+ddb_trap_hook(int where)
+{
+	static int once = 0;
+	db_addr_t db_dot;
+
+	if (once != 0 || where != 1)
+		return;
+	once = 1;
+
+	/* Identify what was running when we stopped, if anything. */
+	if (curlwp != NULL) {
+		db_printf("Stopped");
+		if (curproc == NULL)
+			db_printf("; curlwp = %p,"
+			    " curproc is NULL at\t", curlwp);
+		else
+			db_printf(" in pid %d.%d (%s) at\t", 
+			    curproc->p_pid, curlwp->l_lid,
+			    curproc->p_comm);
+	} else
+		db_printf("Stopped at\t");
+	db_dot = PC_REGS(DDB_REGS);
+	db_print_loc_and_inst(db_dot);
+
+	db_stack_trace_print((db_expr_t) db_dot, FALSE, 65535,
+	    "", db_printf);
+}
+
+#endif /* DDB || KGDB */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/mainbus.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,409 @@
+/*	$NetBSD: mainbus.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: mainbus.c,v 1.53 2003/10/27 14:11:47 junyoung Exp 	*/
+
+/*
+ * Copyright (c) 1996 Christopher G. Demetriou.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christopher G. Demetriou
+ *	for the NetBSD Project.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+
+#include <machine/bus.h>
+
+#include <dev/isa/isavar.h>
+#include <dev/eisa/eisavar.h>
+#include <dev/pci/pcivar.h>
+
+#include <dev/isa/isareg.h>		/* for ISA_HOLE_VADDR */
+
+#include "pci.h"
+#include "eisa.h"
+#include "isa.h"
+#include "isadma.h"
+#include "mca.h"
+#include "apm.h"
+#include "pnpbios.h"
+#include "acpi.h"
+#include "vesabios.h"
+#include "xenc.h"
+#include "xennet.h"
+#include "npx.h"
+
+#include "opt_mpacpi.h"
+#include "opt_mpbios.h"
+#include "opt_xen.h"
+
+#include <machine/cpuvar.h>
+#include <machine/i82093var.h>
+#include <machine/mpbiosvar.h>
+#include <machine/mpacpi.h>
+
+#if NAPM > 0
+#include <machine/bioscall.h>
+#include <machine/apmvar.h>
+#endif
+
+#if NPNPBIOS > 0
+#include <arch/i386/pnpbios/pnpbiosvar.h>
+#endif
+
+#if NACPI > 0
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpi_madt.h>
+#endif
+
+#if NMCA > 0
+#include <dev/mca/mcavar.h>
+#endif
+
+#if NVESABIOS > 0
+#include <arch/i386/bios/vesabios.h>
+#endif
+
+#ifdef XEN
+#include <machine/xen.h>
+#include <machine/hypervisor.h>
+#endif
+
+#if NXENNET > 0
+#include <net/if.h>
+#include <net/if_ether.h>
+#include <net/if_media.h>
+#include <machine/if_xennetvar.h>
+#endif
+
+int	mainbus_match(struct device *, struct cfdata *, void *);
+void	mainbus_attach(struct device *, struct device *, void *);
+
+CFATTACH_DECL(mainbus, sizeof(struct device),
+    mainbus_match, mainbus_attach, NULL, NULL);
+
+int	mainbus_print(void *, const char *);
+
+/*
+ * Attach arguments passed to children of mainbus.  The busname pointer
+ * is the first member of every variant, so mainbus_print() can identify
+ * the bus regardless of which arm is in use.
+ */
+union mainbus_attach_args {
+	const char *mba_busname;		/* first elem of all */
+	struct pcibus_attach_args mba_pba;
+	struct eisabus_attach_args mba_eba;
+	struct isabus_attach_args mba_iba;
+#if NMCA > 0
+	struct mcabus_attach_args mba_mba;
+#endif
+#if NAPM > 0
+	struct apm_attach_args mba_aaa;
+#endif
+#if NPNPBIOS > 0
+	struct pnpbios_attach_args mba_paa;
+#endif
+	struct cpu_attach_args mba_caa;
+	struct apic_attach_args aaa_caa;
+#if NACPI > 0
+	struct acpibus_attach_args mba_acpi;
+#endif
+#if NVESABIOS > 0
+	struct vesabios_attach_args mba_vba;
+#endif
+#if NXENC > 0
+	struct xenc_attach_args mba_xenc;
+#endif
+#if NXENNET > 0
+	struct xennet_attach_args mba_xennet;
+#endif
+#if NXENDISK > 0
+	/*
+	 * NOTE(review): no header defining NXENDISK is included above,
+	 * so this arm appears to be compiled out — confirm intended.
+	 */
+	struct xendisk_attach_args mba_xendisk;
+#endif
+#if NNPX > 0
+	struct xen_npx_attach_args mba_xennpx;
+#endif
+};
+
+/*
+ * This is set when the ISA bus is attached.  If it's not set by the
+ * time it's checked below, then mainbus attempts to attach an ISA.
+ */
+int	isa_has_been_seen;
+struct x86_isa_chipset x86_isa_chipset;
+#if NISA > 0
+/* Canned attach arguments used when mainbus itself attaches the ISA. */
+struct isabus_attach_args mba_iba = {
+	"isa",
+	X86_BUS_SPACE_IO, X86_BUS_SPACE_MEM,
+	&isa_bus_dma_tag,
+	&x86_isa_chipset
+};
+#endif
+
+/*
+ * Same as above, but for EISA.
+ */
+int	eisa_has_been_seen;
+
+#if defined(MPBIOS) || defined(MPACPI)
+/* MP configuration tables, filled in by the MPBIOS/MPACPI scanners. */
+struct mp_bus *mp_busses;
+int mp_nbus;
+struct mp_intr_map *mp_intrs;
+int mp_nintr;
+ 
+int mp_isa_bus = -1;            /* XXX */
+int mp_eisa_bus = -1;           /* XXX */
+
+#ifdef MPVERBOSE
+int mp_verbose = 1;
+#else
+int mp_verbose = 0;
+#endif
+#endif
+
+
+/*
+ * Probe for the mainbus; always succeeds.
+ */
+int
+mainbus_match(parent, match, aux)
+	struct device *parent;
+	struct cfdata *match;
+	void *aux;
+{
+
+	/* There is always exactly one mainbus. */
+	return (1);
+}
+
+/*
+ * Attach the mainbus.
+ *	Probes and attaches children in order: MP tables (MPBIOS/MPACPI),
+ *	CPUs, VESA BIOS, ACPI, PNPBIOS, PCI, MCA, EISA (skipped under
+ *	Xen), ISA, APM, and finally the Xen pseudo-devices.
+ */
+void
+mainbus_attach(parent, self, aux)
+	struct device *parent, *self;
+	void *aux;
+{
+	union mainbus_attach_args mba;
+#if NACPI > 0
+	int acpi_present = 0;
+#endif
+#ifdef MPBIOS
+	int mpbios_present = 0;
+#endif
+	int mpacpi_active = 0;
+
+	printf("\n");
+
+#ifdef MPBIOS
+	mpbios_present = mpbios_probe(self);
+#endif
+
+#if NPCI > 0
+	/*
+	 * ACPI needs to be able to access PCI configuration space.
+	 */
+	pci_mode = pci_mode_detect();
+#endif
+
+#if NACPI > 0
+	acpi_present = acpi_probe();
+#ifdef MPACPI
+	/*
+	 * First, see if the MADT contains CPUs, and possibly I/O APICs.
+	 * Building the interrupt routing structures can only
+	 * be done later (via a callback).
+	 */
+	if (acpi_present)
+		mpacpi_active = mpacpi_scan_apics(self);
+#endif
+#endif
+
+	/* No MP table found: attach a single boot processor by hand. */
+	if (!mpacpi_active) {
+#ifdef MPBIOS
+		if (mpbios_present)
+			mpbios_scan(self);
+		else
+#endif
+		{
+			struct cpu_attach_args caa;
+			
+			memset(&caa, 0, sizeof(caa));
+			caa.caa_name = "cpu";
+			caa.cpu_number = 0;
+			caa.cpu_role = CPU_ROLE_SP;
+			caa.cpu_func = 0;
+			
+			config_found(self, &caa, mainbus_print);
+		}
+	}
+
+#if NVESABIOS > 0
+	if (vbeprobe()) {
+		mba.mba_vba.vaa_busname = "vesabios";
+		config_found(self, &mba.mba_vba, mainbus_print);
+	}
+#endif
+
+#if NISADMA > 0 && (NACPI > 0 || NPNPBIOS > 0)
+	/*
+	 * ACPI and PNPBIOS need ISA DMA initialized before they start probing.
+	 */
+	isa_dmainit(&x86_isa_chipset, X86_BUS_SPACE_IO, &isa_bus_dma_tag,
+	    self);
+#endif
+
+#if NACPI > 0
+	if (acpi_present) {
+		mba.mba_acpi.aa_busname = "acpi";
+		mba.mba_acpi.aa_iot = X86_BUS_SPACE_IO;
+		mba.mba_acpi.aa_memt = X86_BUS_SPACE_MEM;
+		mba.mba_acpi.aa_pc = NULL;
+		mba.mba_acpi.aa_pciflags =
+		    PCI_FLAGS_IO_ENABLED | PCI_FLAGS_MEM_ENABLED |
+		    PCI_FLAGS_MRL_OKAY | PCI_FLAGS_MRM_OKAY |
+		    PCI_FLAGS_MWI_OKAY;
+		mba.mba_acpi.aa_ic = &x86_isa_chipset;
+		config_found(self, &mba.mba_acpi, mainbus_print);
+#if 0 /* XXXJRT not yet */
+		if (acpi_active) {
+			/*
+			 * ACPI already did all the work for us, there
+			 * is nothing more for us to do.
+			 */
+			return;
+		}
+#endif
+	}
+#endif
+
+#if NPNPBIOS > 0
+#if NACPI > 0
+	if (acpi_active == 0)
+#endif
+	if (pnpbios_probe()) {
+		mba.mba_paa.paa_busname = "pnpbios";
+		mba.mba_paa.paa_ic = &x86_isa_chipset;
+		config_found(self, &mba.mba_paa, mainbus_print);
+	}
+#endif
+
+	/*
+	 * XXX Note also that the presence of a PCI bus should
+	 * XXX _always_ be checked, and if present the bus should be
+	 * XXX 'found'.  However, because of the structure of the code,
+	 * XXX that's not currently possible.
+	 */
+#if NPCI > 0
+	if (pci_mode != 0) {
+		mba.mba_pba.pba_busname = "pci";
+		mba.mba_pba.pba_iot = X86_BUS_SPACE_IO;
+		mba.mba_pba.pba_memt = X86_BUS_SPACE_MEM;
+		mba.mba_pba.pba_dmat = &pci_bus_dma_tag;
+		mba.mba_pba.pba_dmat64 = NULL;
+		mba.mba_pba.pba_pc = NULL;
+		mba.mba_pba.pba_flags = pci_bus_flags();
+		mba.mba_pba.pba_bus = 0;
+		mba.mba_pba.pba_bridgetag = NULL;
+#if defined(MPACPI) && defined(MPACPI_SCANPCI)
+		if (mpacpi_active)
+			mpacpi_scan_pci(self, &mba.mba_pba, mainbus_print);
+		else
+#endif
+#if defined(MPBIOS) && defined(MPBIOS_SCANPCI)
+		if (mpbios_scanned != 0)
+			mpbios_scan_pci(self, &mba.mba_pba, mainbus_print);
+		else
+#endif
+		config_found(self, &mba.mba_pba, mainbus_print);
+	}
+#endif
+
+#if NMCA > 0
+	/* Note: MCA bus probe is done in i386/machdep.c */
+	if (MCA_system) {
+		mba.mba_mba.mba_busname = "mca";
+		mba.mba_mba.mba_iot = X86_BUS_SPACE_IO;
+		mba.mba_mba.mba_memt = X86_BUS_SPACE_MEM;
+		mba.mba_mba.mba_dmat = &mca_bus_dma_tag;
+		mba.mba_mba.mba_mc = NULL;
+		mba.mba_mba.mba_bus = 0;
+		config_found(self, &mba.mba_mba, mainbus_print);
+	}
+#endif
+
+#ifndef XEN
+	/* EISA ID probe reads the ISA hole directly; skipped under Xen. */
+	if (memcmp(ISA_HOLE_VADDR(EISA_ID_PADDR), EISA_ID, EISA_ID_LEN) == 0 &&
+	    eisa_has_been_seen == 0) {
+		mba.mba_eba.eba_busname = "eisa";
+		mba.mba_eba.eba_iot = X86_BUS_SPACE_IO;
+		mba.mba_eba.eba_memt = X86_BUS_SPACE_MEM;
+#if NEISA > 0
+		mba.mba_eba.eba_dmat = &eisa_bus_dma_tag;
+#endif
+		config_found(self, &mba.mba_eba, mainbus_print);
+	}
+#endif
+
+#if NISA > 0
+	if (isa_has_been_seen == 0)
+		config_found(self, &mba_iba, mainbus_print);
+#endif
+
+#if NAPM > 0
+#if NACPI > 0
+	if (acpi_active == 0)
+#endif
+	if (apm_busprobe()) {
+		mba.mba_aaa.aaa_busname = "apm";
+		config_found(self, &mba.mba_aaa, mainbus_print);
+	}
+#endif
+
+	/* Attach the Xen pseudo-devices: console, network, and FPU. */
+#if NXENC > 0
+	mba.mba_xenc.xa_busname = "xenc";
+	config_found(self, &mba.mba_xenc, mainbus_print);
+#endif
+#if NXENNET > 0
+	mba.mba_xennet.xa_busname = "xennet";
+	xennet_scan(self, &mba.mba_xennet, mainbus_print);
+#endif
+#if NNPX > 0
+	mba.mba_xennpx.xa_busname = "npx";
+	config_found(self, &mba.mba_xennpx, mainbus_print);
+#endif
+}
+
+/*
+ * Autoconfiguration print routine for mainbus children: report the
+ * bus name (and, for PCI, the bus number) of an unconfigured device.
+ */
+int
+mainbus_print(aux, pnp)
+	void *aux;
+	const char *pnp;
+{
+	union mainbus_attach_args *mba = aux;
+
+	if (pnp != NULL)
+		aprint_normal("%s at %s", mba->mba_busname, pnp);
+	if (strcmp(mba->mba_busname, "pci") == 0)
+		aprint_normal(" bus %d", mba->mba_pba.pba_bus);
+	return UNCONF;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/i386/npx.c	Thu Mar 11 21:44:08 2004 +0000
@@ -0,0 +1,795 @@
+/*	$NetBSD: npx.c,v 1.1 2004/03/11 21:44:08 cl Exp $	*/
+/*	NetBSD: npx.c,v 1.102 2004/02/13 11:36:14 wiz Exp 	*/
+
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)npx.c	7.2 (Berkeley) 5/12/91
+ */
+
+/*-
+ * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
+ * Copyright (c) 1990 William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)npx.c	7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
+
+#if 0
+#define IPRINTF(x)	printf x
+#else
+#define	IPRINTF(x)
+#endif
+
+#include "opt_cputype.h"
+#include "opt_multiprocessor.h"
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/ioctl.h>
+#include <sys/device.h>
+#include <sys/vmmeter.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/intr.h>
+#include <machine/cpufunc.h>
+#include <machine/pcb.h>
+#include <machine/trap.h>
+#include <machine/specialreg.h>
+#include <machine/pio.h>
+#include <machine/i8259.h>
+
+#include <dev/isa/isareg.h>
+#include <dev/isa/isavar.h>
+
+#include <i386/isa/npxvar.h>
+
+#ifdef XEN
+#include <machine/xen.h>
+#include <machine/hypervisor.h>
+#endif
+
+/*
+ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
+ *
+ * We do lazy initialization and switching using the TS bit in cr0 and the
+ * MDL_USEDFPU bit in mdproc.
+ *
+ * DNA exceptions are handled like this:
+ *
+ * 1) If there is no NPX, return and go to the emulator.
+ * 2) If someone else has used the NPX, save its state into that process's PCB.
+ * 3a) If MDL_USEDFPU is not set, set it and initialize the NPX.
+ * 3b) Otherwise, reload the process's previous NPX state.
+ *
+ * When a process is created or exec()s, its saved cr0 image has the TS bit
+ * set and the MDL_USEDFPU bit clear.  The MDL_USEDFPU bit is set when the
+ * process first gets a DNA and the NPX is initialized.  The TS bit is turned
+ * off when the NPX is used, and turned on again later when the process's NPX
+ * state is saved.
+ */
+
+/* Inline wrappers for the x87 instructions used below. */
+#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
+#define	fnclex()		__asm("fnclex")
+#define	fninit()		__asm("fninit")
+#define	fnsave(addr)		__asm("fnsave %0" : "=m" (*addr))
+#define	fnstcw(addr)		__asm("fnstcw %0" : "=m" (*addr))
+#define	fnstsw(addr)		__asm("fnstsw %0" : "=m" (*addr))
+#define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fwait")
+#define	frstor(addr)		__asm("frstor %0" : : "m" (*addr))
+#define	fwait()			__asm("fwait")
+#ifndef XEN
+#define	clts()			__asm("clts")
+#define	stts()			lcr0(rcr0() | CR0_TS)
+#else
+/*
+ * Under Xen, clts() expands to nothing and stts() defers CR0.TS
+ * management to the hypervisor via HYPERVISOR_fpu_taskswitch().
+ */
+#define	clts()
+#define	stts()			HYPERVISOR_fpu_taskswitch()
+#endif
+
+int npxdna(struct cpu_info *);
+static int	npxdna_notset(struct cpu_info *);
+static int	npxdna_s87(struct cpu_info *);
+#ifdef I686_CPU
+static int	npxdna_xmm(struct cpu_info  *);
+#endif /* I686_CPU */
+static int	x86fpflags_to_ksiginfo(u_int32_t flags);
+
+#ifdef I686_CPU
+#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
+#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
+#endif /* I686_CPU */
+
+static	enum npx_type		npx_type;
+volatile u_int			npx_intrs_while_probing;
+volatile u_int			npx_traps_while_probing;
+
+extern int i386_fpu_present;
+extern int i386_fpu_exception;
+extern int i386_fpu_fdivbug;
+
+struct npx_softc		*npx_softc;
+
+/*
+ * fpu_save:
+ *	Dump the current FPU state into "addr": FXSAVE when the CPU
+ *	supports it, plain FNSAVE otherwise.
+ */
+static __inline void
+fpu_save(union savefpu *addr)
+{
+#ifdef I686_CPU
+	if (i386_use_fxsave)
+	{
+                fxsave(&addr->sv_xmm);
+
+		/* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
+		fninit();
+	} else
+#endif /* I686_CPU */
+		fnsave(&addr->sv_87);
+}
+
+/*
+ * npxdna_notset:
+ *	Placeholder DNA handler; reaching it means a device-not-available
+ *	trap fired before a real handler was installed at attach time.
+ */
+static int
+npxdna_notset(struct cpu_info *ci)
+{
+	panic("npxdna vector not initialized");
+}
+
+/* Active DNA handler; replaced by npxattach(). */
+int    (*npxdna_func)(struct cpu_info *) = npxdna_notset;
+
+#if 0
+/*
+ * NOTE(review): the code below is compiled out in the Xen port.  It is
+ * the legacy bare-metal NPX probe and no-FPU fallback, kept for
+ * reference.
+ */
+static int
+npxdna_empty(struct cpu_info *ci)
+{
+
+	/* raise a DNA TRAP, math_emulate would take over eventually */
+	IPRINTF(("Emul"));
+	return 0;
+}
+
+
+/*
+ * This calls i8259_* directly, but currently we can count on systems
+ * having a i8259 compatible setup all the time. Maybe have to change
+ * that in the future.
+ */
+enum npx_type
+npxprobe1(bus_space_tag_t iot, bus_space_handle_t ioh, int irq)
+{
+	struct gate_descriptor save_idt_npxintr;
+	struct gate_descriptor save_idt_npxtrap;
+	enum npx_type rv = NPX_NONE;
+	u_long	save_eflags;
+	int control;
+	int status;
+	unsigned irqmask;
+
+	/* Temporarily steal the IRQ13 and trap-16 gates for the probe. */
+	save_eflags = read_eflags();
+	disable_intr();
+	save_idt_npxintr = idt[NRSVIDT + irq];
+	save_idt_npxtrap = idt[16];
+	setgate(&idt[NRSVIDT + irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+
+	irqmask = i8259_setmask(~((1 << IRQ_SLAVE) | (1 << irq)));
+
+	/*
+	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
+	 * it after a warm boot.
+	 */
+	/* full reset on some systems, NOP on others */
+	bus_space_write_1(iot, ioh, 1, 0);
+	delay(1000);
+	/* clear BUSY# latch */
+	bus_space_write_1(iot, ioh, 0, 0);
+
+	/*
+	 * We set CR0 in locore to trap all ESC and WAIT instructions.
+	 * We have to turn off the CR0_EM bit temporarily while probing.
+	 */
+	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
+	enable_intr();
+
+	/*
+	 * Finish resetting the coprocessor, if any.  If there is an error
+	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
+	 * it OK.  Bogus halts have never been observed, but we enabled
+	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
+	 */
+	fninit();
+	delay(1000);		/* wait for any IRQ13 (fwait might hang) */
+
+	/*
+	 * Check for a status of mostly zero.
+	 */
+	status = 0x5a5a;
+	fnstsw(&status);
+	if ((status & 0xb8ff) == 0) {
+		/*
+		 * Good, now check for a proper control word.
+		 */
+		control = 0x5a5a;
+		fnstcw(&control);
+		if ((control & 0x1f3f) == 0x033f) {
+			/*
+			 * We have an npx, now divide by 0 to see if exception
+			 * 16 works.
+			 */
+			control &= ~(1 << 2);	/* enable divide by 0 trap */
+			fldcw(&control);
+			npx_traps_while_probing = npx_intrs_while_probing = 0;
+			fp_divide_by_0();
+			if (npx_traps_while_probing != 0) {
+				/*
+				 * Good, exception 16 works.
+				 */
+				rv = NPX_EXCEPTION;
+				i386_fpu_exception = 1;
+			} else if (npx_intrs_while_probing != 0) {
+				/*
+				 * Bad, we are stuck with IRQ13.
+				 */
+				rv = NPX_INTERRUPT;
+			} else {
+				/*
+				 * Worse, even IRQ13 is broken.  Use emulator.
+				 */
+				rv = NPX_BROKEN;
+			}
+		}
+	}
+
+	/* Restore the state we saved at entry. */
+	disable_intr();
+	lcr0(rcr0() | (CR0_EM|CR0_TS));
+
+	irqmask = i8259_setmask(irqmask);
+
+	idt[NRSVIDT + irq] = save_idt_npxintr;
+	idt_allocmap[NRSVIDT + irq] = 1;
+
+	idt[16] = save_idt_npxtrap;
+	write_eflags(save_eflags);
+
+	if ((rv == NPX_NONE) || (rv == NPX_BROKEN)) {
+		/* No FPU. Handle it here, npxattach won't be called */
+		npxdna_func = npxdna_empty;
+	}
+
+	return (rv);
+}
+#endif
+
+/*
+ * npxinit:
+ *	Initialize the FPU for CPU "ci": clear EM/TS so FP instructions
+ *	execute, FNINIT the unit, check for the Pentium FDIV bug, then
+ *	set TS again so the first FP use faults into the DNA handler.
+ *	NOTE(review): lcr0/rcr0 are assumed to be Xen-safe wrappers
+ *	here — confirm in cpufunc.h/xenfunc.h.
+ */
+void npxinit(ci)
+	struct cpu_info *ci;
+{
+	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
+	fninit();
+	/* 4195835/3145727 misdivides on flawed Pentium FDIV units. */
+	if (npx586bug1(4195835, 3145727) != 0) {
+		i386_fpu_fdivbug = 1;
+		printf("%s: WARNING: Pentium FDIV bug detected!\n",
+		    ci->ci_dev->dv_xname);
+	}
+	lcr0(rcr0() | (CR0_TS));
+}
+
+/*
+ * Common attach routine.
+ *	Records the softc and probed NPX type, marks the FPU present,
+ *	and installs the DNA handler appropriate to the CPU (FXSAVE
+ *	capable vs. plain x87).
+ */
+void
+npxattach(struct npx_softc *sc)
+{
+
+	npx_softc = sc;
+	npx_type = sc->sc_type;
+
+	i386_fpu_present = 1;
+
+#ifdef I686_CPU
+	if (i386_use_fxsave)
+		npxdna_func = npxdna_xmm;
+	else
+#endif /* I686_CPU */
+		npxdna_func = npxdna_s87;
+}
+
+/*
+ * Record the FPU state and reinitialize it all except for the control word.
+ * Then generate a SIGFPE.
+ *
+ * Reinitializing the state allows naive SIGFPE handlers to longjmp without
+ * doing any fixups.
+ *
+ * XXX there is currently no way to pass the full error state to signal
+ * handlers, and if this is a nested interrupt there is no way to pass even
+ * a status code!  So there is no way to have a non-naive SIGFPE handler.  At
+ * best a handler could do an fninit followed by an fldcw of a static value.
+ * fnclex would be of little use because it would leave junk on the FPU stack.
+ * Returning from the handler would be even less safe than usual because
+ * IRQ13 exception handling makes exceptions even less precise than usual.
+ */
+int
+npxintr(void *arg, struct intrframe iframe)
+{
+	struct cpu_info *ci = curcpu();
+	struct lwp *l = ci->ci_fpcurlwp;
+	union savefpu *addr;
+	struct intrframe *frame = &iframe;
+	struct npx_softc *sc;
+	ksiginfo_t ksi;
+
+	sc = npx_softc;
+
+	uvmexp.traps++;
+	IPRINTF(("%s: fp intr\n", ci->ci_dev->dv_xname));
+	panic("not supported\n");
+
+	/*
+	 * Clear the interrupt latch.
+	 */
+	bus_space_write_1(sc->sc_iot, sc->sc_ioh, 0, 0);
+
+	/*
+	 * If we're saving, ignore the interrupt.  The FPU will generate
+	 * another one when we restore the state later.
+	 */
+	if (ci->ci_fpsaving)
+		return (1);
+
+	if (l == NULL || npx_type == NPX_NONE) {
+		printf("npxintr: l = %p, curproc = %p, npx_type = %d\n",
+		    l, curproc, npx_type);
+		printf("npxintr: came from nowhere");
+		return 1;
+	}
+
+#ifdef DIAGNOSTIC
+	/*
+	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS
+	 * bit should be set, and we should have gotten a DNA exception.
+	 */
+	if (l != curlwp)
+		panic("npxintr: wrong process");
+#endif
+
+	/*
+	 * Find the address of fpcurproc's saved FPU state.  (Given the
+	 * invariant above, this is always the one in curpcb.)
+	 */
+	addr = &l->l_addr->u_pcb.pcb_savefpu;
+	/*
+	 * Save state.  This does an implied fninit.  It had better not halt
+	 * the CPU or we'll hang.
+	 */
+	fpu_save(addr);
+	fwait();
+        if (i386_use_fxsave) {
+		fldcw(&addr->sv_xmm.sv_env.en_cw);
+		/*
+		 * FNINIT doesn't affect MXCSR or the XMM registers;
+		 * no need to re-load MXCSR here.
+		 */
+        } else
+                fldcw(&addr->sv_87.sv_env.en_cw);
+	fwait();
+	/*
+	 * Remember the exception status word and tag word.  The current
+	 * (almost fninit'ed) fpu state is in the fpu and the exception
+	 * state just saved will soon be junk.  However, the implied fninit
+	 * doesn't change the error pointers or register contents, and we
+	 * preserved the control word and will copy the status and tag
+	 * words, so the complete exception state can be recovered.
+	 */
+        if