[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XenPPC] [PATCH] [ppc] merge with upstream



235 files changed, 7718 insertions(+), 4608 deletions(-)
.hgignore                                                               |    2 
buildconfigs/linux-defconfig_xen0_ia64                                  |   38 
buildconfigs/linux-defconfig_xenU_ia64                                  |   30 
buildconfigs/linux-defconfig_xen_ia64                                   |   38 
docs/src/interface.tex                                                  |   29 
docs/src/user.tex                                                       |   32 
extras/mini-os/Makefile                                                 |   15 
extras/mini-os/include/mm.h                                             |   79 
extras/mini-os/include/types.h                                          |    7 
extras/mini-os/kernel.c                                                 |   11 
extras/mini-os/lib/printf.c                                             |    4 
extras/mini-os/lib/string.c                                             |    4 
extras/mini-os/mm.c                                                     |   71 
extras/mini-os/traps.c                                                  |   21 
extras/mini-os/x86_32.S                                                 |    8 
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c                       |  266 
+-
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c                         |    1 
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c                            |   13 
linux-2.6-xen-sparse/arch/ia64/Kconfig                                  |   54 
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c                           |    3 
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre                      |   14 
linux-2.6-xen-sparse/arch/ia64/xen/Makefile                             |    6 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile                     |   24 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile                 |   12 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c                |   17 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c               |    9 
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S                          |    4 
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c                         |  550 
+++-
linux-2.6-xen-sparse/arch/ia64/xen/util.c                               |  115 
linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c                          |   12 
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S                           |  198 -
linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S                             |  586 
++--
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h                        |    2 
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S                           |   21 
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c                      |  257 
--
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c                     |   73 
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                          |   26 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c                         |    2 
linux-2.6-xen-sparse/drivers/xen/Makefile                               |    1 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c                      |   12 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c                      |    4 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c                       |    2 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c                    |    6 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c                         |    2 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                        |    2 
linux-2.6-xen-sparse/drivers/xen/console/console.c                      |   32 
linux-2.6-xen-sparse/drivers/xen/core/Makefile                          |   11 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c                     |  185 +
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c                          |   31 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c                          |    5 
linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c                |    3 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c                          |    9 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c                         |  215 -
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c                        |    3 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c                      |   58 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c                      |    7 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c                       |   31 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c                    |   91 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c                       |    2 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c                      |    4 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c                      |   26 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c                       |    6 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c                 |    8 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c                  |   12 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h         |    2 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h        |   28 
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h                       |   84 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h                      |   34 
linux-2.6-xen-sparse/include/asm-ia64/page.h                            |   50 
linux-2.6-xen-sparse/include/asm-ia64/privop.h                          |   11 
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h                      |   58 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h             |   63 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h                          |   42 
linux-2.6-xen-sparse/include/xen/net_driver_util.h                      |   48 
linux-2.6-xen-sparse/include/xen/xenbus.h                               |    8 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch                       |   18 
patches/linux-2.6.16.13/xen-hotplug.patch                               |   11 
tools/examples/network-bridge                                           |    2 
tools/examples/xmexample.vti                                            |    2 
tools/libxc/Makefile                                                    |    1 
tools/libxc/xc_acm.c                                                    |   33 
tools/libxc/xc_csched.c                                                 |   50 
tools/libxc/xc_ia64_stubs.c                                             |    6 
tools/libxc/xc_linux_build.c                                            |   57 
tools/libxc/xc_linux_restore.c                                          |  122 
tools/libxc/xc_load_elf.c                                               |   54 
tools/libxc/xc_private.c                                                |   22 
tools/libxc/xc_ptrace.c                                                 |  173 -
tools/libxc/xc_ptrace.h                                                 |    3 
tools/libxc/xc_ptrace_core.c                                            |    7 
tools/libxc/xc_tbuf.c                                                   |   56 
tools/libxc/xenctrl.h                                                   |   13 
tools/libxc/xg_private.h                                                |   10 
tools/python/xen/lowlevel/acm/acm.c                                     |   54 
tools/python/xen/lowlevel/xc/xc.c                                       |   68 
tools/python/xen/lowlevel/xs/xs.c                                       |   11 
tools/python/xen/xend/XendDomain.py                                     |   22 
tools/python/xen/xend/XendDomainInfo.py                                 |   24 
tools/python/xen/xend/balloon.py                                        |   11 
tools/python/xen/xend/image.py                                          |   27 
tools/python/xen/xend/server/SrvDomain.py                               |   14 
tools/python/xen/xend/xenstore/xstransact.py                            |   28 
tools/python/xen/xm/main.py                                             |   45 
tools/security/secpol_tool.c                                            |   32 
tools/tests/test_x86_emulator.c                                         |   67 
tools/xenstore/Makefile                                                 |    8 
tools/xenstore/xenstored_core.c                                         |    7 
tools/xenstore/xenstored_core.h                                         |    8 
tools/xenstore/xenstored_domain.c                                       |   37 
tools/xenstore/xenstored_linux.c                                        |   69 
tools/xenstore/xenstored_proc.h                                         |   27 
tools/xentrace/xentrace_format                                          |    6 
tools/xm-test/configure.ac                                              |    1 
tools/xm-test/ramdisk/bin/create_disk_image                             |    7 
tools/xm-test/tests/Makefile.am                                         |    7 
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py      |   62 
tools/xm-test/tests/block-integrity/Makefile.am                         |   21 
tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py |   32 
tools/xm-test/tests/network/03_network_local_tcp_pos.py                 |    4 
tools/xm-test/tests/network/04_network_local_udp_pos.py                 |    4 
tools/xm-test/tests/network/06_network_dom0_tcp_pos.py                  |    2 
tools/xm-test/tests/network/07_network_dom0_udp_pos.py                  |    2 
tools/xm-test/tests/network/12_network_domU_tcp_pos.py                  |    2 
tools/xm-test/tests/network/13_network_domU_udp_pos.py                  |    2 
xen/arch/ia64/Rules.mk                                                  |   28 
xen/arch/ia64/asm-offsets.c                                             |    2 
xen/arch/ia64/linux-xen/setup.c                                         |   10 
xen/arch/ia64/linux-xen/smp.c                                           |   32 
xen/arch/ia64/linux-xen/unaligned.c                                     |    2 
xen/arch/ia64/tools/sparse-merge                                        |  144 +
xen/arch/ia64/vmx/pal_emul.c                                            |    6 
xen/arch/ia64/vmx/vlsapic.c                                             |    7 
xen/arch/ia64/vmx/vmmu.c                                                |   22 
xen/arch/ia64/vmx/vmx_entry.S                                           |   69 
xen/arch/ia64/vmx/vmx_init.c                                            |   24 
xen/arch/ia64/vmx/vmx_interrupt.c                                       |    6 
xen/arch/ia64/vmx/vmx_ivt.S                                             |  127 -
xen/arch/ia64/vmx/vmx_phy_mode.c                                        |    9 
xen/arch/ia64/vmx/vmx_process.c                                         |    2 
xen/arch/ia64/vmx/vmx_support.c                                         |    8 
xen/arch/ia64/vmx/vmx_vcpu.c                                            |    4 
xen/arch/ia64/vmx/vmx_virt.c                                            |   23 
xen/arch/ia64/vmx/vtlb.c                                                |   86 
xen/arch/ia64/xen/Makefile                                              |    1 
xen/arch/ia64/xen/dom0_ops.c                                            |   12 
xen/arch/ia64/xen/dom_fw.c                                              |   51 
xen/arch/ia64/xen/domain.c                                              |  687 
++++-
xen/arch/ia64/xen/efi_emul.c                                            |  180 +
xen/arch/ia64/xen/hypercall.c                                           |   98 
xen/arch/ia64/xen/hyperprivop.S                                         |  138 -
xen/arch/ia64/xen/ivt.S                                                 |   49 
xen/arch/ia64/xen/privop.c                                              |   27 
xen/arch/ia64/xen/process.c                                             |  301 
--
xen/arch/ia64/xen/regionreg.c                                           |   10 
xen/arch/ia64/xen/vcpu.c                                                |   68 
xen/arch/ia64/xen/vhpt.c                                                |  214 +
xen/arch/ia64/xen/xenasm.S                                              |  349 
--
xen/arch/ia64/xen/xenmisc.c                                             |    3 
xen/arch/ia64/xen/xensetup.c                                            |    3 
xen/arch/x86/dom0_ops.c                                                 |   21 
xen/arch/x86/domain.c                                                   |    2 
xen/arch/x86/domain_build.c                                             |    5 
xen/arch/x86/hvm/hvm.c                                                  |   22 
xen/arch/x86/hvm/i8254.c                                                |  405 
+--
xen/arch/x86/hvm/intercept.c                                            |   82 
xen/arch/x86/hvm/svm/intr.c                                             |   47 
xen/arch/x86/hvm/svm/svm.c                                              |   48 
xen/arch/x86/hvm/svm/vmcb.c                                             |   31 
xen/arch/x86/hvm/vmx/io.c                                               |   62 
xen/arch/x86/hvm/vmx/vmx.c                                              |   37 
xen/arch/x86/mm.c                                                       |  204 +
xen/arch/x86/shadow.c                                                   |    6 
xen/arch/x86/shadow32.c                                                 |    8 
xen/arch/x86/shadow_public.c                                            |    5 
xen/arch/x86/traps.c                                                    |    4 
xen/arch/x86/x86_emulate.c                                              |   81 
xen/common/Makefile                                                     |    1 
xen/common/acm_ops.c                                                    |  282 
+-
xen/common/elf.c                                                        |   49 
xen/common/grant_table.c                                                |   15 
xen/common/kernel.c                                                     |    5 
xen/common/sched_credit.c                                               | 1233 
++++++++++
xen/common/schedule.c                                                   |    5 
xen/common/trace.c                                                      |    6 
xen/include/asm-ia64/config.h                                           |   11 
xen/include/asm-ia64/dom_fw.h                                           |   14 
xen/include/asm-ia64/domain.h                                           |   26 
xen/include/asm-ia64/event.h                                            |    2 
xen/include/asm-ia64/flushtlb.h                                         |    9 
xen/include/asm-ia64/grant_table.h                                      |   33 
xen/include/asm-ia64/linux-xen/asm/pgalloc.h                            |    2 
xen/include/asm-ia64/linux-xen/asm/pgtable.h                            |   14 
xen/include/asm-ia64/linux-xen/asm/tlbflush.h                           |  119 
xen/include/asm-ia64/mm.h                                               |   10 
xen/include/asm-ia64/shadow.h                                           |   57 
xen/include/asm-ia64/tlbflush.h                                         |   37 
xen/include/asm-ia64/vcpu.h                                             |    8 
xen/include/asm-ia64/vhpt.h                                             |   18 
xen/include/asm-ia64/vmx_vcpu.h                                         |    2 
xen/include/asm-x86/domain.h                                            |   21 
xen/include/asm-x86/fixmap.h                                            |   10 
xen/include/asm-x86/hvm/domain.h                                        |    6 
xen/include/asm-x86/hvm/svm/intr.h                                      |    1 
xen/include/asm-x86/hvm/svm/svm.h                                       |    1 
xen/include/asm-x86/hvm/vcpu.h                                          |    3 
xen/include/asm-x86/hvm/vmx/vmx.h                                       |    1 
xen/include/asm-x86/hvm/vpit.h                                          |   67 
xen/include/asm-x86/string.h                                            |  162 -
xen/include/asm-x86/x86_emulate.h                                       |   66 
xen/include/public/acm_ops.h                                            |   54 
xen/include/public/arch-ia64.h                                          |  119 
xen/include/public/arch-x86_32.h                                        |   36 
xen/include/public/arch-x86_64.h                                        |   29 
xen/include/public/callback.h                                           |   10 
xen/include/public/dom0_ops.h                                           |  205 -
xen/include/public/event_channel.h                                      |   99 
xen/include/public/grant_table.h                                        |   30 
xen/include/public/hvm/ioreq.h                                          |   26 
xen/include/public/hvm/vmx_assist.h                                     |    5 
xen/include/public/io/blkif.h                                           |   12 
xen/include/public/io/netif.h                                           |   32 
xen/include/public/io/tpmif.h                                           |   19 
xen/include/public/io/xenbus.h                                          |   59 
xen/include/public/memory.h                                             |   48 
xen/include/public/nmi.h                                                |    5 
xen/include/public/physdev.h                                            |   45 
xen/include/public/sched.h                                              |   15 
xen/include/public/sched_ctl.h                                          |    5 
xen/include/public/vcpu.h                                               |   10 
xen/include/public/version.h                                            |   15 
xen/include/public/xen.h                                                |   47 
xen/include/public/xenoprof.h                                           |   15 
xen/include/xen/hypercall.h                                             |    2 
xen/include/xen/sched-if.h                                              |    2 
xen/include/xen/softirq.h                                               |   13 


# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Node ID f54d38cea8acaa870e6b73990fbff61fe4c3e2ac
# Parent  e7424645152709dfbacd30df4b996db736403408
# Parent  d5f98d23427a0d256b896fc63ccfd2c1f79e55ba
[ppc] merge with upstream
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>

diff -r e74246451527 -r f54d38cea8ac .hgignore
--- a/.hgignore Tue May 30 12:52:02 2006 -0500
+++ b/.hgignore Tue May 30 14:30:34 2006 -0500
@@ -14,7 +14,7 @@
 .*\.orig$
 .*\.rej$
 .*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
 ^[^/]*\.bz2$
 ^TAGS$
 ^dist/.*$
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64    Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen0_ia64    Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen0
-# Mon Mar 27 14:46:03 2006
+# Linux kernel version: 2.6.16.13-xen0
+# Mon May 22 14:46:31 2006
 #
 
 #
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1522,3 +1517,30 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64    Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xenU_ia64    Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xenU
-# Mon Mar 27 14:01:13 2006
+# Linux kernel version: 2.6.16.13-xenU
+# Mon May 22 15:05:32 2006
 #
 
 #
@@ -89,12 +89,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1386,3 +1381,22 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+CONFIG_XEN_UNPRIVILEGED_GUEST=y
+# CONFIG_XEN_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen_ia64
--- a/buildconfigs/linux-defconfig_xen_ia64     Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen_ia64     Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen
-# Mon Mar 27 14:36:21 2006
+# Linux kernel version: 2.6.16.13-xen
+# Mon May 22 14:15:20 2006
 #
 
 #
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1528,3 +1523,30 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac docs/src/interface.tex
--- a/docs/src/interface.tex    Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/interface.tex    Tue May 30 14:30:34 2006 -0500
@@ -205,30 +205,23 @@ implement timeout values when they block
 implement timeout values when they block.
 
 
-
-%% % akw: demoting this to a section -- not sure if there is any point
-%% % though, maybe just remove it.
-
-% KAF: Remove these random sections!
-\begin{comment}
 \section{Xen CPU Scheduling}
 
 Xen offers a uniform API for CPU schedulers.  It is possible to choose
 from a number of schedulers at boot and it should be easy to add more.
-The BVT, Atropos and Round Robin schedulers are part of the normal Xen
-distribution.  BVT provides proportional fair shares of the CPU to the
-running domains.  Atropos can be used to reserve absolute shares of
-the CPU for each domain.  Round-robin is provided as an example of
-Xen's internal scheduler API.
+The SEDF, BVT, and Credit schedulers are part of the normal Xen
+distribution.  BVT and SEDF will be going away and their use should be
+avoided once the credit scheduler has stabilized and become the default.
+The Credit scheduler provides proportional fair shares of the
+host's CPUs to the running domains. It does this while transparently
+load balancing runnable VCPUs across the whole system.
 
 \paragraph*{Note: SMP host support}
-Xen has always supported SMP host systems.  Domains are statically
-assigned to CPUs, either at creation time or when manually pinning to
-a particular CPU.  The current schedulers then run locally on each CPU
-to decide which of the assigned domains should be run there. The
-user-level control software can be used to perform coarse-grain
-load-balancing between CPUs.
-\end{comment}
+Xen has always supported SMP host systems. When using the credit scheduler,
+a domain's VCPUs will be dynamically moved across physical CPUs to maximise
+domain and system throughput. VCPUs can also be manually restricted to be
+mapped only on a subset of the host's physical CPUs, using the pinning
+mechanism.
 
 
 %% More information on the characteristics and use of these schedulers
diff -r e74246451527 -r f54d38cea8ac docs/src/user.tex
--- a/docs/src/user.tex Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/user.tex Tue May 30 14:30:34 2006 -0500
@@ -1093,6 +1093,36 @@ running domains in \xend's SXP configura
 
 You can get access to the console of a particular domain using 
 the \verb_# xm console_ command  (e.g.\ \verb_# xm console myVM_). 
+
+\subsection{Domain Scheduling Management Commands}
+
+The credit CPU scheduler automatically load balances guest VCPUs
+across all available physical CPUs on an SMP host. The user need
+not manually pin VCPUs to load balance the system. However, she
+can restrict which CPUs a particular VCPU may run on using
+the \path{xm vcpu-pin} command.
+
+Each guest domain is assigned a \path{weight} and a \path{cap}.
+
+A domain with a weight of 512 will get twice as much CPU as a
+domain with a weight of 256 on a contended host. Legal weights
+range from 1 to 65535 and the default is 256.
+
+The cap optionally fixes the maximum amount of CPU a guest will
+be able to consume, even if the host system has idle CPU cycles.
+The cap is expressed in percentage of one physical CPU: 100 is
+1 physical CPU, 50 is half a CPU, 400 is 4 CPUs, etc... The
+default, 0, means there is no upper cap.
+
+When you are running with the credit scheduler, you can check and
+modify your domains' weights and caps using the \path{xm sched-credit}
+command:
+
+\begin{tabular}{ll}
+\verb!xm sched-credit -d <domain>! & lists weight and cap \\
+\verb!xm sched-credit -d <domain> -w <weight>! & sets the weight \\
+\verb!xm sched-credit -d <domain> -c <cap>! & sets the cap
+\end{tabular}
 
 
 
@@ -1985,7 +2015,7 @@ editing \path{grub.conf}.
 \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
   pages (default 0).  
 \item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
-  current possibilities are `sedf' (default) and `bvt'.
+  current possibilities are `sedf' (default), `credit', and `bvt'.
 \item [ apic\_verbosity=debug,verbose ] Print more detailed
   information about local APIC and IOAPIC configuration.
 \item [ lapic ] Force use of local APIC even when left disabled by
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/Makefile   Tue May 30 14:30:34 2006 -0500
@@ -1,4 +1,5 @@ debug ?= y
 debug ?= y
+pae ?= n
 
 include $(CURDIR)/../../Config.mk
 
@@ -12,11 +13,17 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
 override CPPFLAGS := -Iinclude $(CPPFLAGS)
 ASFLAGS = -D__ASSEMBLY__
 
+LDLIBS =  -L. -lminios
 LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
 
 ifeq ($(TARGET_ARCH),x86_32)
 CFLAGS += -m32 -march=i686
 LDFLAGS += -m elf_i386
+endif
+
+ifeq ($(TARGET_ARCH)$(pae),x86_32y)
+CFLAGS  += -DCONFIG_X86_PAE=1
+ASFLAGS += -DCONFIG_X86_PAE=1
 endif
 
 ifeq ($(TARGET_ARCH),x86_64)
@@ -49,11 +56,11 @@ links:
 links:
        [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
 
-libminios.a: $(OBJS) $(HEAD)
-       ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+       $(AR) r libminios.a $(HEAD) $(OBJS)
 
-$(TARGET): links libminios.a $(HEAD)
-       $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+       $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
        gzip -f -9 -c $@.elf >$@.gz
 
 .PHONY: clean
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/mm.h       Tue May 30 14:30:34 2006 -0500
@@ -43,6 +43,8 @@
 
 #if defined(__i386__)
 
+#if !defined(CONFIG_X86_PAE)
+
 #define L2_PAGETABLE_SHIFT      22
 
 #define L1_PAGETABLE_ENTRIES    1024
@@ -50,6 +52,30 @@
 
 #define PADDR_BITS              32
 #define PADDR_MASK              (~0UL)
+
+#define UNMAPPED_PT_FRAMES        1
+#define PRIpte "08lx"
+typedef unsigned long pgentry_t;
+
+#else /* defined(CONFIG_X86_PAE) */
+
+#define L2_PAGETABLE_SHIFT      21
+#define L3_PAGETABLE_SHIFT      30
+
+#define L1_PAGETABLE_ENTRIES    512
+#define L2_PAGETABLE_ENTRIES    512
+#define L3_PAGETABLE_ENTRIES    4
+
+#define PADDR_BITS              44
+#define PADDR_MASK              ((1ULL << PADDR_BITS)-1)
+
+#define L2_MASK  ((1UL << L3_PAGETABLE_SHIFT) - 1)
+
+#define UNMAPPED_PT_FRAMES        2
+#define PRIpte "016llx"
+typedef uint64_t pgentry_t;
+
+#endif /* !defined(CONFIG_X86_PAE) */
 
 #elif defined(__x86_64__)
 
@@ -81,6 +107,10 @@
 #define L2_MASK  ((1UL << L3_PAGETABLE_SHIFT) - 1)
 #define L3_MASK  ((1UL << L4_PAGETABLE_SHIFT) - 1)
 
+#define UNMAPPED_PT_FRAMES        3
+#define PRIpte "016lx"
+typedef unsigned long pgentry_t;
+
 #endif
 
 #define L1_MASK  ((1UL << L2_PAGETABLE_SHIFT) - 1)
@@ -90,9 +120,11 @@
   (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
 #define l2_table_offset(_a) \
   (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
 #define l3_table_offset(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#endif
+#if defined(__x86_64__)
 #define l4_table_offset(_a) \
   (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
 #endif
@@ -111,14 +143,21 @@
 #if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
+#if defined(CONFIG_X86_PAE)
+#define L3_PROT (_PAGE_PRESENT)
+#endif /* CONFIG_X86_PAE */
 #elif defined(__x86_64__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#endif
-
+#endif /* __i386__ || __x86_64__ */
+
+#ifndef CONFIG_X86_PAE
 #define PAGE_SIZE       (1UL << L1_PAGETABLE_SHIFT)
+#else
+#define PAGE_SIZE       (1ULL << L1_PAGETABLE_SHIFT)
+#endif
 #define PAGE_SHIFT      L1_PAGETABLE_SHIFT
 #define PAGE_MASK       (~(PAGE_SIZE-1))
 
@@ -129,23 +168,31 @@
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
 extern unsigned long *phys_to_machine_mapping;
 extern char _text, _etext, _edata, _end;
 #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
-static __inline__ unsigned long phys_to_machine(unsigned long phys)
-{
-    unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
-    machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
-    return machine;
-}
-
+static __inline__ maddr_t phys_to_machine(paddr_t phys)
+{
+       maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+       machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+       return machine;
+}
 
 #define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
-static __inline__ unsigned long machine_to_phys(unsigned long machine)
-{
-    unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
-    phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
-    return phys;
+static __inline__ paddr_t machine_to_phys(maddr_t machine)
+{
+       paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+       phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+       return phys;
 }
 
 #define VIRT_START                 ((unsigned long)&_text)
@@ -155,7 +202,7 @@ static __inline__ unsigned long machine_
 
 #define virt_to_pfn(_virt)         (PFN_DOWN(to_phys(_virt)))
 #define mach_to_virt(_mach)        (to_virt(machine_to_phys(_mach)))
-#define mfn_to_virt(_mfn)          (mach_to_virt(_mfn << PAGE_SHIFT))
+#define mfn_to_virt(_mfn)          (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT))
 #define pfn_to_virt(_pfn)          (to_virt(_pfn << PAGE_SHIFT))
 
 /* Pagetable walking. */
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h    Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/types.h    Tue May 30 14:30:34 2006 -0500
@@ -43,14 +43,19 @@ typedef unsigned long long  u_quad_t;
 typedef unsigned long long  u_quad_t;
 typedef unsigned int        uintptr_t;
 
+#if !defined(CONFIG_X86_PAE)
 typedef struct { unsigned long pte_low; } pte_t;
+#else
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+#endif /* CONFIG_X86_PAE */
+
 #elif defined(__x86_64__)
 typedef long                quad_t;
 typedef unsigned long       u_quad_t;
 typedef unsigned long       uintptr_t;
 
 typedef struct { unsigned long pte; } pte_t;
-#endif
+#endif /* __i386__ || __x86_64__ */
 
 typedef  u8 uint8_t;
 typedef  s8 int8_t;
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/kernel.c   Tue May 30 14:30:34 2006 -0500
@@ -63,7 +63,12 @@ void failsafe_callback(void);
 
 extern char shared_info[PAGE_SIZE];
 
+#if !defined(CONFIG_X86_PAE)
 #define __pte(x) ((pte_t) { (x) } )
+#else
+#define __pte(x) ({ unsigned long long _x = (x);        \
+    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
 
 static shared_info_t *map_shared_info(unsigned long pa)
 {
@@ -71,7 +76,7 @@ static shared_info_t *map_shared_info(un
         (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) )
     {
         printk("Failed to map shared_info!!\n");
-        *(int*)0=0;
+        do_exit();
     }
     return (shared_info_t *)shared_info;
 }
@@ -126,6 +131,10 @@ void start_kernel(start_info_t *si)
     /* WARN: don't do printk before here, it uses information from
        shared_info. Use xprintk instead. */
     memcpy(&start_info, si, sizeof(*si));
+    
+    /* set up minimal memory info */
+    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+
     /* Grab the shared_info pointer and put it in a safe place. */
     HYPERVISOR_shared_info = map_shared_info(start_info.shared_info);
 
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/printf.c       Tue May 30 14:30:34 2006 -0500
@@ -53,6 +53,8 @@
  *
  * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char 
        return i;
 }
 
-
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/string.c       Tue May 30 14:30:34 2006 -0500
@@ -17,6 +17,8 @@
  * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
         }
         return NULL;
 }
+
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/mm.c       Tue May 30 14:30:34 2006 -0500
@@ -368,7 +368,7 @@ void new_pt_frame(unsigned long *pt_pfn,
 void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, 
                                 unsigned long offset, unsigned long level)
 {   
-    unsigned long *tab = (unsigned long *)start_info.pt_base;
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base;
     unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
     unsigned long prot_e, prot_t, pincmd;
     mmu_update_t mmu_updates[1];
@@ -382,40 +382,45 @@ void new_pt_frame(unsigned long *pt_pfn,
        as a page table page */
     memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE);  
  
-    if (level == L1_FRAME)
-    {
+    switch ( level )
+    {
+    case L1_FRAME:
          prot_e = L1_PROT;
          prot_t = L2_PROT;
          pincmd = MMUEXT_PIN_L1_TABLE;
-    }
-#if (defined __x86_64__)
-    else if (level == L2_FRAME)
-    {
+         break;
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+    case L2_FRAME:
          prot_e = L2_PROT;
          prot_t = L3_PROT;
          pincmd = MMUEXT_PIN_L2_TABLE;
-    }
-    else if (level == L3_FRAME)
-    {
+         break;
+#endif
+#if defined(__x86_64__)
+    case L3_FRAME:
          prot_e = L3_PROT;
          prot_t = L4_PROT;
          pincmd = MMUEXT_PIN_L3_TABLE;
-    }
-#endif
-    else
-    {
+         break;
+#endif
+    default:
          printk("new_pt_frame() called with invalid level number %d\n", level);
          do_exit();
-    }    
+         break;
+    }
 
     /* Update the entry */
-#if (defined __x86_64__)
+#if defined(__x86_64__)
     tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
     tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
 #endif
-    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + 
-                         sizeof(void *)* l1_table_offset(pt_page);
-    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | 
+#if defined(CONFIG_X86_PAE)
+    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
+#endif
+
+    mmu_updates[0].ptr = ((pgentry_t)tab[l2_table_offset(pt_page)] & 
PAGE_MASK) + 
+                         sizeof(pgentry_t) * l1_table_offset(pt_page);
+    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | 
                          (prot_e & ~_PAGE_RW);
     if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
     {
@@ -434,8 +439,8 @@ void new_pt_frame(unsigned long *pt_pfn,
 
     /* Now fill the new page table page with entries.
        Update the page directory as well. */
-    mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset;
-    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
+    mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + 
sizeof(pgentry_t) * offset;
+    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
     if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) 
     {            
        printk("ERROR: mmu_update failed\n");
@@ -450,16 +455,13 @@ void build_pagetable(unsigned long *star
     unsigned long start_address, end_address;
     unsigned long pfn_to_map, pt_pfn = *start_pfn;
     static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
-    unsigned long *tab = (unsigned long *)start_info.pt_base;
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
     unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
-    unsigned long page, offset;
+    unsigned long offset;
     int count = 0;
 
-#if defined(__x86_64__)
-    pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES;
-#else
-    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
-#endif
+    pfn_to_map = (start_info.nr_pt_frames - UNMAPPED_PT_FRAMES) * 
L1_PAGETABLE_ENTRIES;
+
     start_address = (unsigned long)pfn_to_virt(pfn_to_map);
     end_address = (unsigned long)pfn_to_virt(*max_pfn);
     
@@ -468,7 +470,7 @@ void build_pagetable(unsigned long *star
 
     while(start_address < end_address)
     {
-        tab = (unsigned long *)start_info.pt_base;
+        tab = (pgentry_t *)start_info.pt_base;
         mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
 
 #if defined(__x86_64__)
@@ -480,6 +482,8 @@ void build_pagetable(unsigned long *star
         page = tab[offset];
         mfn = pte_to_mfn(page);
         tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
         offset = l3_table_offset(start_address);
         /* Need new L2 pt frame */
         if(!(start_address & L2_MASK)) 
@@ -498,9 +502,9 @@ void build_pagetable(unsigned long *star
         mfn = pte_to_mfn(page);
         offset = l1_table_offset(start_address);
 
-        mmu_updates[count].ptr = (mfn << PAGE_SHIFT) + sizeof(void *) * offset;
+        mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + 
sizeof(pgentry_t) * offset;
         mmu_updates[count].val = 
-            pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+            (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
         count++;
         if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn)
         {
@@ -557,9 +561,6 @@ void init_mm(void)
     printk("  stack start:  %p\n", &stack);
     printk("  _end:         %p\n", &_end);
 
-    /* set up minimal memory infos */
-    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
-   
     /* First page follows page table pages and 3 more pages (store page etc) */
     start_pfn = PFN_UP(to_phys(start_info.pt_base)) + 
                 start_info.nr_pt_frames + 3;
@@ -569,7 +570,7 @@ void init_mm(void)
     printk("  max_pfn:      %lx\n", max_pfn);
 
     build_pagetable(&start_pfn, &max_pfn);
-    
+
     /*
      * now we can initialise the page allocator
      */
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/traps.c
--- a/extras/mini-os/traps.c    Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/traps.c    Tue May 30 14:30:34 2006 -0500
@@ -95,25 +95,26 @@ DO_ERROR(18, "machine check", machine_ch
 
 void page_walk(unsigned long virt_address)
 {
-        unsigned long *tab = (unsigned long *)start_info.pt_base;
-        unsigned long addr = virt_address, page;
+        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+        unsigned long addr = virt_address;
         printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, 
start_info.pt_base);
     
 #if defined(__x86_64__)
         page = tab[l4_table_offset(addr)];
-        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk(" L4 = %p (%p)  [offset = %lx]\n", page, tab, 
l4_table_offset(addr));
-
+        tab = pte_to_virt(page);
+        printk(" L4 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l4_table_offset(addr));
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
         page = tab[l3_table_offset(addr)];
-        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk("  L3 = %p (%p)  [offset = %lx]\n", page, tab, 
l3_table_offset(addr));
+        tab = pte_to_virt(page);
+        printk("  L3 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l3_table_offset(addr));
 #endif
         page = tab[l2_table_offset(addr)];
-        tab =  to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk("   L2 = %p (%p)  [offset = %lx]\n", page, tab, 
l2_table_offset(addr));
+        tab = pte_to_virt(page);
+        printk("   L2 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l2_table_offset(addr));
         
         page = tab[l1_table_offset(addr)];
-        printk("    L1 = %p (%p)  [offset = %lx]\n", page, tab, 
l1_table_offset(addr));
+        printk("    L1 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l1_table_offset(addr));
 
 }
 
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/x86_32.S
--- a/extras/mini-os/x86_32.S   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/x86_32.S   Tue May 30 14:30:34 2006 -0500
@@ -4,9 +4,15 @@
 .section __xen_guest
        .ascii  "GUEST_OS=Mini-OS"
        .ascii  ",XEN_VER=xen-3.0"
+       .ascii  ",VIRT_BASE=0xc0000000" /* &_text from minios_x86_32.lds */
+       .ascii  ",ELF_PADDR_OFFSET=0xc0000000"
        .ascii  ",HYPERCALL_PAGE=0x2"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes"
+#else
+       .ascii  ",PAE=no"
+#endif
        .ascii  ",LOADER=generic"
-       .ascii  ",PT_MODE_WRITABLE"
        .byte   0
 .text
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 14:30:34 
2006 -0500
@@ -70,9 +70,6 @@
 
 /* Forward Declaration. */
 void __init find_max_pfn(void);
-
-/* Allows setting of maximum possible memory size  */
-static unsigned long xen_override_max_pfn;
 
 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block xen_panic_block = {
@@ -399,6 +396,26 @@ start_info_t *xen_start_info;
 start_info_t *xen_start_info;
 EXPORT_SYMBOL(xen_start_info);
 
+static void __init add_memory_region(unsigned long long start,
+                                  unsigned long long size, int type)
+{
+       int x;
+
+       if (!efi_enabled) {
+                       x = e820.nr_map;
+
+               if (x == E820MAX) {
+                   printk(KERN_ERR "Ooops! Too many entries in the memory 
map!\n");
+                   return;
+               }
+
+               e820.map[x].addr = start;
+               e820.map[x].size = size;
+               e820.map[x].type = type;
+               e820.nr_map++;
+       }
+} /* add_memory_region */
+
 static void __init limit_regions(unsigned long long size)
 {
        unsigned long long current_addr = 0;
@@ -442,27 +459,20 @@ static void __init limit_regions(unsigne
                }
                return;
        }
-}
-
-static void __init add_memory_region(unsigned long long start,
-                                  unsigned long long size, int type)
-{
-       int x;
-
-       if (!efi_enabled) {
-                       x = e820.nr_map;
-
-               if (x == E820MAX) {
-                   printk(KERN_ERR "Ooops! Too many entries in the memory 
map!\n");
-                   return;
-               }
-
-               e820.map[x].addr = start;
-               e820.map[x].size = size;
-               e820.map[x].type = type;
-               e820.nr_map++;
-       }
-} /* add_memory_region */
+#ifdef CONFIG_XEN
+       if (i==e820.nr_map && current_addr < size) {
+               /*
+                 * The e820 map finished before our requested size so
+                 * extend the final entry to the requested address.
+                 */
+               --i;
+               if (e820.map[i].type == E820_RAM)
+                       e820.map[i].size -= current_addr - size;
+               else
+                       add_memory_region(current_addr, size - current_addr, 
E820_RAM);
+       }
+#endif
+}
 
 #define E820_DEBUG     1
 
@@ -492,7 +502,6 @@ static void __init print_memory_map(char
        }
 }
 
-#if 0
 /*
  * Sanitize the BIOS e820 map.
  *
@@ -680,9 +689,13 @@ static int __init sanitize_e820_map(stru
  */
 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 {
+#ifndef CONFIG_XEN
        /* Only one memory region (or negative)? Ignore it */
        if (nr_map < 2)
                return -1;
+#else
+       BUG_ON(nr_map < 1);
+#endif
 
        do {
                unsigned long long start = biosmap->addr;
@@ -694,6 +707,7 @@ static int __init copy_e820_map(struct e
                if (start > end)
                        return -1;
 
+#ifndef CONFIG_XEN
                /*
                 * Some BIOSes claim RAM in the 640k - 1M region.
                 * Not right. Fix it up.
@@ -708,11 +722,11 @@ static int __init copy_e820_map(struct e
                                size = end - start;
                        }
                }
+#endif
                add_memory_region(start, size, type);
        } while (biosmap++,--nr_map);
        return 0;
 }
-#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -785,13 +799,8 @@ static void __init parse_cmdline_early (
                                unsigned long long mem_size;
  
                                mem_size = memparse(from+4, &from);
-#if 0
                                limit_regions(mem_size);
                                userdef=1;
-#else
-                               xen_override_max_pfn =
-                                       (unsigned long)(mem_size>>PAGE_SHIFT);
-#endif
                        }
                }
 
@@ -984,7 +993,6 @@ static void __init parse_cmdline_early (
        }
 }
 
-#if 0 /* !XEN */
 /*
  * Callback for efi_memory_walk.
  */
@@ -1036,21 +1044,6 @@ void __init find_max_pfn(void)
                memory_present(0, start, end);
        }
 }
-#else
-/* We don't use the fake e820 because we need to respond to user override. */
-void __init find_max_pfn(void)
-{
-       if (xen_override_max_pfn == 0) {
-               max_pfn = xen_start_info->nr_pages;
-               /* Default 8MB slack (to balance backend allocations). */
-               max_pfn += 8 << (20 - PAGE_SHIFT);
-       } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
-               max_pfn = xen_override_max_pfn;
-       } else {
-               max_pfn = xen_start_info->nr_pages;
-       }
-}
-#endif /* XEN */
 
 /*
  * Determine low and high memory ranges:
@@ -1158,6 +1151,15 @@ static void __init register_bootmem_low_
                 */
                last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
 
+#ifdef CONFIG_XEN
+               /*
+                 * Truncate to the number of actual pages currently
+                 * present.
+                 */
+               if (last_pfn > xen_start_info->nr_pages)
+                       last_pfn = xen_start_info->nr_pages;
+#endif
+
                if (last_pfn > max_low_pfn)
                        last_pfn = max_low_pfn;
 
@@ -1351,83 +1353,33 @@ void __init remapped_pgdat_init(void)
  * and also for regions reported as reserved by the e820.
  */
 static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource 
*data_resource)
+legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
+                           struct resource *code_resource,
+                           struct resource *data_resource)
 {
        int i;
-#ifdef CONFIG_XEN
-       dom0_op_t op;
-       struct dom0_memory_map_entry *map;
-       unsigned long gapstart, gapsize;
-       unsigned long long last;
-#endif
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        probe_roms();
 #endif
 
-#ifdef CONFIG_XEN
-       map = alloc_bootmem_low_pages(PAGE_SIZE);
-       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
-       set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
-       op.u.physical_memory_map.max_map_entries =
-               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
-       BUG_ON(HYPERVISOR_dom0_op(&op));
-
-       last = 0x100000000ULL;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-
-       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
+       for (i = 0; i < nr_map; i++) {
                struct resource *res;
-
-               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
-                       gapsize = last - map[i].end;
-                       gapstart = map[i].end;
-               }
-               if (map[i].start < last)
-                       last = map[i].start;
-
-               if (map[i].end > 0x100000000ULL)
+               if (e820[i].addr + e820[i].size > 0x100000000ULL)
                        continue;
                res = alloc_bootmem_low(sizeof(struct resource));
-               res->name = map[i].is_ram ? "System RAM" : "reserved";
-               res->start = map[i].start;
-               res->end = map[i].end - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-               request_resource(&iomem_resource, res);
-       }
-
-       free_bootmem(__pa(map), PAGE_SIZE);
-
-       /*
-        * Start allocating dynamic PCI memory a bit into the gap,
-        * aligned up to the nearest megabyte.
-        *
-        * Question: should we try to pad it up a bit (do something
-        * like " + (gapsize >> 3)" in there too?). We now have the
-        * technology.
-        */
-       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
-
-       printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
-               pci_mem_start, gapstart, gapsize);
-#else
-       for (i = 0; i < e820.nr_map; i++) {
-               struct resource *res;
-               if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
-                       continue;
-               res = alloc_bootmem_low(sizeof(struct resource));
-               switch (e820.map[i].type) {
+               switch (e820[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
                case E820_ACPI: res->name = "ACPI Tables"; break;
                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
                default:        res->name = "reserved";
                }
-               res->start = e820.map[i].addr;
-               res->end = res->start + e820.map[i].size - 1;
+               res->start = e820[i].addr;
+               res->end = res->start + e820[i].size - 1;
                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
-               if (e820.map[i].type == E820_RAM) {
+#ifndef CONFIG_XEN
+               if (e820[i].type == E820_RAM) {
                        /*
                         *  We don't know which RAM region contains kernel data,
                         *  so we try it repeatedly and let the resource manager
@@ -1439,38 +1391,21 @@ legacy_init_iomem_resources(struct resou
                        request_resource(res, &crashk_res);
 #endif
                }
-       }
-#endif
-}
-
-/*
- * Request address space for all standard resources
- */
-static void __init register_memory(void)
-{
-#ifndef CONFIG_XEN
+#endif
+       }
+}
+
+/*
+ * Locate an unused range of the physical address space below 4G which
+ * can be used for PCI mappings.
+ */
+static void __init
+e820_setup_gap(struct e820entry *e820, int nr_map)
+{
        unsigned long gapstart, gapsize, round;
        unsigned long long last;
-#endif
-       int           i;
-
-       /* Nothing to do if not running in dom0. */
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return;
-
-       if (efi_enabled)
-               efi_initialize_iomem_resources(&code_resource, &data_resource);
-       else
-               legacy_init_iomem_resources(&code_resource, &data_resource);
-
-       /* EFI systems may still have VGA */
-       request_resource(&iomem_resource, &video_ram_resource);
-
-       /* request I/O space for devices used on all i[345]86 PCs */
-       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
-               request_resource(&ioport_resource, &standard_io_resources[i]);
-
-#ifndef CONFIG_XEN
+       int i;
+
        /*
         * Search for the bigest gap in the low 32 bits of the e820
         * memory space.
@@ -1478,10 +1413,10 @@ static void __init register_memory(void)
        last = 0x100000000ull;
        gapstart = 0x10000000;
        gapsize = 0x400000;
-       i = e820.nr_map;
+       i = nr_map;
        while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
+               unsigned long long start = e820[i].addr;
+               unsigned long long end = start + e820[i].size;
 
                /*
                 * Since "last" is at most 4GB, we know we'll
@@ -1511,6 +1446,53 @@ static void __init register_memory(void)
 
        printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
                pci_mem_start, gapstart, gapsize);
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(void)
+{
+#ifdef CONFIG_XEN
+       struct e820entry *machine_e820;
+       struct xen_memory_map memmap;
+#endif
+       int           i;
+
+       /* Nothing to do if not running in dom0. */
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
+               return;
+
+#ifdef CONFIG_XEN
+       machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, machine_e820);
+
+       BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+
+       legacy_init_iomem_resources(machine_e820, memmap.nr_entries,
+                                   &code_resource, &data_resource);
+#else
+       if (efi_enabled)
+               efi_initialize_iomem_resources(&code_resource, &data_resource);
+       else
+               legacy_init_iomem_resources(e820.map, e820.nr_map,
+                                           &code_resource, &data_resource);
+#endif
+
+       /* EFI systems may still have VGA */
+       request_resource(&iomem_resource, &video_ram_resource);
+
+       /* request I/O space for devices used on all i[345]86 PCs */
+       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+               request_resource(&ioport_resource, &standard_io_resources[i]);
+
+#ifdef CONFIG_XEN
+       e820_setup_gap(machine_e820, memmap.nr_entries);
+       free_bootmem(__pa(machine_e820), PAGE_SIZE);
+#else
+       e820_setup_gap(e820.map, e820.nr_map);
 #endif
 }
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c   Tue May 30 14:30:34 
2006 -0500
@@ -191,6 +191,7 @@ swiotlb_init(void)
        if (swiotlb_force == 1) {
                swiotlb = 1;
        } else if ((swiotlb_force != -1) &&
+                  is_running_on_xen() &&
                   (xen_start_info->flags & SIF_INITDOMAIN)) {
                /* Domain 0 always has a swiotlb. */
                ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Tue May 30 14:30:34 
2006 -0500
@@ -228,6 +228,12 @@ static inline int page_kills_ppro(unsign
        return 0;
 }
 
+#else
+
+#define page_kills_ppro(p)     0
+
+#endif
+
 extern int is_available_memory(efi_memory_desc_t *);
 
 int page_is_ram(unsigned long pagenr)
@@ -268,13 +274,6 @@ int page_is_ram(unsigned long pagenr)
        }
        return 0;
 }
-
-#else /* CONFIG_XEN */
-
-#define page_kills_ppro(p)     0
-#define page_is_ram(p)         1
-
-#endif
 
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue May 30 14:30:34 2006 -0500
@@ -51,7 +51,7 @@ config GENERIC_IOMAP
        default y
 
 config XEN
-       bool
+       bool "Xen hypervisor support"
        default y
        help
          Enable Xen hypervisor support.  Resulting kernel runs
@@ -60,34 +60,9 @@ config ARCH_XEN
 config ARCH_XEN
        bool
        default y
+       depends on XEN
        help
          TEMP ONLY. Needs to be on for drivers/xen to build.
-
-config XEN_PRIVILEGED_GUEST
-       bool "Privileged Guest"
-       default n
-       help
-         Used in drivers/xen/privcmd.c.  Should go away?
-
-config XEN_BLKDEV_GRANT
-       depends on XEN
-       bool
-       default y
-
-config XEN_BLKDEV_FRONTEND
-       depends on XEN
-       bool
-       default y
-
-config XEN_BACKEND
-       depends on XEN
-       bool
-       default y
-
-config XEN_BLKDEV_BACKEND
-       depends on XEN && XEN_BACKEND
-       bool
-       default y
 
 config XEN_IA64_DOM0_VP
        bool "dom0 vp model"
@@ -102,18 +77,6 @@ config XEN_IA64_DOM0_NON_VP
        default y
        help
          dom0 P=M model
-
-config XEN_SYSFS
-       bool "Export Xen attributes in sysfs"
-       depends on XEN && SYSFS
-       default y
-       help
-               Xen hypervisor attributes will show up under /sys/hypervisor/.
-
-config XEN_INTERFACE_VERSION
-       hex
-       depends on XEN
-       default 0x00030202
 
 config SCHED_NO_NO_OMIT_FRAME_POINTER
        bool
@@ -532,3 +495,16 @@ source "security/Kconfig"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+# override default values of drivers/xen/Kconfig
+if !XEN_IA64_DOM0_VP
+config HAVE_ARCH_ALLOC_SKB
+        bool
+        default n
+
+config HAVE_ARCH_DEV_ALLOC_SKB
+        bool
+        default n
+endif
+
+source "drivers/xen/Kconfig"
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Tue May 30 14:30:34 
2006 -0500
@@ -514,6 +514,9 @@ setup_arch (char **cmdline_p)
 #ifdef CONFIG_XEN
        if (running_on_xen) {
                extern shared_info_t *HYPERVISOR_shared_info;
+               extern int xen_init (void);
+
+               xen_init ();
 
                /* xen_start_info isn't setup yet, get the flags manually */
                if (HYPERVISOR_shared_info->arch.flags & SIF_INITDOMAIN) {
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre
--- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Tue May 30 
14:30:34 2006 -0500
@@ -6,20 +6,6 @@
 # for building (as all files in mv'd directories are thought by hg
 # to have been deleted).  I don't know how to avoid this right now,
 # but if someone has a better way, I'm all ears
-
-function try_to_mv() {
-       if [ ! -e $2 ]
-       then
-               mv $1 $2
-       fi
-}
-
-try_to_mv mm/Kconfig mm/Kconfig.xen-x86
-
-# need to grab a couple of xen-modified files for generic_page_range and
-# typedef pte_fn_t which are used by driver/xen blkif
-#ln -sf ../mm.xen-x86/memory.c mm/
-#ln -sf ../linux.xen-x86/mm.h include/linux/
 
 #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
 ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile       Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile       Tue May 30 14:30:34 
2006 -0500
@@ -2,7 +2,7 @@
 # Makefile for Xen components
 #
 
-obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o 
xenconsole.o xen_ksyms.o
+obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o 
xenconsole.o
 
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o
-pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
\ No newline at end of file
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o util.o
+pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Tue May 30 
14:30:34 2006 -0500
@@ -1,20 +1,22 @@
 
+ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-y   += util.o
+endif
 
 obj-y  += core/
+#obj-y += char/
 obj-y  += console/
 obj-y  += evtchn/
-#obj-y += balloon/
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/
 obj-y  += privcmd/
-obj-y  += blkback/
-#obj-y += netback/
-obj-y  += blkfront/
 obj-y  += xenbus/
-#obj-y += netfront/
-#obj-$(CONFIG_XEN_PRIVILEGED_GUEST)    += privcmd/
-#obj-$(CONFIG_XEN_BLKDEV_BACKEND)      += blkback/
-#obj-$(CONFIG_XEN_NETDEV_BACKEND)      += netback/
-#obj-$(CONFIG_XEN_BLKDEV_FRONTEND)     += blkfront/
-#obj-$(CONFIG_XEN_NETDEV_FRONTEND)     += netfront/
-#obj-$(CONFIG_XEN_BLKDEV_TAP)          += blktap/
 
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
+obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
+obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
+obj-$(CONFIG_XEN_TPMDEV_FRONTEND)      += tpmfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Tue May 30 
14:30:34 2006 -0500
@@ -1,14 +1,6 @@
 #
 # Makefile for the linux kernel.
 #
-
-XENARCH        := $(subst ",,$(CONFIG_XENARCH))
-
-CPPFLAGS_vmlinux.lds += -U$(XENARCH)
-
-$(obj)/vmlinux.lds.S:
-       @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@
-
 
 obj-y   := gnttab.o features.o
 obj-$(CONFIG_PROC_FS) += xen_proc.o
@@ -16,8 +8,10 @@ ifeq ($(ARCH),ia64)
 ifeq ($(ARCH),ia64)
 obj-y   += evtchn_ia64.o
 obj-y   += xenia64_init.o
+ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
+obj-$(CONFIG_NET)     += skbuff.o
+endif
 else
-extra-y += vmlinux.lds
 obj-y   += reboot.o evtchn.o fixup.o 
 obj-$(CONFIG_SMP)     += smp.o         # setup_profiling_timer def'd in ia64
 obj-$(CONFIG_NET)     += skbuff.o      # until networking is up on ia64
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c  Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c  Tue May 30 
14:30:34 2006 -0500
@@ -246,25 +246,14 @@ static struct irqaction evtchn_irqaction
        .name =         "xen-event-channel"
 };
 
-int evtchn_irq = 0xe9;
+static int evtchn_irq = 0xe9;
 void __init evtchn_init(void)
 {
     shared_info_t *s = HYPERVISOR_shared_info;
-    vcpu_info_t   *vcpu_info = &s->vcpu_info[smp_processor_id()];
-
-#if 0
-    int ret;
-    irq = assign_irq_vector(AUTO_ASSIGN);
-    ret = request_irq(irq, evtchn_interrupt, 0, "xen-event-channel", NULL);
-    if (ret < 0)
-    {
-       printk("xen-event-channel unable to get irq %d (%d)\n", irq, ret);
-       return;
-    }
-#endif
+
     register_percpu_irq(evtchn_irq, &evtchn_irqaction);
 
-    vcpu_info->arch.evtchn_vector = evtchn_irq;
+    s->arch.evtchn_vector = evtchn_irq;
     printk("xen-event-channel using irq %d\n", evtchn_irq);
 
     spin_lock_init(&irq_mapping_update_lock);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 
14:30:34 2006 -0500
@@ -11,17 +11,20 @@ shared_info_t *HYPERVISOR_shared_info = 
 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
 EXPORT_SYMBOL(HYPERVISOR_shared_info);
 
-static int initialized;
 start_info_t *xen_start_info;
+
+int running_on_xen;
+EXPORT_SYMBOL(running_on_xen);
 
 int xen_init(void)
 {
+       static int initialized;
        shared_info_t *s = HYPERVISOR_shared_info;
 
        if (initialized)
                return running_on_xen ? 0 : -1;
 
-       if (!running_on_xen)
+       if (!is_running_on_xen())
                return -1;
 
        xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
@@ -35,6 +38,7 @@ int xen_init(void)
        return 0;
 }
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
 /* We just need a range of legal va here, though finally identity
  * mapped one is instead used for gnttab mapping.
  */
@@ -47,6 +51,7 @@ unsigned long alloc_empty_foreign_map_pa
 
        return (unsigned long)vma->addr;
 }
+#endif
 
 #if 0
 /* These should be define'd but some drivers use them without
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Tue May 30 14:30:34 
2006 -0500
@@ -247,7 +247,7 @@ 1:  mov r8=r32
        XEN_RESTORE_PSR_IC
        ;;
        br.ret.sptk.many rp
-END(xen_set_rr)
+END(xen_set_kr)
 
 GLOBAL_ENTRY(xen_fc)
        movl r8=running_on_xen;;
@@ -345,7 +345,7 @@ GLOBAL_ENTRY(xen_send_ipi)
 GLOBAL_ENTRY(xen_send_ipi)
         mov r14=r32
         mov r15=r33
-        mov r2=0x380
+        mov r2=0x400
         break 0x1000
         ;;
         br.ret.sptk.many rp
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 30 14:30:34 
2006 -0500
@@ -23,18 +23,56 @@
 //#include <linux/kernel.h>
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
+#include <linux/vmalloc.h>
 #include <asm/page.h>
 #include <asm/hypervisor.h>
 #include <asm/hypercall.h>
-
-#define XEN_IA64_BALLOON_IS_NOT_YET
-#ifndef XEN_IA64_BALLOON_IS_NOT_YET
+#include <xen/interface/memory.h>
 #include <xen/balloon.h>
-#else
-#define balloon_lock(flags)    ((void)flags)
-#define balloon_unlock(flags)  ((void)flags)
-#endif
-
+
+//XXX xen/ia64 copy_from_guest() is broken.
+//    This is a temporal work around until it is fixed.
+//    used by balloon.c netfront.c
+
+// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined
+// if the definition in arch-ia64.h is changed, this must be updated.
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+
+int
+ia64_xenmem_reservation_op(unsigned long op,
+                          struct xen_memory_reservation* reservation__)
+{
+       struct xen_memory_reservation reservation = *reservation__;
+       unsigned long* frame_list;
+       unsigned long nr_extents = reservation__->nr_extents;
+       int ret = 0;
+       get_xen_guest_handle(frame_list, reservation__->extent_start);
+
+       BUG_ON(op != XENMEM_increase_reservation &&
+              op != XENMEM_decrease_reservation &&
+              op != XENMEM_populate_physmap);
+
+       while (nr_extents > 0) {
+               int tmp_ret;
+               volatile unsigned long dummy;
+
+               set_xen_guest_handle(reservation.extent_start, frame_list);
+               reservation.nr_extents = nr_extents;
+
+               dummy = frame_list[0];// re-install tlb entry before hypercall
+               tmp_ret = ____HYPERVISOR_memory_op(op, &reservation);
+               if (tmp_ret < 0) {
+                       if (ret == 0) {
+                               ret = tmp_ret;
+                       }
+                       break;
+               }
+               frame_list += tmp_ret;
+               nr_extents -= tmp_ret;
+               ret += tmp_ret;
+       }
+       return ret;
+}
 
 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
 // move those to lib/contiguous_bitmap?
@@ -105,6 +143,39 @@ static void contiguous_bitmap_clear(
        }
 }
 
+static unsigned long
+HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
+                           unsigned int address_bits)
+{
+       unsigned long ret;
+        struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+                .address_bits = address_bits,
+                .extent_order = extent_order,
+                .domid        = DOMID_SELF
+        };
+       set_xen_guest_handle(reservation.extent_start, &gpfn);
+       ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
+
+static unsigned long
+HYPERVISOR_remove_physmap(unsigned long gpfn, unsigned int extent_order)
+{
+       unsigned long ret;
+       struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+               .address_bits = 0,
+               .extent_order = extent_order,
+               .domid        = DOMID_SELF
+       };
+       set_xen_guest_handle(reservation.extent_start, &gpfn);
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
+
 /* Ensure multi-page extents are contiguous in machine memory. */
 int
 __xen_create_contiguous_region(unsigned long vstart,
@@ -113,29 +184,29 @@ __xen_create_contiguous_region(unsigned 
        unsigned long error = 0;
        unsigned long gphys = __pa(vstart);
        unsigned long start_gpfn = gphys >> PAGE_SHIFT;
-       unsigned long num_pfn = 1 << order;
+       unsigned long num_gpfn = 1 << order;
        unsigned long i;
        unsigned long flags;
 
-       scrub_pages(vstart, 1 << order);
+       scrub_pages(vstart, num_gpfn);
 
        balloon_lock(flags);
 
-       //XXX order
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
-               if (error) {
-                       goto out;
-               }
+       error = HYPERVISOR_remove_physmap(start_gpfn, order);
+       if (error) {
+               goto fail;
        }
 
        error = HYPERVISOR_populate_physmap(start_gpfn, order, address_bits);
-       contiguous_bitmap_set(start_gpfn, 1UL << order);
+       if (error) {
+               goto fail;
+       }
+       contiguous_bitmap_set(start_gpfn, num_gpfn);
 #if 0
        {
        unsigned long mfn;
        unsigned long mfn_prev = ~0UL;
-       for (i = 0; i < 1 << order; i++) {
+       for (i = 0; i < num_gpfn; i++) {
                mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
                        xprintk("\n");
@@ -145,7 +216,7 @@ __xen_create_contiguous_region(unsigned 
                                vstart, virt_to_bus((void*)vstart),
                                phys_to_machine_for_dma(gphys));
                        xprintk("mfn: ");
-                       for (i = 0; i < 1 << order; i++) {
+                       for (i = 0; i < num_gpfn; i++) {
                                mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                                xprintk("0x%lx ", mfn);
                        }
@@ -159,76 +230,405 @@ out:
 out:
        balloon_unlock(flags);
        return error;
+
+fail:
+       for (i = 0; i < num_gpfn; i++) {
+               error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+               if (error) {
+                       BUG();//XXX
+               }
+       }
+       goto out;
 }
 
 void
 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 {
+       unsigned long flags;
        unsigned long error = 0;
-       unsigned long gphys = __pa(vstart);
-       unsigned long start_gpfn = gphys >> PAGE_SHIFT;
-       unsigned long num_pfn = 1 << order;
+       unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
+       unsigned long num_gpfn = 1UL << order;
+       unsigned long* gpfns;
+       struct xen_memory_reservation reservation;
        unsigned long i;
-       unsigned long flags;
-
-       scrub_pages(vstart, 1 << order);
+
+       gpfns = kmalloc(sizeof(gpfns[0]) * num_gpfn,
+                       GFP_KERNEL | __GFP_NOFAIL);
+       for (i = 0; i < num_gpfn; i++) {
+               gpfns[i] = start_gpfn + i;
+       }
+
+       scrub_pages(vstart, num_gpfn);
 
        balloon_lock(flags);
 
-       contiguous_bitmap_clear(start_gpfn, 1UL << order);
-
-       //XXX order
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
-               if (error) {
-                       goto out;
-               }
-       }
-
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
-               if (error) {
-                       goto out;
-               }
-       }
-
+       contiguous_bitmap_clear(start_gpfn, num_gpfn);
+       error = HYPERVISOR_remove_physmap(start_gpfn, order);
+       if (error) {
+               goto fail;
+       }
+
+       set_xen_guest_handle(reservation.extent_start, gpfns);
+       reservation.nr_extents   = num_gpfn;
+       reservation.address_bits = 0;
+       reservation.extent_order = 0;
+       reservation.domid        = DOMID_SELF;
+       error = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+       if (error != num_gpfn) {
+               error = -EFAULT;//XXX
+               goto fail;
+       }
+       error = 0;
 out:
        balloon_unlock(flags);
+       kfree(gpfns);
        if (error) {
-               //XXX
-       }
+               // error can't be returned.
+               BUG();//XXX
+       }
+       return;
+
+fail:
+       for (i = 0; i < num_gpfn; i++) {
+               int tmp_error;// don't overwrite error.
+               tmp_error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+               if (tmp_error) {
+                       BUG();//XXX
+               }
+       }
+       goto out;
 }
 
 
 ///////////////////////////////////////////////////////////////////////////
-//XXX taken from balloon.c
-//    temporal hack until balloon driver support.
-#include <linux/module.h>
-
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-       unsigned long vstart;
-       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
-
-       vstart = __get_free_pages(GFP_KERNEL, order);
-       if (vstart == 0)
-               return NULL;
-
-       return virt_to_page(vstart);
-}
-
-void balloon_dealloc_empty_page_range(
-       struct page *page, unsigned long nr_pages)
-{
-       __free_pages(page, get_order(nr_pages * PAGE_SIZE));
-}
-
-void balloon_update_driver_allowance(long delta)
-{
-}
-
-EXPORT_SYMBOL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
-EXPORT_SYMBOL(balloon_update_driver_allowance);
-
-
+// grant table hack
+// cmd: GNTTABOP_xxx
+
+#include <linux/mm.h>
+#include <xen/interface/xen.h>
+#include <xen/gnttab.h>
+
+static void
+gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
+{
+       uint32_t flags;
+
+       flags = uop->flags;
+       if (flags & GNTMAP_readonly) {
+#if 0
+               xprintd("GNTMAP_readonly is not supported yet\n");
+#endif
+               flags &= ~GNTMAP_readonly;
+       }
+
+       if (flags & GNTMAP_host_map) {
+               if (flags & GNTMAP_application_map) {
+                       xprintd("GNTMAP_application_map is not supported yet: 
flags 0x%x\n", flags);
+                       BUG();
+               }
+               if (flags & GNTMAP_contains_pte) {
+                       xprintd("GNTMAP_contains_pte is not supported yet flags 
0x%x\n", flags);
+                       BUG();
+               }
+       } else if (flags & GNTMAP_device_map) {
+               xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
+               BUG();//XXX not yet. actually this flag is not used.
+       } else {
+               BUG();
+       }
+}
+
+int
+HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
+{
+       if (cmd == GNTTABOP_map_grant_ref) {
+               unsigned int i;
+               for (i = 0; i < count; i++) {
+                       gnttab_map_grant_ref_pre(
+                               (struct gnttab_map_grant_ref*)uop + i);
+               }
+       }
+
+       return ____HYPERVISOR_grant_table_op(cmd, uop, count);
+}
+
+
+///////////////////////////////////////////////////////////////////////////
+// PageForeign(), SetPageForeign(), ClearPageForeign()
+
+struct address_space xen_ia64_foreign_dummy_mapping;
+
+///////////////////////////////////////////////////////////////////////////
+// foreign mapping
+
+struct xen_ia64_privcmd_entry {
+       atomic_t        map_count;
+       struct page*    page;
+};
+
+static void
+xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
+{
+       atomic_set(&entry->map_count, 0);
+       entry->page = NULL;
+}
+
+//TODO alloc_page() to allocate pseudo physical address space is 
+//     waste of memory.
+//     When vti domain is created, qemu maps all of vti domain pages which 
+//     reaches to several hundred megabytes at least.
+//     remove alloc_page().
+static int
+xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
+                           unsigned long addr,
+                           struct xen_ia64_privcmd_entry* entry,
+                           unsigned long mfn,
+                           pgprot_t prot,
+                           domid_t domid)
+{
+       int error = 0;
+       struct page* page;
+       unsigned long gpfn;
+
+       BUG_ON((addr & ~PAGE_MASK) != 0);
+       BUG_ON(mfn == INVALID_MFN);
+
+       if (entry->page != NULL) {
+               error = -EBUSY;
+               goto out;
+       }
+       page = alloc_page(GFP_KERNEL);
+       if (page == NULL) {
+               error = -ENOMEM;
+               goto out;
+       }
+       gpfn = page_to_pfn(page);
+
+       error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
+                                      domid);
+       if (error != 0) {
+               goto out;
+       }
+
+       prot = vma->vm_page_prot;
+       error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
+       if (error != 0) {
+               (void)HYPERVISOR_zap_physmap(gpfn, 0);
+               error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+               if (error) {
+                       BUG();//XXX
+               }
+               __free_page(page);
+       } else {
+               atomic_inc(&entry->map_count);
+               entry->page = page;
+       }
+
+out:
+       return error;
+}
+
+static void
+xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry)
+{
+       struct page* page = entry->page;
+       unsigned long gpfn = page_to_pfn(page);
+       int error;
+
+       error = HYPERVISOR_zap_physmap(gpfn, 0);
+       if (error) {
+               BUG();//XXX
+       }
+
+       error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+       if (error) {
+               BUG();//XXX
+       }
+
+       entry->page = NULL;
+       __free_page(page);
+}
+
+static int
+xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry)
+{
+       if (entry->page != NULL) {
+               atomic_inc(&entry->map_count);
+       } else {
+               BUG_ON(atomic_read(&entry->map_count) != 0);
+       }
+}
+
+static int
+xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry)
+{
+       if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) {
+               xen_ia64_privcmd_entry_munmap(entry);
+       }
+}
+
+struct xen_ia64_privcmd_range {
+       atomic_t                        ref_count;
+       unsigned long                   pgoff; // in PAGE_SIZE
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry   entries[0];
+};
+
+struct xen_ia64_privcmd_vma {
+       struct xen_ia64_privcmd_range*  range;
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry*  entries;
+};
+
+static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
+static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
+
+struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
+       .open = &xen_ia64_privcmd_vma_open,
+       .close = &xen_ia64_privcmd_vma_close,
+};
+
+static void
+__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
+                           struct xen_ia64_privcmd_vma* privcmd_vma,
+                           struct xen_ia64_privcmd_range* privcmd_range)
+{
+       unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
+       unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       unsigned long i;
+
+       BUG_ON(entry_offset < 0);
+       BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
+
+       privcmd_vma->range = privcmd_range;
+       privcmd_vma->num_entries = num_entries;
+       privcmd_vma->entries = &privcmd_range->entries[entry_offset];
+       vma->vm_private_data = privcmd_vma;
+       for (i = 0; i < privcmd_vma->num_entries; i++) {
+               xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]);
+       }
+
+       vma->vm_private_data = privcmd_vma;
+       vma->vm_ops = &xen_ia64_privcmd_vm_ops;
+}
+
+static void
+xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma = (struct 
xen_ia64_privcmd_vma*)vma->vm_private_data;
+       struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+
+       atomic_inc(&privcmd_range->ref_count);
+       // vm_op->open() can't fail.
+       privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
+
+       __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+}
+
+static void
+xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma =
+               (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+       struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+       unsigned long i;
+
+       for (i = 0; i < privcmd_vma->num_entries; i++) {
+               xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]);
+       }
+       vma->vm_private_data = NULL;
+       kfree(privcmd_vma);
+
+       if (atomic_dec_and_test(&privcmd_range->ref_count)) {
+#if 1
+               for (i = 0; i < privcmd_range->num_entries; i++) {
+                       struct xen_ia64_privcmd_entry* entry =
+                               &privcmd_range->entries[i];
+                       BUG_ON(atomic_read(&entry->map_count) != 0);
+                       BUG_ON(entry->page != NULL);
+               }
+#endif
+               vfree(privcmd_range);
+       }
+}
+
+int
+privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+       unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       struct xen_ia64_privcmd_range* privcmd_range;
+       struct xen_ia64_privcmd_vma* privcmd_vma;
+       unsigned long i;
+       BUG_ON(!running_on_xen);
+
+       BUG_ON(file->private_data != NULL);
+       privcmd_range =
+               vmalloc(sizeof(*privcmd_range) +
+                       sizeof(privcmd_range->entries[0]) * num_entries);
+       if (privcmd_range == NULL) {
+               goto out_enomem0;
+       }
+       privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
+       if (privcmd_vma == NULL) {
+               goto out_enomem1;
+       }
+
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+
+       atomic_set(&privcmd_range->ref_count, 1);
+       privcmd_range->pgoff = vma->vm_pgoff;
+       privcmd_range->num_entries = num_entries;
+       for (i = 0; i < privcmd_range->num_entries; i++) {
+               xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
+       }
+
+       __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+       return 0;
+
+out_enomem1:
+       kfree(privcmd_vma);
+out_enomem0:
+       vfree(privcmd_range);
+       return -ENOMEM;
+}
+
+int
+direct_remap_pfn_range(struct vm_area_struct *vma,
+                      unsigned long address,   // process virtual address
+                      unsigned long mfn,       // mfn, mfn + 1, ... mfn + 
size/PAGE_SIZE
+                      unsigned long size,
+                      pgprot_t prot,
+                      domid_t  domid)          // target domain
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma =
+               (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+       unsigned long i;
+       unsigned long offset;
+       int error = 0;
+       BUG_ON(!running_on_xen);
+
+#if 0
+       if (prot != vm->vm_page_prot) {
+               return -EINVAL;
+       }
+#endif
+
+       i = (address - vma->vm_start) >> PAGE_SHIFT;
+       for (offset = 0; offset < size; offset += PAGE_SIZE) {
+               struct xen_ia64_privcmd_entry* entry =
+                       &privcmd_vma->entries[i];
+               error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & 
PAGE_MASK, entry, mfn, prot, domid);
+               if (error != 0) {
+                       break;
+               }
+
+               i++;
+               mfn++;
+        }
+
+       return error;
+}
+
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Tue May 30 14:30:34 
2006 -0500
@@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to)
        mov r8=1
        ;;
        st4 [r27]=r8                    // psr.ic back on
-       ;;
-#else
-(p6)   ssm psr.ic                      // if we had to map, reenable the 
psr.ic bit FIRST!!!
-       ;;
-(p6)   srlz.d
+#else
        ld8 sp=[r21]                    // load kernel stack pointer of new task
        mov IA64_KR(CURRENT)=in0        // update "current" application register
 #endif
@@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to)
 #endif
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
+#ifndef CONFIG_XEN
+       ssm psr.ic                      // reenable the psr.ic bit
+       ;;
+       srlz.d
+#endif
        br.cond.sptk .done
 #ifdef CONFIG_XEN
 END(xen_switch_to)
@@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall)
 .mem.offset 0,0; st8.spill [r2]=r8             // store return value in slot 
for r8
 .mem.offset 8,0; st8.spill [r3]=r10            // clear error indication in 
slot for r10
        br.call.sptk.many rp=syscall_trace_leave // give parent a chance to 
catch return value
-.ret3: br.cond.sptk .work_pending_syscall_end
+.ret3:
+(pUStk)        cmp.eq.unc p6,p0=r0,r0                  // p6 <- pUStk
+       br.cond.sptk .work_pending_syscall_end
 
 strace_error:
        ld8 r3=[r2]                             // load pt_regs.r8
@@ -246,7 +249,7 @@ END(ia64_trace_syscall)
  *           r8-r11: restored (syscall return value(s))
  *              r12: restored (user-level stack pointer)
  *              r13: restored (user-level thread pointer)
- *              r14: cleared
+ *              r14: set to __kernel_syscall_via_epc
  *              r15: restored (syscall #)
  *          r16-r17: cleared
  *              r18: user-level b6
@@ -267,7 +270,7 @@ END(ia64_trace_syscall)
  *               pr: restored (user-level pr)
  *               b0: restored (user-level rp)
  *               b6: restored
- *               b7: cleared
+ *               b7: set to __kernel_syscall_via_epc
  *          ar.unat: restored (user-level ar.unat)
  *           ar.pfs: restored (user-level ar.pfs)
  *           ar.rsc: restored (user-level ar.rsc)
@@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall)
        ;;
 (p6)   ld4 r31=[r18]                           // load 
current_thread_info()->flags
        ld8 r19=[r2],PT(B6)-PT(LOADRS)          // load ar.rsc value for 
"loadrs"
-       mov b7=r0               // clear b7
-       ;;
-       ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)    // load ar.bspstore (may be 
garbage)
+       nop.i 0
+       ;;
+       mov r16=ar.bsp                          // M2  get existing backing 
store pointer
        ld8 r18=[r2],PT(R9)-PT(B6)              // load b6
 (p6)   and r15=TIF_WORK_MASK,r31               // any work other than 
TIF_SYSCALL_TRACE?
        ;;
-       mov r16=ar.bsp                          // M2  get existing backing 
store pointer
+       ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)    // load ar.bspstore (may be 
garbage)
 (p6)   cmp4.ne.unc p6,p0=r15, r0               // any special work pending?
 (p6)   br.cond.spnt .work_pending_syscall
        ;;
        // start restoring the state saved on the kernel stack (struct pt_regs):
        ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
        ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-       mov f6=f0               // clear f6
+(pNonSys) break 0              //      bug check: we shouldn't be here if 
pNonSys is TRUE!
        ;;
        invala                  // M0|1 invalidate ALAT
 #ifdef CONFIG_XEN
@@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall)
        st4     [r29]=r0        // note: clears both vpsr.i and vpsr.ic!
        ;;
 #else
-       rsm psr.i | psr.ic      // M2 initiate turning off of interrupt and 
interruption collection
-#endif
-       mov f9=f0               // clear f9
-
-       ld8 r29=[r2],16         // load cr.ipsr
-       ld8 r28=[r3],16                 // load cr.iip
-       mov f8=f0               // clear f8
+       rsm psr.i | psr.ic      // M2   turn off interrupts and interruption 
collection
+#endif
+       cmp.eq p9,p0=r0,r0      // A    set p9 to indicate that we should 
restore cr.ifs
+
+       ld8 r29=[r2],16         // M0|1 load cr.ipsr
+       ld8 r28=[r3],16         // M0|1 load cr.iip
+       mov r22=r0              // A    clear r22
        ;;
        ld8 r30=[r2],16         // M0|1 load cr.ifs
-       mov.m ar.ssd=r0         // M2 clear ar.ssd
-       cmp.eq p9,p0=r0,r0      // set p9 to indicate that we should restore 
cr.ifs
-       ;;
        ld8 r25=[r3],16         // M0|1 load ar.unat
-       mov.m ar.csd=r0         // M2 clear ar.csd
-       mov r22=r0              // clear r22
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
        ;;
        ld8 r26=[r2],PT(B0)-PT(AR_PFS)  // M0|1 load ar.pfs
-(pKStk)        mov r22=psr             // M2 read PSR now that interrupts are 
disabled
-       mov f10=f0              // clear f10
-       ;;
-       ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
-       ld8 r27=[r3],PT(PR)-PT(AR_RSC)  // load ar.rsc
-       mov f11=f0              // clear f11
-       ;;
-       ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)    // load ar.rnat (may be garbage)
-       ld8 r31=[r3],PT(R1)-PT(PR)              // load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
-       ;;
-       ld8 r20=[r2],PT(R12)-PT(AR_FPSR)        // load ar.fpsr
-       ld8.fill r1=[r3],16     // load r1
-(pUStk) mov r17=1
-       ;;
-       srlz.d                  // M0  ensure interruption collection is off
-       ld8.fill r13=[r3],16
-       mov f7=f0               // clear f7
-       ;;
-       ld8.fill r12=[r2]       // restore r12 (sp)
-       ld8.fill r15=[r3]       // restore r15
-       addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
-       ;;
-(pUStk)        ld4 r3=[r3]             // r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
-       mov b6=r18              // I0  restore b6
-       ;;
-       mov r14=r0              // clear r14
-       shr.u r18=r19,16        // I0|1 get byte size of existing "dirty" 
partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
-
-       mov.m ar.ccv=r0         // clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
-       br.cond.sptk.many rbs_switch
+(pKStk)        mov r22=psr                     // M2   read PSR now that 
interrupts are disabled
+       nop 0
+       ;;
+       ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+       ld8 r27=[r3],PT(PR)-PT(AR_RSC)  // M0|1 load ar.rsc
+       mov f6=f0                       // F    clear f6
+       ;;
+       ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)    // M0|1 load ar.rnat (may be 
garbage)
+       ld8 r31=[r3],PT(R1)-PT(PR)              // M0|1 load predicates
+       mov f7=f0                               // F    clear f7
+       ;;
+       ld8 r20=[r2],PT(R12)-PT(AR_FPSR)        // M0|1 load ar.fpsr
+       ld8.fill r1=[r3],16                     // M0|1 load r1
+(pUStk) mov r17=1                              // A
+       ;;
+(pUStk) st1 [r14]=r17                          // M2|3
+       ld8.fill r13=[r3],16                    // M0|1
+       mov f8=f0                               // F    clear f8
+       ;;
+       ld8.fill r12=[r2]                       // M0|1 restore r12 (sp)
+       ld8.fill r15=[r3]                       // M0|1 restore r15
+       mov b6=r18                              // I0   restore b6
+
+       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+       mov f9=f0                                       // F    clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch              // B
+
+       srlz.d                          // M0   ensure interruption collection 
is off (for cover)
+       shr.u r18=r19,16                // I0|1 get byte size of existing 
"dirty" partition
+#ifdef CONFIG_XEN
+       XEN_HYPER_COVER;
+#else
+       cover                           // B    add current frame into dirty 
partition & set cr.ifs
+#endif
+       ;;
+(pUStk) ld4 r17=[r17]                  // M0|1 r17 = 
cpu_data->phys_stacked_size_p8
+       mov r19=ar.bsp                  // M2   get new backing store pointer
+       mov f10=f0                      // F    clear f10
+
+       nop.m 0
+       movl r14=__kernel_syscall_via_epc // X
+       ;;
+       mov.m ar.csd=r0                 // M2   clear ar.csd
+       mov.m ar.ccv=r0                 // M2   clear ar.ccv
+       mov b7=r14                      // I0   clear b7 (hint with 
__kernel_syscall_via_epc)
+
+       mov.m ar.ssd=r0                 // M2   clear ar.ssd
+       mov f11=f0                      // F    clear f11
+       br.cond.sptk.many rbs_switch    // B
 #ifdef CONFIG_XEN
 END(xen_leave_syscall)
 #else
@@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        ldf.fill f7=[r2],PT(F11)-PT(F7)
        ldf.fill f8=[r3],32
        ;;
-       srlz.i                  // ensure interruption collection is off
+       srlz.d  // ensure that inter. collection is off (VHPT is don't care, 
since text is pinned)
        mov ar.ccv=r15
        ;;
        ldf.fill f11=[r2]
@@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        movl r2=XSI_BANK1_R16
        movl r3=XSI_BANK1_R16+8
        ;;
-       st8.spill [r2]=r16,16
-       st8.spill [r3]=r17,16
-       ;;
-       st8.spill [r2]=r18,16
-       st8.spill [r3]=r19,16
-       ;;
-       st8.spill [r2]=r20,16
-       st8.spill [r3]=r21,16
-       ;;
-       st8.spill [r2]=r22,16
-       st8.spill [r3]=r23,16
-       ;;
-       st8.spill [r2]=r24,16
-       st8.spill [r3]=r25,16
-       ;;
-       st8.spill [r2]=r26,16
-       st8.spill [r3]=r27,16
-       ;;
-       st8.spill [r2]=r28,16
-       st8.spill [r3]=r29,16
-       ;;
-       st8.spill [r2]=r30,16
-       st8.spill [r3]=r31,16
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,16
        ;;
        movl r2=XSI_BANKNUM;;
        st4 [r2]=r0;
@@ -641,14 +655,14 @@ GLOBAL_ENTRY(ia64_leave_kernel)
         */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
 
+#ifdef CONFIG_XEN
+       XEN_HYPER_COVER;
+#else
+       cover                           // add current frame into dirty 
partition and set cr.ifs
+#endif
+       ;;
+       mov r19=ar.bsp                  // get new backing store pointer
 rbs_switch:
-#ifdef CONFIG_XEN
-       XEN_HYPER_COVER;
-#else
-       cover                           // add current frame into dirty 
partition and set cr.ifs
-#endif
-       ;;
-       mov r19=ar.bsp                  // get new backing store pointer
        sub r16=r16,r18                 // krbs = old bsp - size of dirty 
partition
        cmp.ne p9,p0=r0,r0              // clear p9 to skip restore of cr.ifs
        ;;
@@ -723,14 +737,14 @@ rse_clear_invalid:
        mov loc5=0
        mov loc6=0
        mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
        ;;
        mov loc8=0
        mov loc9=0
        cmp.ne pReturn,p0=r0,in1        // if recursion count != 0, we need to 
do a br.ret
        mov loc10=0
        mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
 #endif /* !CONFIG_ITANIUM */
 #      undef pRecurse
 #      undef pReturn
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Tue May 30 14:30:34 
2006 -0500
@@ -87,16 +87,17 @@ ENTRY(vhpt_miss)
         * (the "original") TLB miss, which may either be caused by an 
instruction
         * fetch or a data access (or non-access).
         *
-        * What we do here is normal TLB miss handing for the _original_ miss, 
followed
-        * by inserting the TLB entry for the virtual page table page that the 
VHPT
-        * walker was attempting to access.  The latter gets inserted as long
-        * as both L1 and L2 have valid mappings for the faulting address.
-        * The TLB entry for the original miss gets inserted only if
-        * the L3 entry indicates that the page is present.
+        * What we do here is normal TLB miss handling for the _original_ miss,
+        * followed by inserting the TLB entry for the virtual page table page
+        * that the VHPT walker was attempting to access.  The latter gets
+        * inserted as long as the page table entries above the pte level have valid
+        * mappings for the faulting address.  The TLB entry for the original
+        * miss gets inserted only if the pte entry indicates that the page is
+        * present.
         *
         * do_page_fault gets invoked in the following cases:
         *      - the faulting virtual address uses unimplemented address bits
-        *      - the faulting virtual address has no L1, L2, or L3 mapping
+        *      - the faulting virtual address has no valid page table mapping
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -127,7 +128,7 @@ ENTRY(vhpt_miss)
        shl r21=r16,3                           // shift bit 60 into sign bit
        shr.u r17=r16,61                        // get the region number into 
r17
        ;;
-       shr r22=r21,3
+       shr.u r22=r21,3
 #ifdef CONFIG_HUGETLB_PAGE
        extr.u r26=r25,2,6
        ;;
@@ -139,7 +140,7 @@ ENTRY(vhpt_miss)
 #endif
        ;;
        cmp.eq p6,p7=5,r17                      // is IFA pointing into to 
region 5?
-       shr.u r18=r22,PGDIR_SHIFT               // get bits 33-63 of the 
faulting address
+       shr.u r18=r22,PGDIR_SHIFT               // get bottom portion of pgd 
index bit
        ;;
 (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
 
@@ -150,41 +151,54 @@ ENTRY(vhpt_miss)
 (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
 (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
        ;;
-(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
-(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=pgd_offset for region 5
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=pgd_offset for 
region[0-4]
        cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
-       shr.u r18=r22,PMD_SHIFT                 // shift L2 index into position
-       ;;
-       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
-       ;;
-(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
-       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
-       ;;
-(p7)   ld8 r20=[r17]                           // fetch the L2 entry (may be 0)
-       shr.u r19=r22,PAGE_SHIFT                // shift L3 index into position
-       ;;
-(p7)   cmp.eq.or.andcm p6,p7=r20,r0            // was L2 entry NULL?
-       dep r21=r19,r20,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
-       ;;
-#ifdef CONFIG_XEN
-(p7)   ld8 r18=[r21]                           // read the L3 PTE
+#ifdef CONFIG_PGTABLE_4
+       shr.u r28=r22,PUD_SHIFT                 // shift pud index into position
+#else
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+#endif
+       ;;
+       ld8 r17=[r17]                           // get *pgd (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was pgd_present(*pgd) == 
NULL?
+#ifdef CONFIG_PGTABLE_4
+       dep r28=r28,r17,3,(PAGE_SHIFT-3)        // r28=pud_offset(pgd,addr)
+       ;;
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+(p7)   ld8 r29=[r28]                           // get *pud (may be 0)
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r29,r0            // was pud_present(*pud) == 
NULL?
+       dep r17=r18,r29,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pud,addr)
+#else
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pgd,addr)
+#endif
+       ;;
+(p7)   ld8 r20=[r17]                           // get *pmd (may be 0)
+       shr.u r19=r22,PAGE_SHIFT                // shift pte index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r20,r0            // was pmd_present(*pmd) == 
NULL?
+       dep r21=r19,r20,3,(PAGE_SHIFT-3)        // r21=pte_offset(pmd,addr)
+       ;;
+(p7)   ld8 r18=[r21]                           // read *pte
+#ifdef CONFIG_XEN
        movl r19=XSI_ISR
        ;;
        ld8 r19=[r19]
+#else
+       mov r19=cr.isr                          // cr.isr bit 32 tells us if 
this is an insn miss
+#endif
        ;;
 (p7)   tbit.z p6,p7=r18,_PAGE_P_BIT            // page present bit cleared?
+#ifdef CONFIG_XEN
        movl r22=XSI_IHA
        ;;
        ld8 r22=[r22]
-       ;;
-#else
-(p7)   ld8 r18=[r21]                           // read the L3 PTE
-       mov r19=cr.isr                          // cr.isr bit 0 tells us if 
this is an insn miss
-       ;;
-(p7)   tbit.z p6,p7=r18,_PAGE_P_BIT            // page present bit cleared?
+#else
        mov r22=cr.iha                          // get the VHPT address that 
caused the TLB miss
+#endif
        ;;                                      // avoid RAW on p7
-#endif
 (p7)   tbit.nz.unc p10,p11=r19,32              // is it an instruction TLB 
miss?
        dep r23=0,r20,0,PAGE_SHIFT              // clear low bits to get page 
address
        ;;
@@ -198,16 +212,17 @@ ENTRY(vhpt_miss)
        ;;
        mov r8=r24
        ;;
-(p6)   br.cond.spnt.many page_fault            // handle bad address/page not 
present (page fault)
-       ;;
-       movl r24=XSI_IFA
-       ;;
-       st8 [r24]=r22
-       ;;
 #else
 (p10)  itc.i r18                               // insert the instruction TLB 
entry
 (p11)  itc.d r18                               // insert the data TLB entry
+#endif
 (p6)   br.cond.spnt.many page_fault            // handle bad address/page not 
present (page fault)
+#ifdef CONFIG_XEN
+       movl r24=XSI_IFA
+       ;;
+       st8 [r24]=r22
+       ;;
+#else
        mov cr.ifa=r22
 #endif
 
@@ -242,25 +257,41 @@ ENTRY(vhpt_miss)
        dv_serialize_data
 
        /*
-        * Re-check L2 and L3 pagetable.  If they changed, we may have received 
a ptc.g
+        * Re-check pagetable entry.  If they changed, we may have received a 
ptc.g
         * between reading the pagetable and the "itc".  If so, flush the entry 
we
-        * inserted and retry.
-        */
-       ld8 r25=[r21]                           // read L3 PTE again
-       ld8 r26=[r17]                           // read L2 entry again
-       ;;
-       cmp.ne p6,p7=r26,r20                    // did L2 entry change
+        * inserted and retry.  At this point, we have:
+        *
+        * r28 = equivalent of pud_offset(pgd, ifa)
+        * r17 = equivalent of pmd_offset(pud, ifa)
+        * r21 = equivalent of pte_offset(pmd, ifa)
+        *
+        * r29 = *pud
+        * r20 = *pmd
+        * r18 = *pte
+        */
+       ld8 r25=[r21]                           // read *pte again
+       ld8 r26=[r17]                           // read *pmd again
+#ifdef CONFIG_PGTABLE_4
+       ld8 r19=[r28]                           // read *pud again
+#endif
+       cmp.ne p6,p7=r0,r0
+       ;;
+       cmp.ne.or.andcm p6,p7=r26,r20           // did *pmd change
+#ifdef CONFIG_PGTABLE_4
+       cmp.ne.or.andcm p6,p7=r19,r29           // did *pud change
+#endif
        mov r27=PAGE_SHIFT<<2
        ;;
 (p6)   ptc.l r22,r27                           // purge PTE page translation
-(p7)   cmp.ne.or.andcm p6,p7=r25,r18           // did L3 PTE change
+(p7)   cmp.ne.or.andcm p6,p7=r25,r18           // did *pte change
        ;;
 (p6)   ptc.l r16,r27                           // purge translation
 #endif
 
        mov pr=r31,-1                           // restore predicate registers
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -272,10 +303,10 @@ ENTRY(itlb_miss)
 ENTRY(itlb_miss)
        DBG_FAULT(1)
        /*
-        * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+        * The ITLB handler accesses the PTE via the virtually mapped linear
         * page table.  If a nested TLB miss occurs, we switch into physical
-        * mode, walk the page table, and then re-execute the L3 PTE read
-        * and go on normally after that.
+        * mode, walk the page table, and then re-execute the PTE read and
+        * go on normally after that.
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -292,11 +323,11 @@ ENTRY(itlb_miss)
        ;;
        ld8 r17=[r17]                           // get virtual address of L3 PTE
 #else
-       mov r17=cr.iha                          // get virtual address of L3 PTE
+       mov r17=cr.iha                          // get virtual address of PTE
 #endif
        movl r30=1f                             // load nested fault 
continuation point
        ;;
-1:     ld8 r18=[r17]                           // read L3 PTE
+1:     ld8 r18=[r17]                           // read *pte
        ;;
        mov b0=r29
        tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
@@ -320,7 +351,7 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
         */
        dv_serialize_data
 
-       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       ld8 r19=[r17]                           // read *pte again and see if 
same
        mov r20=PAGE_SHIFT<<2                   // setup page size for purge
        ;;
        cmp.ne p7,p0=r18,r19
@@ -329,7 +360,8 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
 #endif
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -341,10 +373,10 @@ ENTRY(dtlb_miss)
 ENTRY(dtlb_miss)
        DBG_FAULT(2)
        /*
-        * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+        * The DTLB handler accesses the PTE via the virtually mapped linear
         * page table.  If a nested TLB miss occurs, we switch into physical
-        * mode, walk the page table, and then re-execute the L3 PTE read
-        * and go on normally after that.
+        * mode, walk the page table, and then re-execute the PTE read and
+        * go on normally after that.
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -361,11 +393,11 @@ dtlb_fault:
        ;;
        ld8 r17=[r17]                           // get virtual address of L3 PTE
 #else
-       mov r17=cr.iha                          // get virtual address of L3 PTE
+       mov r17=cr.iha                          // get virtual address of PTE
 #endif
        movl r30=1f                             // load nested fault 
continuation point
        ;;
-1:     ld8 r18=[r17]                           // read L3 PTE
+1:     ld8 r18=[r17]                           // read *pte
        ;;
        mov b0=r29
        tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
@@ -390,7 +422,7 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
         */
        dv_serialize_data
 
-       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       ld8 r19=[r17]                           // read *pte again and see if 
same
        mov r20=PAGE_SHIFT<<2                   // setup page size for purge
        ;;
        cmp.ne p7,p0=r18,r19
@@ -399,7 +431,8 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
 #endif
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss)
        ld8 r21=[r31],XSI_IFA-XSI_IPSR  // get ipsr, point to ifa
        movl r17=PAGE_KERNEL
        ;;
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       ;;
        ld8 r16=[r31]           // get ifa
-       mov r31=pr
-       ;;
 #else
        mov r16=cr.ifa          // get address that caused the TLB miss
        movl r17=PAGE_KERNEL
        mov r21=cr.ipsr
+#endif
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r31=pr
        ;;
-#endif
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into 
r21
        ;;
@@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss)
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        ;;
        ld8 r16=[r31]           // get ifa
-       mov r31=pr
-       ;;
 #else
        mov r16=cr.ifa          // get address that caused the TLB miss
        movl r17=PAGE_KERNEL
        mov r20=cr.isr
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r21=cr.ipsr
+#endif
        mov r31=pr
        ;;
-#endif
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into 
r21
        ;;
@@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss)
         *              r30:    continuation address
         *              r31:    saved pr
         *
-        * Output:      r17:    physical address of L3 PTE of faulting address
+        * Output:      r17:    physical address of PTE of faulting address
         *              r29:    saved b0
         *              r30:    continuation address
         *              r31:    saved pr
         *
-        * Clobbered:   b0, r18, r19, r21, psr.dt (cleared)
+        * Clobbered:   b0, r18, r19, r21, r22, psr.dt (cleared)
         */
 #ifdef CONFIG_XEN
        XEN_HYPER_RSM_PSR_DT;
@@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss)
 #endif
        mov r19=IA64_KR(PT_BASE)                // get the page table base 
address
        shl r21=r16,3                           // shift bit 60 into sign bit
+#ifdef CONFIG_XEN
+       movl r18=XSI_ITIR
+       ;;
+       ld8 r18=[r18]
+#else
+       mov r18=cr.itir
+#endif
        ;;
        shr.u r17=r16,61                        // get the region number into 
r17
+       extr.u r18=r18,2,6                      // get the faulting page size
        ;;
        cmp.eq p6,p7=5,r17                      // is faulting address in 
region 5?
-       shr.u r18=r16,PGDIR_SHIFT               // get bits 33-63 of faulting 
address
-       ;;
+       add r22=-PAGE_SHIFT,r18                 // adjustment for hugetlb 
address
+       add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
+       ;;
+       shr.u r22=r16,r22
+       shr.u r18=r16,r18
 (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
 
        srlz.d
@@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss)
 (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
 (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
        ;;
-(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
-(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=pgd_offset for region 5
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=pgd_offset for 
region[0-4]
        cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
-       shr.u r18=r16,PMD_SHIFT                 // shift L2 index into position
-       ;;
-       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
-       ;;
-(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
-       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
-       ;;
-(p7)   ld8 r17=[r17]                           // fetch the L2 entry (may be 0)
-       shr.u r19=r16,PAGE_SHIFT                // shift L3 index into position
-       ;;
-(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was L2 entry NULL?
-       dep r17=r19,r17,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
+#ifdef CONFIG_PGTABLE_4
+       shr.u r18=r22,PUD_SHIFT                 // shift pud index into position
+#else
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+#endif
+       ;;
+       ld8 r17=[r17]                           // get *pgd (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was pgd_present(*pgd) == 
NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=p[u|m]d_offset(pgd,addr)
+       ;;
+#ifdef CONFIG_PGTABLE_4
+(p7)   ld8 r17=[r17]                           // get *pud (may be 0)
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was pud_present(*pud) == 
NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pud,addr)
+       ;;
+#endif
+(p7)   ld8 r17=[r17]                           // get *pmd (may be 0)
+       shr.u r19=r22,PAGE_SHIFT                // shift pte index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was pmd_present(*pmd) == 
NULL?
+       dep r17=r19,r17,3,(PAGE_SHIFT-3)        // r17=pte_offset(pmd,addr);
 (p6)   br.cond.spnt page_fault
        mov b0=r30
        br.sptk.many b0                         // return to continuation point
@@ -626,7 +676,7 @@ END(ikey_miss)
        // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is 
faulting address)
 ENTRY(page_fault)
 #ifdef CONFIG_XEN
-       XEN_HYPER_SSM_PSR_DT;
+       XEN_HYPER_SSM_PSR_DT
 #else
        ssm psr.dt
        ;;
@@ -742,11 +792,12 @@ 1:        ld8 r18=[r17]
        ;;                                      // avoid RAW on r18
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_D|_PAGE_A,r18              // set the dirty and accessed 
bits
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only update if page is 
present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only compare if page is 
present
        ;;
 (p6)   itc.d r25                               // install updated PTE
        ;;
@@ -775,7 +826,8 @@ 1:  ld8 r18=[r17]
 #endif
        mov pr=r31,-1                           // restore pr
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -826,11 +878,12 @@ 1:        ld8 r18=[r17]
        ;;
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_A,r18                      // set the accessed bit
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only if page present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only if page present
        ;;
 #ifdef CONFIG_XEN
        mov r26=r8
@@ -869,7 +922,8 @@ 1:  ld8 r18=[r17]
 #endif /* !CONFIG_SMP */
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -892,11 +946,13 @@ ENTRY(daccess_bit)
        movl r30=1f                             // load continuation point in 
case of nested fault
        ;;
 #ifdef CONFIG_XEN
-       mov r18=r8;
-       mov r8=r16;
-       XEN_HYPER_THASH;;
-       mov r17=r8;
-       mov r8=r18;;
+       mov r18=r8
+       mov r8=r16
+       XEN_HYPER_THASH
+       ;;
+       mov r17=r8
+       mov r8=r18
+       ;;
 #else
        thash r17=r16                           // compute virtual address of 
L3 PTE
 #endif
@@ -909,11 +965,12 @@ 1:        ld8 r18=[r17]
        ;;                                      // avoid RAW on r18
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_A,r18                      // set the dirty bit
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only if page is present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only if page is present
        ;;
 #ifdef CONFIG_XEN
        mov r26=r8
@@ -950,7 +1007,8 @@ 1: ld8 r18=[r17]
        mov b0=r29                              // restore b0
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -976,143 +1034,157 @@ ENTRY(break_fault)
         * to prevent leaking bits from kernel to user level.
         */
        DBG_FAULT(11)
-       mov r16=IA64_KR(CURRENT)                // r16 = current task; 12 cycle 
read lat.
-#ifdef CONFIG_XEN
-       movl r31=XSI_IPSR
-       ;;
-       ld8 r29=[r31],XSI_IIP-XSI_IPSR          // get ipsr, point to iip
-       mov r18=__IA64_BREAK_SYSCALL
-       mov r21=ar.fpsr
-       ;;
-       ld8 r28=[r31],XSI_IIM-XSI_IIP           // get iip, point to iim
-       mov r19=b6
-       mov r25=ar.unat
-       ;;
-       ld8 r17=[r31]                           // get iim
-       mov r27=ar.rsc
-       mov r26=ar.pfs
-       ;;
-#else
-       mov r17=cr.iim
-       mov r18=__IA64_BREAK_SYSCALL
-       mov r21=ar.fpsr
-       mov r29=cr.ipsr
-       mov r19=b6
-       mov r25=ar.unat
-       mov r27=ar.rsc
-       mov r26=ar.pfs
-       mov r28=cr.iip
-#endif
-       mov r31=pr                              // prepare to save predicates
-       mov r20=r1
-       ;;
+       mov.m r16=IA64_KR(CURRENT)              // M2 r16 <- current task (12 
cyc)
+#ifdef CONFIG_XEN
+       movl r22=XSI_IPSR
+       ;;
+       ld8 r29=[r22],XSI_IIM-XSI_IPSR          // get ipsr, point to iip
+#else
+       mov r29=cr.ipsr                         // M2 (12 cyc)
+#endif
+       mov r31=pr                              // I0 (2 cyc)
+
+#ifdef CONFIG_XEN
+       ;;
+       ld8 r17=[r22],XSI_IIP-XSI_IIM
+#else
+       mov r17=cr.iim                          // M2 (2 cyc)
+#endif
+       mov.m r27=ar.rsc                        // M2 (12 cyc)
+       mov r18=__IA64_BREAK_SYSCALL            // A
+
+       mov.m ar.rsc=0                          // M2
+       mov.m r21=ar.fpsr                       // M2 (12 cyc)
+       mov r19=b6                              // I0 (2 cyc)
+       ;;
+       mov.m r23=ar.bspstore                   // M2 (12 cyc)
+       mov.m r24=ar.rnat                       // M2 (5 cyc)
+       mov.i r26=ar.pfs                        // I0 (2 cyc)
+
+       invala                                  // M0|1
+       nop.m 0                                 // M
+       mov r20=r1                              // A                    save r1
+
+       nop.m 0
+       movl r30=sys_call_table                 // X
+
+#ifdef CONFIG_XEN
+       ld8 r28=[r22]
+#else
+       mov r28=cr.iip                          // M2 (2 cyc)
+#endif
+       cmp.eq p0,p7=r18,r17                    // I0 is this a system call?
+(p7)   br.cond.spnt non_syscall                // B  no ->
+       //
+       // From this point on, we are definitely on the syscall-path
+       // and we can use (non-banked) scratch registers.
+       //
+///////////////////////////////////////////////////////////////////////
+       mov r1=r16                              // A    move task-pointer to 
"addl"-addressable reg
+       mov r2=r16                              // A    setup r2 for 
ia64_syscall_setup
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // A    r9 = 
&current_thread_info()->flags
+
        adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-       cmp.eq p0,p7=r18,r17                    // is this a system call? (p7 
<- false, if so)
-(p7)   br.cond.spnt non_syscall
-       ;;
-       ld1 r17=[r16]                           // load 
current->thread.on_ustack flag
-       st1 [r16]=r0                            // clear 
current->thread.on_ustack flag
-       add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16   // set r1 for 
MINSTATE_START_SAVE_MIN_VIRT
-       ;;
-       invala
-
-       /* adjust return address so we skip over the break instruction: */
-
-       extr.u r8=r29,41,2                      // extract ei field from cr.ipsr
-       ;;
-       cmp.eq p6,p7=2,r8                       // isr.ei==2?
-       mov r2=r1                               // setup r2 for 
ia64_syscall_setup
-       ;;
-(p6)   mov r8=0                                // clear ei to 0
-(p6)   adds r28=16,r28                         // switch cr.iip to next bundle 
cr.ipsr.ei wrapped
-(p7)   adds r8=1,r8                            // increment ei to next slot
-       ;;
-       cmp.eq pKStk,pUStk=r0,r17               // are we in kernel mode 
already?
-       dep r29=r8,r29,41,2                     // insert new ei into cr.ipsr
-       ;;
-
-       // switch from user to kernel RBS:
-       MINSTATE_START_SAVE_MIN_VIRT
-       br.call.sptk.many b7=ia64_syscall_setup
-       ;;
+       adds r15=-1024,r15                      // A    subtract 1024 from 
syscall number
+       mov r3=NR_syscalls - 1
+       ;;
+       ld1.bias r17=[r16]                      // M0|1 r17 = 
current->thread.on_ustack flag
+       ld4 r9=[r9]                             // M0|1 r9 = 
current_thread_info()->flags
+       extr.u r8=r29,41,2                      // I0   extract ei field from 
cr.ipsr
+
+       shladd r30=r15,3,r30                    // A    r30 = sys_call_table + 
8*(syscall-1024)
+       addl r22=IA64_RBS_OFFSET,r1             // A    compute base of RBS
+       cmp.leu p6,p7=r15,r3                    // A    syscall number in range?
+       ;;
+
+       lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch RBS
+(p6)   ld8 r30=[r30]                           // M0|1 load address of syscall 
entry point
+       tnat.nz.or p7,p0=r15                    // I0   is syscall nr a NaT?
+
+       mov.m ar.bspstore=r22                   // M2   switch to kernel RBS
+       cmp.eq p8,p9=2,r8                       // A    isr.ei==2?
+       ;;
+
+(p8)   mov r8=0                                // A    clear ei to 0
+(p7)   movl r30=sys_ni_syscall                 // X
+
+(p8)   adds r28=16,r28                         // A    switch cr.iip to next 
bundle
+(p9)   adds r8=1,r8                            // A    increment ei to next 
slot
+       nop.i 0
+       ;;
+
+       mov.m r25=ar.unat                       // M2 (5 cyc)
+       dep r29=r8,r29,41,2                     // I0   insert new ei into 
cr.ipsr
+       adds r15=1024,r15                       // A    restore original 
syscall number
+       //
+       // If any of the above loads miss in L1D, we'll stall here until
+       // the data arrives.
+       //
+///////////////////////////////////////////////////////////////////////
+       st1 [r16]=r0                            // M2|3 clear 
current->thread.on_ustack flag
+       mov b6=r30                              // I0   setup syscall handler 
branch reg early
+       cmp.eq pKStk,pUStk=r0,r17               // A    were we on kernel 
stacks already?
+
+       and r9=_TIF_SYSCALL_TRACEAUDIT,r9       // A    mask trace or audit
+       mov r18=ar.bsp                          // M2 (12 cyc)
+(pKStk)        br.cond.spnt .break_fixup               // B    we're already 
in kernel-mode -- fix up RBS
+       ;;
+.back_from_break_fixup:
+(pUStk)        addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute 
base of memory stack
+       cmp.eq p14,p0=r9,r0                     // A    are syscalls being 
traced/audited?
+       br.call.sptk.many b7=ia64_syscall_setup // B
+1:
+       mov ar.rsc=0x3                          // M2   set eager mode, pl 0, 
LE, loadrs=0
+       nop 0
 #ifdef CONFIG_XEN
        mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
 #else
-       MINSTATE_END_SAVE_MIN_VIRT              // switch to bank 1
-#endif
-#ifdef CONFIG_XEN
-       movl r3=XSI_PSR_IC
-       mov r16=1
-       ;;
-#if 1
-       st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR     // if (p15) vpsr.i = 1
-       mov r16=r0
-       ;;
-(p15)  ld4 r16=[r3]                            // if (pending_interrupts)
-       ;;
-       cmp.ne  p6,p0=r16,r0
+       bsw.1                                   // B (6 cyc) regs are saved, 
switch to bank 1
+#endif
+       ;;
+
+#ifdef CONFIG_XEN
+       movl r16=XSI_PSR_IC
+       mov r3=1
+       ;;
+       st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
+#else
+       ssm psr.ic | PSR_DEFAULT_BITS           // M2   now it's safe to 
re-enable intr.-collection
+#endif
+       movl r3=ia64_ret_from_syscall           // X
+       ;;
+
+       srlz.i                                  // M0   ensure interruption 
collection is on
+       mov rp=r3                               // I0   set the real return addr
+(p10)  br.cond.spnt.many ia64_ret_from_syscall // B    return if bad 
call-frame or r15 is a NaT
+
+#ifdef CONFIG_XEN
+(p15)  ld8 r16=[r16]                           // vpsr.i
+       ;;
+(p15)  st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR    // if (p15) vpsr.i = 1
+       mov r2=r0
+       ;;
+(p15)  ld4 r2=[r16]                            // if (pending_interrupts)
+       ;;
+       cmp.ne  p6,p0=r2,r0
        ;;
 (p6)   ssm     psr.i                           //   do a real ssm psr.i
-       ;;
-#else
-//     st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
-       adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3    // SKIP vpsr.ic = 1
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR     // if (p15) vpsr.i = 1
-       mov r16=r0
-       ;;
-(p15)  ld4 r16=[r3]                            // if (pending_interrupts)
-       ;;
-       cmp.ne  p6,p0=r16,r0
-       ;;
-//(p6) ssm     psr.i                           //   do a real ssm psr.i
-//(p6) XEN_HYPER_SSM_I;
-(p6)   break 0x7;
-       ;;
-#endif
-       mov r3=NR_syscalls - 1
-       ;;
-#else
-       ssm psr.ic | PSR_DEFAULT_BITS
-       ;;
-       srlz.i                                  // guarantee that interruption 
collection is on
-       mov r3=NR_syscalls - 1
-       ;;
-(p15)  ssm psr.i                               // restore psr.i
-#endif
-       // p10==true means out registers are more than 8 or r15's Nat is true
-(p10)  br.cond.spnt.many ia64_ret_from_syscall
-       ;;
-       movl r16=sys_call_table
-
-       adds r15=-1024,r15                      // r15 contains the syscall 
number---subtract 1024
-       movl r2=ia64_ret_from_syscall
-       ;;
-       shladd r20=r15,3,r16                    // r20 = sys_call_table + 
8*(syscall-1024)
-       cmp.leu p6,p7=r15,r3                    // (syscall > 0 && syscall < 
1024 + NR_syscalls) ?
-       mov rp=r2                               // set the real return addr
-       ;;
-(p6)   ld8 r20=[r20]                           // load address of syscall 
entry point
-(p7)   movl r20=sys_ni_syscall
-
-       add r2=TI_FLAGS+IA64_TASK_SIZE,r13
-       ;;
-       ld4 r2=[r2]                             // r2 = 
current_thread_info()->flags
-       ;;
-       and r2=_TIF_SYSCALL_TRACEAUDIT,r2       // mask trace or audit
-       ;;
-       cmp.eq p8,p0=r2,r0
-       mov b6=r20
-       ;;
-(p8)   br.call.sptk.many b6=b6                 // ignore this return addr
-       br.cond.sptk ia64_trace_syscall
+#else
+(p15)  ssm psr.i                               // M2   restore psr.i
+#endif
+(p14)  br.call.sptk.many b6=b6                 // B    invoke syscall-handker 
(ignore return addr)
+       br.cond.spnt.many ia64_trace_syscall    // B    do syscall-tracing 
thingamagic
        // NOT REACHED
+///////////////////////////////////////////////////////////////////////
+       // On entry, we optimistically assumed that we're coming from 
user-space.
+       // For the rare cases where a system-call is done from within the 
kernel,
+       // we fix things up at this point:
+.break_fixup:
+       add r1=-IA64_PT_REGS_SIZE,sp            // A    allocate space for 
pt_regs structure
+       mov ar.rnat=r24                         // M2   restore kernel's AR.RNAT
+       ;;
+       mov ar.bspstore=r23                     // M2   restore kernel's 
AR.BSPSTORE
+       br.cond.sptk .back_from_break_fixup
 END(break_fault)
 
        .org ia64_ivt+0x3000
@@ -1201,8 +1273,6 @@ END(interrupt)
         *      - r31: saved pr
         *      -  b0: original contents (to be saved)
         * On exit:
-        *      - executing on bank 1 registers
-        *      - psr.ic enabled, interrupts restored
         *      -  p10: TRUE if syscall is invoked with more than 8 out
         *              registers or r15's Nat is true
         *      -  r1: kernel's gp
@@ -1210,8 +1280,11 @@ END(interrupt)
         *      -  r8: -EINVAL if p10 is true
         *      - r12: points to kernel stack
         *      - r13: points to current task
+        *      - r14: preserved (same as on entry)
+        *      - p13: preserved
         *      - p15: TRUE if interrupts need to be re-enabled
         *      - ar.fpsr: set to kernel settings
+        *      -  b6: preserved (same as on entry)
         */
 #ifndef CONFIG_XEN
 GLOBAL_ENTRY(ia64_syscall_setup)
@@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
 (p13)  mov in5=-1
        ;;
        st8 [r16]=r21,PT(R8)-PT(AR_FPSR)        // save ar.fpsr
-       tnat.nz p14,p0=in6
+       tnat.nz p13,p0=in6
        cmp.lt p10,p9=r11,r8    // frame size can't be more than local+8
        ;;
-       stf8 [r16]=f1           // ensure pt_regs.r8 != 0 (see 
handle_syscall_error)
+       mov r8=1
 (p9)   tnat.nz p10,p0=r15
        adds r12=-16,r1         // switch to kernel memory stack (with 16 bytes 
of scratch)
 
@@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
        mov r13=r2                              // establish `current'
        movl r1=__gp                            // establish kernel global 
pointer
        ;;
-(p14)  mov in6=-1
+       st8 [r16]=r8            // ensure pt_regs.r8 != 0 (see 
handle_syscall_error)
+(p13)  mov in6=-1
 (p8)   mov in7=-1
-       nop.i 0
 
        cmp.eq pSys,pNonSys=r0,r0               // set pSys=1, pNonSys=0
        movl r17=FPSR_DEFAULT
@@ -1323,6 +1396,8 @@ END(ia64_syscall_setup)
         * element, followed by the arguments.
         */
 ENTRY(dispatch_illegal_op_fault)
+       .prologue
+       .body
        SAVE_MIN_WITH_COVER
        ssm psr.ic | PSR_DEFAULT_BITS
        ;;
@@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault)
        mov out0=ar.ec
        ;;
        SAVE_REST
+       PT_REGS_UNWIND_INFO(0)
        ;;
        br.call.sptk.many rp=ia64_illegal_op_fault
 .ret0: ;;
@@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault)
        FAULT(17)
 
 ENTRY(non_syscall)
+       mov ar.rsc=r27                  // restore ar.rsc before 
SAVE_MIN_WITH_COVER
+       ;;
        SAVE_MIN_WITH_COVER
 
        // There is no particular reason for this code to be here, other than 
that
@@ -1540,7 +1618,7 @@ ENTRY(daccess_rights)
        ;;
        ld8 r16=[r16]
        ;;
-       XEN_HYPER_RSM_PSR_DT;
+       XEN_HYPER_RSM_PSR_DT
 #else
        mov r16=cr.ifa
        rsm psr.dt
@@ -1584,6 +1662,25 @@ END(disabled_fp_reg)
 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
 ENTRY(nat_consumption)
        DBG_FAULT(26)
+
+       mov r16=cr.ipsr
+       mov r17=cr.isr
+       mov r31=pr                              // save PR
+       ;;
+       and r18=0xf,r17                         // r18 = cr.ipsr.code{3:0}
+       tbit.z p6,p0=r17,IA64_ISR_NA_BIT
+       ;;
+       cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
+       dep r16=-1,r16,IA64_PSR_ED_BIT,1
+(p6)   br.cond.spnt 1f         // branch if (cr.ispr.na == 0 || 
cr.ipsr.code{3:0} != LFETCH)
+       ;;
+       mov cr.ipsr=r16         // set cr.ipsr.na
+       mov pr=r31,-1
+       ;;
+       rfi
+
+1:     mov pr=r31,-1
+       ;;
        FAULT(26)
 END(nat_consumption)
 
@@ -1624,7 +1721,7 @@ ENTRY(speculation_vector)
 #ifdef CONFIG_XEN
        XEN_HYPER_RFI;
 #else
-       rfi
+       rfi                             // and go back
 #endif
 END(speculation_vector)
 
@@ -1647,7 +1744,6 @@ END(debug_vector)
 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
 ENTRY(unaligned_access)
        DBG_FAULT(30)
-       mov r16=cr.ipsr
        mov r31=pr              // prepare to save predicates
        ;;
        br.sptk.many dispatch_unaligned_handler
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Tue May 30 14:30:34 
2006 -0500
@@ -155,6 +155,8 @@
        ;;                                                                      
                \
        ld4 r30=[r8];                                                           
                \
        ;;                                                                      
                \
+       /* set XSI_INCOMPL_REGFR 0 */                                           
                \
+       st4 [r8]=r0;                                                            
                \
        cmp.eq  p6,p7=r30,r0;                                                   
                \
        ;; /* not sure if this stop bit is necessary */                         
                \
 (p6)   adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8;                          
                \
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Tue May 30 14:30:34 
2006 -0500
@@ -8,28 +8,17 @@
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
 
-       .data
-       .align 8
-       .globl running_on_xen
-running_on_xen:
-       data4 0
-
 #define isBP   p3      // are we the Bootstrap Processor?
 
        .text
 GLOBAL_ENTRY(early_xen_setup)
-       mov r8=cr.dcr
+       mov r8=ar.rsc           // Initialized in head.S
 (isBP) movl r9=running_on_xen;;
-       extr.u r8=r8,63,1;;
-       cmp.ne p7,p0=r8,r0;;
+       extr.u r8=r8,2,2;;      // Extract pl fields
+       cmp.ne p7,p0=r8,r0;;    // p7: running on xen 
+(p7)   mov r8=1                // booleanize.
+(p7)   movl r10=xen_ivt;;
 (isBP) st4 [r9]=r8
-(p7)   movl r10=xen_ivt;;
 (p7)   mov cr.iva=r10
        br.ret.sptk.many rp;;
 END(early_xen_setup)
-
-GLOBAL_ENTRY(is_running_on_xen)
-       movl r9=running_on_xen;;
-       ld4 r8=[r9]
-       br.ret.sptk.many rp;;
-END(is_running_on_xen)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Tue May 30 
14:30:34 2006 -0500
@@ -26,53 +26,34 @@
 #include <asm/sections.h>
 #include <xen/interface/memory.h>
 
-unsigned long pci_mem_start = 0xaeedbabe;
-
 /* 
  * PFN of last memory page.
  */
 unsigned long end_pfn; 
 EXPORT_SYMBOL(end_pfn);
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
-unsigned long end_pfn_map; 
-
-/* 
- * Add a memory region to the kernel e820 map.
- */ 
-void __init add_memory_region(unsigned long start, unsigned long size, int 
type)
-{
-       int x = e820.nr_map;
-
-       if (x == E820MAX) {
-               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-               return;
-       }
-
-       e820.map[x].addr = start;
-       e820.map[x].size = size;
-       e820.map[x].type = type;
-       e820.nr_map++;
-}
-
-#ifndef CONFIG_XEN
 
 /* 
  * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
  * The direct mapping extends to end_pfn_map, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */ 
+unsigned long end_pfn_map; 
 
 /* 
  * Last pfn which the user wants to use.
  */
-
+unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
+
+#ifndef CONFIG_XEN
 extern struct resource code_resource, data_resource;
+#endif
 
 /* Check for some hardcoded bad areas that early boot is not allowed to touch 
*/ 
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
 { 
        unsigned long addr = *addrp, last = addr + size; 
 
+#ifndef CONFIG_XEN
        /* various gunk below that needed for SMP startup */
        if (addr < 0x8000) { 
                *addrp = 0x8000;
@@ -100,9 +81,16 @@ static inline int bad_addr(unsigned long
                return 1;
        }
        /* XXX ramdisk image here? */ 
+#else
+       if (last < (table_end<<PAGE_SHIFT)) {
+               *addrp = table_end << PAGE_SHIFT;
+               return 1;
+       }
+#endif
        return 0;
 } 
 
+#ifndef CONFIG_XEN
 int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) 
 { 
        int i;
@@ -116,6 +104,7 @@ int __init e820_mapped(unsigned long sta
        } 
        return 0;
 }
+#endif
 
 /* 
  * Find a free area in a specific range. 
@@ -246,22 +235,23 @@ e820_hole_size(unsigned long start_pfn, 
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
-void __init e820_reserve_resources(void)
-{
-       int i;
-       for (i = 0; i < e820.nr_map; i++) {
+void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
+{
+       int i;
+       for (i = 0; i < nr_map; i++) {
                struct resource *res;
                res = alloc_bootmem_low(sizeof(struct resource));
-               switch (e820.map[i].type) {
+               switch (e820[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
                case E820_ACPI: res->name = "ACPI Tables"; break;
                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
                default:        res->name = "reserved";
                }
-               res->start = e820.map[i].addr;
-               res->end = res->start + e820.map[i].size - 1;
+               res->start = e820[i].addr;
+               res->end = res->start + e820[i].size - 1;
                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
+#ifndef CONFIG_XEN
                if (e820.map[i].type == E820_RAM) {
                        /*
                         *  We don't know which RAM region contains kernel data,
@@ -274,9 +264,27 @@ void __init e820_reserve_resources(void)
                        request_resource(res, &crashk_res);
 #endif
                }
-       }
-}
-#endif /* CONFIG_XEN */
+#endif
+       }
+}
+
+/* 
+ * Add a memory region to the kernel e820 map.
+ */ 
+void __init add_memory_region(unsigned long start, unsigned long size, int 
type)
+{
+       int x = e820.nr_map;
+
+       if (x == E820MAX) {
+               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+               return;
+       }
+
+       e820.map[x].addr = start;
+       e820.map[x].size = size;
+       e820.map[x].type = type;
+       e820.nr_map++;
+}
 
 void __init e820_print_map(char *who)
 {
@@ -304,7 +312,6 @@ void __init e820_print_map(char *who)
        }
 }
 
-#ifndef CONFIG_XEN
 /*
  * Sanitize the BIOS e820 map.
  *
@@ -491,9 +498,13 @@ static int __init sanitize_e820_map(stru
  */
 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 {
+#ifndef CONFIG_XEN
        /* Only one memory region (or negative)? Ignore it */
        if (nr_map < 2)
                return -1;
+#else
+       BUG_ON(nr_map < 1);
+#endif
 
        do {
                unsigned long start = biosmap->addr;
@@ -505,6 +516,7 @@ static int __init copy_e820_map(struct e
                if (start > end)
                        return -1;
 
+#ifndef CONFIG_XEN
                /*
                 * Some BIOSes claim RAM in the 640k - 1M region.
                 * Not right. Fix it up.
@@ -523,12 +535,14 @@ static int __init copy_e820_map(struct e
                                size = end - start;
                        }
                }
+#endif
 
                add_memory_region(start, size, type);
        } while (biosmap++,--nr_map);
        return 0;
 }
 
+#ifndef CONFIG_XEN
 void __init setup_memory_region(void)
 {
        char *who = "BIOS-e820";
@@ -562,104 +576,63 @@ void __init setup_memory_region(void)
 
 #else  /* CONFIG_XEN */
 
-extern unsigned long xen_override_max_pfn;
-extern union xen_start_info_union xen_start_info_union;
-
-unsigned long __init e820_end_of_ram(void)
-{
-       unsigned long max_end_pfn;
-
-       if (xen_override_max_pfn == 0) {
-               max_end_pfn = xen_start_info->nr_pages;
-               /* Default 8MB slack (to balance backend allocations). */
-               max_end_pfn += 8 << (20 - PAGE_SHIFT);
-       } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
-               max_end_pfn = xen_override_max_pfn;
-       } else {
-               max_end_pfn = xen_start_info->nr_pages;
-       }
-
-       return max_end_pfn;
-}
-
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
-       return 0;
-}
-
-void __init e820_reserve_resources(void) 
-{
-       dom0_op_t op;
-       struct dom0_memory_map_entry *map;
-       unsigned long gapstart, gapsize, round, last;
-       int i, found = 0;
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return;
-
-       map = alloc_bootmem_low_pages(PAGE_SIZE);
-       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
-       set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
-       op.u.physical_memory_map.max_map_entries =
-               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
-       BUG_ON(HYPERVISOR_dom0_op(&op));
-
-       last = 0x100000000ULL;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-
-       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
-               struct resource *res;
-
-               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
-                       gapsize = last - map[i].end;
-                       gapstart = map[i].end;
-                       found = 1;
-               }
-               if (map[i].start < last)
-                       last = map[i].start;
-
-               if (map[i].end > 0x100000000ULL)
-                       continue;
-               res = alloc_bootmem_low(sizeof(struct resource));
-               res->name = map[i].is_ram ? "System RAM" : "reserved";
-               res->start = map[i].start;
-               res->end = map[i].end - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-               request_resource(&iomem_resource, res);
-       }
-
-       free_bootmem(__pa(map), PAGE_SIZE);
-
-       if (!found) {
-               gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
-               gapstart = (gapstart << PAGE_SHIFT) + 1024*1024;
-               printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit 
address range\n"
-                      KERN_ERR "PCI: Unassigned devices with 32bit resource 
registers may break!\n");
-       }
-
+void __init setup_memory_region(void)
+{
+       int rc;
+       struct xen_memory_map memmap;
        /*
-        * See how much we want to round up: start off with
-        * rounding to the next 1MB area.
+        * This is rather large for a stack variable but this early in
+        * the boot process we know we have plenty slack space.
         */
-       round = 0x100000;
-       while ((gapsize >> 4) > round)
-               round += round;
-       /* Fun with two's complement */
-       pci_mem_start = (gapstart + round) & -round;
-
-       printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
-               pci_mem_start, gapstart, gapsize);
-}
-
+       struct e820entry map[E820MAX];
+
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, map);
+
+       rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+       if ( rc == -ENOSYS ) {
+               memmap.nr_entries = 1;
+               map[0].addr = 0ULL;
+               map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+               /* 8MB slack (to balance backend allocations). */
+               map[0].size += 8 << 20;
+               map[0].type = E820_RAM;
+               rc = 0;
+       }
+       BUG_ON(rc);
+
+       sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+       BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
+
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+       e820_print_map("Xen");
+}
 #endif
 
 void __init parse_memopt(char *p, char **from) 
 { 
+       int i;
+       unsigned long current_end;
+       unsigned long end;
+
        end_user_pfn = memparse(p, from);
        end_user_pfn >>= PAGE_SHIFT;    
-       xen_override_max_pfn = (unsigned long) end_user_pfn;
+
+       end = end_user_pfn<<PAGE_SHIFT;
+       i = e820.nr_map-1;
+       current_end = e820.map[i].addr + e820.map[i].size;
+
+       if (current_end < end) {
+               /*
+                 * The e820 map ends before our requested size so
+                 * extend the final entry to the requested address.
+                 */
+               if (e820.map[i].type == E820_RAM)
+                       e820.map[i].size = end - e820.map[i].addr;
+               else
+                       add_memory_region(current_end, end - current_end, 
E820_RAM);
+       }
 } 
 
 void __init parse_memmapopt(char *p, char **from)
@@ -683,16 +656,17 @@ void __init parse_memmapopt(char *p, cha
        p = *from;
 }
 
+unsigned long pci_mem_start = 0xaeedbabe;
+
 /*
  * Search for the biggest gap in the low 32 bits of the e820
  * memory space.  We pass this space to PCI to assign MMIO resources
  * for hotplug or unconfigured devices in.
  * Hopefully the BIOS let enough space left.
  */
-__init void e820_setup_gap(void)
-{
-#ifndef CONFIG_XEN
-       unsigned long gapstart, gapsize;
+__init void e820_setup_gap(struct e820entry *e820, int nr_map)
+{
+       unsigned long gapstart, gapsize, round;
        unsigned long last;
        int i;
        int found = 0;
@@ -700,10 +674,10 @@ __init void e820_setup_gap(void)
        last = 0x100000000ull;
        gapstart = 0x10000000;
        gapsize = 0x400000;
-       i = e820.nr_map;
+       i = nr_map;
        while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
+               unsigned long long start = e820[i].addr;
+               unsigned long long end = start + e820[i].size;
 
                /*
                 * Since "last" is at most 4GB, we know we'll
@@ -729,16 +703,15 @@ __init void e820_setup_gap(void)
        }
 
        /*
-        * Start allocating dynamic PCI memory a bit into the gap,
-        * aligned up to the nearest megabyte.
-        *
-        * Question: should we try to pad it up a bit (do something
-        * like " + (gapsize >> 3)" in there too?). We now have the
-        * technology.
+        * See how much we want to round up: start off with
+        * rounding to the next 1MB area.
         */
-       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+       round = 0x100000;
+       while ((gapsize >> 4) > round)
+               round += round;
+       /* Fun with two's complement */
+       pci_mem_start = (gapstart + round) & -round;
 
        printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
                pci_mem_start, gapstart, gapsize);
-#endif
-}
+}
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Tue May 30 
14:30:34 2006 -0500
@@ -76,8 +76,8 @@
 #include <xen/features.h>
 #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
-#define end_pfn_map end_pfn
 #include <asm/mach-xen/setup_arch_post.h>
+#include <xen/interface/memory.h>
 
 extern unsigned long start_pfn;
 extern struct edid_info edid_info;
@@ -490,19 +490,6 @@ static __init void parse_cmdline_early (
 }
 
 #ifndef CONFIG_NUMA
-#ifdef CONFIG_XEN
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-        unsigned long bootmap_size;
-
-        bootmap_size = init_bootmem(start_pfn, end_pfn);
-        free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT);   
-        reserve_bootmem(HIGH_MEMORY,
-                        (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
-                        - HIGH_MEMORY);
-}
-#else
 static void __init
 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_
        if (bootmap == -1L)
                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+#ifdef CONFIG_XEN
+       e820_bootmem_free(NODE_DATA(0), 0, 
xen_start_info->nr_pages<<PAGE_SHIFT);
+#else
        e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+#endif
        reserve_bootmem(bootmap, bootmap_size);
 } 
-#endif /* !CONFIG_XEN */
 #endif
 
 /* Use inline assembly to define this because the nops are defined 
@@ -636,6 +626,11 @@ void __init setup_arch(char **cmdline_p)
 {
        unsigned long kernel_end;
 
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       struct e820entry *machine_e820;
+       struct xen_memory_map memmap;
+#endif
+
 #ifdef CONFIG_XEN
        /* Register a call for panic conditions. */
        notifier_chain_register(&panic_notifier_list, &xen_panic_block);
@@ -693,20 +688,18 @@ void __init setup_arch(char **cmdline_p)
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
 #endif
+#endif /* !CONFIG_XEN */
        setup_memory_region();
        copy_edd();
-#endif /* !CONFIG_XEN */
 
        if (!MOUNT_ROOT_RDONLY)
                root_mountflags &= ~MS_RDONLY;
        init_mm.start_code = (unsigned long) &_text;
        init_mm.end_code = (unsigned long) &_etext;
        init_mm.end_data = (unsigned long) &_edata;
-#ifdef CONFIG_XEN
-       init_mm.brk = start_pfn << PAGE_SHIFT;
-#else
-       init_mm.brk = (unsigned long) &_end;    
-
+       init_mm.brk = (unsigned long) &_end;
+
+#ifndef CONFIG_XEN
        code_resource.start = virt_to_phys(&_text);
        code_resource.end = virt_to_phys(&_etext)-1;
        data_resource.start = virt_to_phys(&_etext);
@@ -735,12 +728,11 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_NUMA
-       numa_initmem_init(start_pfn, end_pfn); 
+       numa_initmem_init(0, end_pfn); 
 #else
-       contig_initmem_init(start_pfn, end_pfn);
-#endif
-
-#ifndef CONFIG_XEN
+       contig_initmem_init(0, end_pfn);
+#endif
+
        /* Reserve direct mapping */
        reserve_bootmem_generic(table_start << PAGE_SHIFT, 
                                (table_end - table_start) << PAGE_SHIFT);
@@ -749,6 +741,10 @@ void __init setup_arch(char **cmdline_p)
        kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
        reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
 
+#ifdef CONFIG_XEN
+       /* reserve physmap, start info and initial page tables */
+       reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT)-kernel_end);
+#else
        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
@@ -933,13 +929,25 @@ void __init setup_arch(char **cmdline_p)
        prefill_possible_map();
 #endif
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        /*
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
        probe_roms();
-       e820_reserve_resources(); 
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+               memmap.nr_entries = E820MAX;
+               set_xen_guest_handle(memmap.buffer, machine_e820);
+
+               BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, 
&memmap));
+
+               e820_reserve_resources(machine_e820, memmap.nr_entries);
+       }
+#elif !defined(CONFIG_XEN)
+       probe_roms();
+       e820_reserve_resources(e820.map, e820.nr_map);
 #endif
 
        request_resource(&iomem_resource, &video_ram_resource);
@@ -951,7 +959,14 @@ void __init setup_arch(char **cmdline_p)
                request_resource(&ioport_resource, &standard_io_resources[i]);
        }
 
-       e820_setup_gap();
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               e820_setup_gap(machine_e820, memmap.nr_entries);
+               free_bootmem(__pa(machine_e820), PAGE_SIZE);
+       }
+#elif !defined(CONFIG_XEN)
+       e820_setup_gap(e820.map, e820.nr_map);
+#endif
 
 #ifdef CONFIG_GART_IOMMU
        iommu_hole_init();
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue May 30 14:30:34 
2006 -0500
@@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addre
        set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
 }
 
-unsigned long __initdata table_start, tables_space; 
+unsigned long __initdata table_start, table_end; 
 
 unsigned long get_machine_pfn(unsigned long addr)
 {
@@ -409,11 +409,17 @@ static inline int make_readonly(unsigned
 {
        int readonly = 0;
 
-       /* Make old and new page tables read-only. */
+       /* Make new page tables read-only. */
+       if (!xen_feature(XENFEAT_writable_page_tables)
+           && (paddr >= (table_start << PAGE_SHIFT))
+           && (paddr < (table_end << PAGE_SHIFT)))
+               readonly = 1;
+       /* Make old page tables read-only. */
        if (!xen_feature(XENFEAT_writable_page_tables)
            && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
-           && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+           && (paddr < (start_pfn << PAGE_SHIFT)))
                readonly = 1;
+
        /*
         * No need for writable mapping of kernel image. This also ensures that
         * page and descriptor tables embedded inside don't have writable
@@ -544,7 +550,7 @@ void __init xen_init_pt(void)
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
 }
 
-void __init extend_init_mapping(void) 
+void __init extend_init_mapping(unsigned long tables_space)
 {
        unsigned long va = __START_KERNEL_map;
        unsigned long phys, addr, *pte_page;
@@ -599,23 +605,23 @@ void __init extend_init_mapping(void)
 
 static void __init find_early_table_space(unsigned long end)
 {
-       unsigned long puds, pmds, ptes; 
+       unsigned long puds, pmds, ptes, tables; 
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
 
-       tables_space =
-               round_up(puds * 8, PAGE_SIZE) + 
+       tables = round_up(puds * 8, PAGE_SIZE) + 
                round_up(pmds * 8, PAGE_SIZE) + 
                round_up(ptes * 8, PAGE_SIZE); 
 
-       extend_init_mapping();
+       extend_init_mapping(tables);
 
        table_start = start_pfn;
+       table_end = table_start + (tables>>PAGE_SHIFT);
 
        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
-               end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT);
+               end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsig
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }
 
-       BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> 
PAGE_SHIFT));
+       BUG_ON(!after_bootmem && start_pfn != table_end);
 
        __flush_tlb_all();
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Tue May 30 14:30:34 
2006 -0500
@@ -329,7 +329,7 @@ out:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct tpm_private *tp = dev->data;
        DPRINTK("\n");
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 14:30:34 2006 -0500
@@ -1,5 +1,4 @@
 
-obj-y  += net_driver_util.o
 obj-y  += util.o
 
 obj-y  += core/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Tue May 30 
14:30:34 2006 -0500
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
  * Also protects non-atomic updates of current_pages and driver_pages, and
  * balloon lists.
  */
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
+       unsigned long min_target;
+
+       /* Do not allow target to reduce below 2% of maximum memory size. */
+       min_target = max_pfn / 50;
+       target = max(target, min_target);
+
        /* No need for lock. Not read-modify-write updates. */
        hard_limit   = ~0UL;
        target_pages = target;
@@ -468,8 +474,8 @@ static int __init balloon_init(void)
 
        IPRINTK("Initialising balloon driver.\n");
 
-       if (xen_init() < 0)
-               return -1;
+       if (!is_running_on_xen())
+               return -ENODEV;
 
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages = current_pages;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue May 30 
14:30:34 2006 -0500
@@ -82,7 +82,7 @@ typedef struct {
 
 static pending_req_t *pending_reqs;
 static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -526,7 +526,7 @@ static int __init blkif_init(void)
        struct page *page;
        int i;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Tue May 30 
14:30:34 2006 -0500
@@ -247,7 +247,7 @@ fail:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev->data;
        struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device * dev = info->xbdev;
-               XenbusState state = xenbus_read_driver_state(dev->otherend);
+               enum xenbus_state state = 
xenbus_read_driver_state(dev->otherend);
 
                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
@@ -792,7 +792,7 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        return xenbus_register_frontend(&blkfront);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Tue May 30 14:30:34 
2006 -0500
@@ -93,7 +93,7 @@ static struct block_device_operations xl
        .ioctl  = blkif_ioctl,
 };
 
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
 
 static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue May 30 14:30:34 
2006 -0500
@@ -138,7 +138,7 @@ typedef struct {
  */
 static pending_req_t pending_reqs[MAX_PENDING_REQS];
 static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
 /* NB. We use a different index type to differentiate from shared blk rings. */
 typedef unsigned int PEND_RING_IDX;
 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Tue May 30 
14:30:34 2006 -0500
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
 {
        unsigned int goal;
        goal = simple_strtoul(str, NULL, 0);
-       while (wbuf_size < goal)
-               wbuf_size <<= 1;
+       if (goal) {
+               goal = roundup_pow_of_two(goal);
+               if (wbuf_size < goal)
+                       wbuf_size = goal;
+       }
        return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
 /* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
 
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
 
 /******************** Kernel console driver ********************************/
 
-static void kcons_write(
-       struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
 {
        int           i = 0;
        unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
        spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
-static void kcons_write_dom0(
-       struct console *c, const char *s, unsigned int count)
-{
-       int rc;
-
-       while ((count > 0) &&
-              ((rc = HYPERVISOR_console_io(
-                       CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int 
count)
+{
+
+       while (count > 0) {
+               int rc;
+               rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+               if (rc <= 0)
+                       break;
                count -= rc;
                s += rc;
        }
@@ -183,7 +185,7 @@ static struct console kcons_info = {
 #define __RETCODE 0
 static int __init xen_console_init(void)
 {
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return __RETCODE;
 
        if (xen_start_info->flags & SIF_INITDOMAIN) {
@@ -566,7 +568,7 @@ static int __init xencons_init(void)
 {
        int rc;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        if (xc_mode == XC_OFF)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue May 30 14:30:34 
2006 -0500
@@ -4,8 +4,9 @@
 
 obj-y   := evtchn.o reboot.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET)     += skbuff.o
-obj-$(CONFIG_SMP)     += smpboot.o
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)     += xen_proc.o
+obj-$(CONFIG_NET)         += skbuff.o
+obj-$(CONFIG_SMP)         += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Tue May 30 14:30:34 
2006 -0500
@@ -51,10 +51,10 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
 
 /* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = 
-1};
 
 /* Packed IRQ information: binding type, sub-type index, and event channel. */
 static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
 
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
                BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
 
        /* Secondary CPUs must have no VIRQ or IPI bindings. */
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
+               if (cpu == 0)
+                       continue;
                for (virq = 0; virq < NR_VIRQS; virq++)
                        BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
                for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
 void __init xen_init_IRQ(void)
 {
        int i;
-       int cpu;
-
-       spin_lock_init(&irq_mapping_update_lock);
 
        init_evtchn_cpu_bindings();
 
-       /* No VIRQ or IPI bindings. */
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               for (i = 0; i < NR_VIRQS; i++)
-                       per_cpu(virq_to_irq, cpu)[i] = -1;
-               for (i = 0; i < NR_IPIS; i++)
-                       per_cpu(ipi_to_irq, cpu)[i] = -1;
-       }
-
-       /* No event-channel -> IRQ mappings. */
-       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-               evtchn_to_irq[i] = -1;
-               mask_evtchn(i); /* No event channels are 'live' right now. */
-       }
+       /* No event channels are 'live' right now. */
+       for (i = 0; i < NR_EVENT_CHANNELS; i++)
+               mask_evtchn(i);
 
        /* No IRQ -> event-channel mappings. */
        for (i = 0; i < NR_IRQS; i++)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Tue May 30 14:30:34 
2006 -0500
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <xen/interface/xen.h>
-#include <asm/fixmap.h>
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
 static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static grant_entry_t *shared = NULL;
 
@@ -443,7 +442,7 @@ gnttab_init(void)
 {
        int i;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        if (gnttab_resume() < 0)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c  Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c  Tue May 30 
14:30:34 2006 -0500
@@ -49,6 +49,9 @@ static struct kobj_type hyp_sysfs_kobj_t
 
 static int __init hypervisor_subsys_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
        return subsystem_register(&hypervisor_subsys);
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Tue May 30 14:30:34 
2006 -0500
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
 static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int  smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend()  (0)
-#define smp_resume()   ((void)0)
-#endif
 
 /* Ensure we run on the idle task page tables so that we will
    switch page tables before running user space. This is needed
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Tue May 30 14:30:34 
2006 -0500
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
 #include <xen/xenbus.h>
 
 #ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 #elif !defined(CONFIG_X86_IO_APIC)
 unsigned int maxcpus = NR_CPUS;
 #endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
 
 void __init prefill_possible_map(void)
 {
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
        cpu_idle();
 }
 
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
 {
        vcpu_guest_context_t ctxt;
-       struct task_struct *idle = idle_task(vcpu);
+       struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
-       struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+       struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
 #else
-       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
-       if (vcpu == 0)
+       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+       if (cpu == 0)
                return;
 
        memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
 
        ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
-       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
-       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
                cpu_set(cpu, cpu_present_map);
 #endif
 
-               vcpu_prepare(cpu);
-       }
-
-       xenbus_allowed_cpumask = cpu_present_map;
+               cpu_initialize_context(cpu);
+       }
+
+       init_xenbus_allowed_cpumask();
 
        /* Currently, Xen gives no dynamic NUMA/HT info. */
        for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
        cpu_online_map   = cpumask_of_cpu(0);
 }
 
-static int local_cpu_hotplug_request(void)
-{
-       /*
-        * We assume a CPU hotplug request comes from local admin if it is made
-        * via a userspace process (i.e., one with a real mm_struct).
-        */
-       return (current->mm != NULL);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void vcpu_hotplug(unsigned int cpu)
-{
-       int err;
-       char dir[32], state[32];
-
-       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
-               return;
-
-       sprintf(dir, "cpu/%d", cpu);
-       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
-       if (err != 1) {
-               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-               return;
-       }
-
-       if (strcmp(state, "online") == 0) {
-               cpu_set(cpu, xenbus_allowed_cpumask);
-               (void)cpu_up(cpu);
-       } else if (strcmp(state, "offline") == 0) {
-               cpu_clear(cpu, xenbus_allowed_cpumask);
-               (void)cpu_down(cpu);
-       } else {
-               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-                      state, cpu);
-       }
-}
-
-static void handle_vcpu_hotplug_event(
-       struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
-       int cpu;
-       char *cpustr;
-       const char *node = vec[XS_WATCH_PATH];
-
-       if ((cpustr = strstr(node, "cpu/")) != NULL) {
-               sscanf(cpustr, "cpu/%d", &cpu);
-               vcpu_hotplug(cpu);
-       }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
-                             unsigned long action, void *hcpu)
-{
-       int cpu = (long)hcpu;
-
-       /*
-        * We do this in a callback notifier rather than __cpu_disable()
-        * because local_cpu_hotplug_request() does not work in the latter
-        * as it's always executed from within a stopmachine kthread.
-        */
-       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
-               cpu_clear(cpu, local_allowed_cpumask);
-
-       return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
-                             unsigned long event, void *data)
-{
-       int i;
-
-       static struct xenbus_watch cpu_watch = {
-               .node = "cpu",
-               .callback = handle_vcpu_hotplug_event,
-               .flags = XBWF_new_thread };
-       (void)register_xenbus_watch(&cpu_watch);
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
-               for_each_cpu(i)
-                       vcpu_hotplug(i);
-               printk(KERN_INFO "Brought up %ld CPUs\n",
-                      (long)num_online_cpus());
-       }
-
-       return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-       static struct notifier_block hotplug_cpu = {
-               .notifier_call = smpboot_cpu_notify };
-       static struct notifier_block xsn_cpu = {
-               .notifier_call = setup_cpu_watcher };
-
-       register_cpu_notifier(&hotplug_cpu);
-       register_xenstore_notifier(&xsn_cpu);
-
-       return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
-       int i, err;
-
-       lock_cpu_hotplug();
-
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       while (num_online_cpus() > 1) {
-               unlock_cpu_hotplug();
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       err = cpu_down(i);
-                       if (err) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               for_each_cpu(i)
-                                       vcpu_hotplug(i);
-                               return err;
-                       }
-               }
-               lock_cpu_hotplug();
-       }
-
-       return 0;
-}
-
-void smp_resume(void)
-{
-       int i;
-
-       for_each_cpu(i)
-               vcpu_prepare(i);
-
-       unlock_cpu_hotplug();
-
-       for_each_cpu(i)
-               vcpu_hotplug(i);
-}
-
 static void
 remove_siblinginfo(int cpu)
 {
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
-int smp_suspend(void)
-{
-       if (num_online_cpus() > 1) {
-               printk(KERN_WARNING "Can't suspend SMP guests "
-                      "without CONFIG_HOTPLUG_CPU\n");
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-void smp_resume(void)
-{
-}
-
 int __cpu_disable(void)
 {
        return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
 {
        int rc;
 
-       if (local_cpu_hotplug_request()) {
-               cpu_set(cpu, local_allowed_cpumask);
-               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
-                       printk("%s: attempt to bring up CPU %u disallowed by "
-                              "remote admin.\n", __FUNCTION__, cpu);
-                       return -EBUSY;
-               }
-       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
-                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
-               return -EBUSY;
-       }
+       rc = cpu_up_check(cpu);
+       if (rc)
+               return rc;
 
 #ifdef CONFIG_SMP_ALTERNATIVES
        if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
        cpu_set(cpu, cpu_online_map);
 
        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-       if (rc != 0)
-               BUG();
+       BUG_ON(rc);
 
        return 0;
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Tue May 30 14:30:34 
2006 -0500
@@ -429,6 +429,9 @@ static int __init evtchn_init(void)
 {
        int err;
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        spin_lock_init(&port_user_lock);
        memset(port_user, 0, sizeof(port_user));
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Tue May 30 
14:30:34 2006 -0500
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
 
 static unsigned long alloc_mfn(void)
 {
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long 
 
 static void netif_idx_release(u16 pending_idx)
 {
-       static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(_lock);
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
@@ -810,6 +810,9 @@ static int __init netback_init(void)
        int i;
        struct page *page;
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        /* We can increase reservation by this much in net_rx_action(). */
        balloon_update_driver_allowance(NET_RX_RING_SIZE);
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -17,13 +17,10 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-
 #include <stdarg.h>
 #include <linux/module.h>
 #include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
 #include "common.h"
-
 
 #if 0
 #undef DPRINTK
@@ -31,22 +28,19 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
-
 struct backend_info
 {
        struct xenbus_device *dev;
        netif_t *netif;
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
-
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void maybe_connect(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
 
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
        kfree(ratestr);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 static void connect(struct backend_info *be)
 {
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Tue May 30 
14:30:34 2006 -0500
@@ -60,12 +60,11 @@
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
 
 #define GRANT_INVALID_REF      0
 
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
 
 static inline void init_skb_shinfo(struct sk_buff *skb)
 {
@@ -80,20 +79,14 @@ struct netfront_info {
 
        struct net_device_stats stats;
 
-       netif_tx_front_ring_t tx;
-       netif_rx_front_ring_t rx;
+       struct netif_tx_front_ring tx;
+       struct netif_rx_front_ring rx;
 
        spinlock_t   tx_lock;
        spinlock_t   rx_lock;
 
        unsigned int handle;
        unsigned int evtchn, irq;
-
-       /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED       0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED    2
-       unsigned int backend_state;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -123,8 +116,8 @@ struct netfront_info {
        u8 mac[ETH_ALEN];
 
        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
-       multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-       mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+       struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+       struct mmu_update rx_mmu[NET_RX_RING_SIZE];
 };
 
 /*
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
        list[0] = list[id];
        return id;
 }
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
-       [BEST_CLOSED]       = "closed",
-       [BEST_DISCONNECTED] = "disconnected",
-       [BEST_CONNECTED]    = "connected",
-};
-#endif
 
 #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
                                        __FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
        return talk_to_backend(dev, info);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_backend(struct xenbus_device *dev,
@@ -317,8 +323,8 @@ again:
 
 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
 {
-       netif_tx_sring_t *txs;
-       netif_rx_sring_t *rxs;
+       struct netif_tx_sring *txs;
+       struct netif_rx_sring *rxs;
        int err;
        struct net_device *netdev = info->netdev;
 
@@ -328,13 +334,13 @@ static int setup_device(struct xenbus_de
        info->tx.sring = NULL;
        info->irq = 0;
 
-       txs = (netif_tx_sring_t *)__get_free_page(GFP_KERNEL);
+       txs = (struct netif_tx_sring *)__get_free_page(GFP_KERNEL);
        if (!txs) {
                err = -ENOMEM;
                xenbus_dev_fatal(dev, err, "allocating tx ring page");
                goto fail;
        }
-       rxs = (netif_rx_sring_t *)__get_free_page(GFP_KERNEL);
+       rxs = (struct netif_rx_sring *)__get_free_page(GFP_KERNEL);
        if (!rxs) {
                err = -ENOMEM;
                xenbus_dev_fatal(dev, err, "allocating rx ring page");
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
        }
        memset(txs, 0, PAGE_SIZE);
        memset(rxs, 0, PAGE_SIZE);
-       info->backend_state = BEST_DISCONNECTED;
 
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        DPRINTK("\n");
 
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       if (np->backend_state != BEST_CONNECTED)
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
        struct xen_memory_reservation reservation;
        grant_ref_t ref;
 
-       if (unlikely(np->backend_state != BEST_CONNECTED))
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        /*
@@ -638,7 +643,7 @@ static int network_start_xmit(struct sk_
 {
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
-       netif_tx_request_t *tx;
+       struct netif_tx_request *tx;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
 
        spin_lock_irq(&np->tx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -736,10 +741,10 @@ static int netif_poll(struct net_device 
 {
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb, *nskb;
-       netif_rx_response_t *rx;
+       struct netif_rx_response *rx;
        RING_IDX i, rp;
-       mmu_update_t *mmu = np->rx_mmu;
-       multicall_entry_t *mcl = np->rx_mcl;
+       struct mmu_update *mmu = np->rx_mmu;
+       struct multicall_entry *mcl = np->rx_mcl;
        int work_done, budget, more_to_do = 1;
        struct sk_buff_head rxq;
        unsigned long flags;
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device 
 
        spin_lock(&np->rx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock(&np->rx_lock);
                return 0;
        }
@@ -962,7 +967,7 @@ static void network_connect(struct net_d
 {
        struct netfront_info *np;
        int i, requeue_idx;
-       netif_tx_request_t *tx;
+       struct netif_tx_request *tx;
        struct sk_buff *skb;
 
        np = netdev_priv(dev);
@@ -1041,11 +1046,9 @@ static void network_connect(struct net_d
         * domain a kick because we've probably just requeued some
         * packets.
         */
-       np->backend_state = BEST_CONNECTED;
+       netif_carrier_on(dev);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
-
-       network_maybe_wake_tx(dev);
 
        spin_unlock(&np->rx_lock);
        spin_unlock_irq(&np->tx_lock);
@@ -1057,7 +1060,7 @@ static void show_device(struct netfront_
        if (np) {
                IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
                        np->handle,
-                       be_state_name[np->backend_state],
+                       netif_carrier_ok(np->netdev) ? "on" : "off",
                        netif_running(np->netdev) ? "open" : "closed",
                        np->evtchn,
                        np->tx,
@@ -1243,9 +1246,10 @@ static struct net_device * __devinit cre
        }
 
        np                = netdev_priv(netdev);
-       np->backend_state = BEST_CLOSED;
        np->handle        = handle;
        np->xbdev         = dev;
+
+       netif_carrier_off(netdev);
 
        spin_lock_init(&np->tx_lock);
        spin_lock_init(&np->rx_lock);
@@ -1394,7 +1398,7 @@ static void netif_disconnect_backend(str
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       info->backend_state = BEST_DISCONNECTED;
+       netif_carrier_off(info->netdev);
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
 
@@ -1454,6 +1458,9 @@ static struct notifier_block notifier_in
 
 static int __init netif_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        if (xen_start_info->flags & SIF_INITDOMAIN)
                return 0;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
 }
 
 static void pciback_frontend_changed(struct xenbus_device *xdev,
-                                    XenbusState fe_state)
+                                    enum xenbus_state fe_state)
 {
        struct pciback_device *pdev = xdev->data;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Tue May 30 
14:30:34 2006 -0500
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
 static int pcifront_try_disconnect(struct pcifront_device *pdev)
 {
        int err = 0;
-       XenbusState prev_state;
+       enum xenbus_state prev_state;
 
        spin_lock(&pdev->dev_lock);
 
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
 }
 
 static void pcifront_backend_changed(struct xenbus_device *xdev,
-                                    XenbusState be_state)
+                                    enum xenbus_state be_state)
 {
        struct pcifront_device *pdev = xdev->data;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Tue May 30 
14:30:34 2006 -0500
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
        break;
 
        case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
-               mmu_update_t u;
-               uint64_t ptep;
-#endif
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
                for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
                        if (get_user(mfn, p))
                                return -EFAULT;
-#ifdef __ia64__
+
                        ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-                                                    mfn, 1 << PAGE_SHIFT,
+                                                    mfn, PAGE_SIZE,
                                                     vma->vm_page_prot, m.dom);
                        if (ret < 0)
-                           goto batch_err;
-#else
-
-                       ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-                       if (ret)
-                               goto batch_err;
-
-                       u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-                       u.ptr = ptep;
-
-                       if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
                                put_user(0xF0000000 | mfn, p);
-#endif
                }
 
                ret = 0;
@@ -271,6 +255,9 @@ static int capabilities_read(char *page,
 
 static int __init privcmd_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        /* Set of hypercalls that privileged applications may execute. */
        set_bit(__HYPERVISOR_acm_op,           hypercall_permission_map);
        set_bit(__HYPERVISOR_dom0_op,          hypercall_permission_map);
@@ -280,6 +267,9 @@ static int __init privcmd_init(void)
        set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
        set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
        set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op_compat,
+               hypercall_permission_map);
 
        privcmd_intf = create_xen_proc_entry("privcmd", 0400);
        if (privcmd_intf != NULL)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -34,7 +34,7 @@ struct backend_info
 
        /* watch front end for changes */
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len);
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state);
+                             enum xenbus_state frontend_state);
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
 
 
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Tue May 30 
14:30:34 2006 -0500
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
 
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
 }
 
 
-XenbusState xenbus_read_driver_state(const char *path)
-{
-       XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+       enum xenbus_state result;
        int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
        if (err)
                result = XenbusStateClosed;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue May 30 
14:30:34 2006 -0500
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
        struct xenbus_device *dev =
                container_of(watch, struct xenbus_device, otherend_watch);
        struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-       XenbusState state;
+       enum xenbus_state state;
 
        /* Protect us against watches firing on old details when the otherend
           details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
        size_t stringlen;
        char *tmpstring;
 
-       XenbusState state = xenbus_read_driver_state(nodename);
+       enum xenbus_state state = xenbus_read_driver_state(nodename);
 
        if (state != XenbusStateInitialising) {
                /* Device is not new, so ignore it.  This can happen if a
@@ -966,10 +966,8 @@ static int __init xenbus_probe_init(void
 
        DPRINTK("");
 
-       if (xen_init() < 0) {
-               DPRINTK("failed");
+       if (!is_running_on_xen())
                return -ENODEV;
-       }
 
        /* Register ourselves with the kernel bus subsystem */
        bus_register(&xenbus_frontend.bus);
@@ -1069,10 +1067,8 @@ static int __init wait_for_devices(void)
 {
        unsigned long timeout = jiffies + 10*HZ;
 
-       if (xen_init() < 0) {
-               DPRINTK("failed");
+       if (!is_running_on_xen())
                return -ENODEV;
-       }
 
        while (time_before(jiffies, timeout)) {
                if (all_devices_ready())
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Tue May 
30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Tue May 
30 14:30:34 2006 -0500
@@ -118,7 +118,7 @@ u64 jiffies_to_st(unsigned long jiffies)
 #define MULTI_UVMDOMID_INDEX 4
 #endif
 
-#define xen_init()     (0)
+#define is_running_on_xen() 1
 
 static inline int
 HYPERVISOR_yield(
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Tue May 
30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Tue May 
30 14:30:34 2006 -0500
@@ -10,10 +10,32 @@
 
 static char * __init machine_specific_memory_setup(void)
 {
-       unsigned long max_pfn = xen_start_info->nr_pages;
+       int rc;
+       struct xen_memory_map memmap;
+       /*
+        * This is rather large for a stack variable but this early in
+        * the boot process we know we have plenty of slack space.
+        */
+       struct e820entry map[E820MAX];
 
-       e820.nr_map = 0;
-       add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, map);
+
+       rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+       if ( rc == -ENOSYS ) {
+               memmap.nr_entries = 1;
+               map[0].addr = 0ULL;
+               map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+               /* 8MB slack (to balance backend allocations). */
+               map[0].size += 8 << 20;
+               map[0].type = E820_RAM;
+               rc = 0;
+       }
+       BUG_ON(rc);
+
+       sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+       BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
 
        return "Xen";
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 14:30:34 
2006 -0500
@@ -195,12 +195,42 @@ HYPERVISOR_multicall(
     return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
 static inline int
 HYPERVISOR_memory_op(
     unsigned int cmd, void *arg)
 {
     return _hypercall2(int, memory_op, cmd, arg);
 }
+#else
+//XXX xen/ia64 copy_from_guest() is broken.
+//    This is a temporary workaround until it is fixed.
+static inline int
+____HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    return _hypercall2(int, memory_op, cmd, arg);
+}
+
+#include <xen/interface/memory.h>
+int ia64_xenmem_reservation_op(unsigned long op,
+                  struct xen_memory_reservation* reservation__);
+static inline int
+HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    switch (cmd) {
+    case XENMEM_increase_reservation:
+    case XENMEM_decrease_reservation:
+    case XENMEM_populate_physmap:
+        return ia64_xenmem_reservation_op(cmd, 
+                                          (struct xen_memory_reservation*)arg);
+    default:
+        return ____HYPERVISOR_memory_op(cmd, arg);
+    }
+    /* NOTREACHED */
+}
+#endif
 
 static inline int
 HYPERVISOR_event_channel_op(
@@ -244,12 +274,19 @@ HYPERVISOR_physdev_op(
     return rc;
 }
 
-static inline int
-HYPERVISOR_grant_table_op(
+//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant.
+static inline int
+____HYPERVISOR_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
 {
     return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+#define HYPERVISOR_grant_table_op(cmd, uop, count) \
+       ____HYPERVISOR_grant_table_op((cmd), (uop), (count))
+#else
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+#endif
 
 static inline int
 HYPERVISOR_vcpu_op(
@@ -281,6 +318,7 @@ static inline void exit_idle(void) {}
 #define do_IRQ(irq, regs) __do_IRQ((irq), (regs))
 
 #ifdef CONFIG_XEN_IA64_DOM0_VP
+#include <linux/err.h>
 #include <asm/xen/privop.h>
 
 #define _hypercall_imm1(type, name, imm, a1)                   \
@@ -382,6 +420,10 @@ HYPERVISOR_ioremap(unsigned long ioaddr,
        unsigned long ret = ioaddr;
        if (running_on_xen) {
                ret = __HYPERVISOR_ioremap(ioaddr, size);
+               if (unlikely(IS_ERR_VALUE(ret)))
+                       panic("hypercall %s failed with %ld. "
+                             "Please check Xen and Linux config mismatch\n",
+                             __func__, -ret);
        }
        return ret;
 }
@@ -421,27 +463,6 @@ HYPERVISOR_machtophys(unsigned long mfn)
 }
 
 static inline unsigned long
-__HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
-                             unsigned int address_bits)
-{
-       return _hypercall_imm3(unsigned long, ia64_dom0vp_op,
-                              IA64_DOM0VP_populate_physmap, gpfn, 
-                              extent_order, address_bits);
-}
-
-static inline unsigned long
-HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
-                           unsigned int address_bits)
-{
-       unsigned long ret = 0;
-       if (running_on_xen) {
-               ret = __HYPERVISOR_populate_physmap(gpfn, extent_order,
-                                                   address_bits);
-       }
-       return ret;
-}
-
-static inline unsigned long
 __HYPERVISOR_zap_physmap(unsigned long gpfn, unsigned int extent_order)
 {
        return _hypercall_imm2(unsigned long, ia64_dom0vp_op,
@@ -466,6 +487,7 @@ __HYPERVISOR_add_physmap(unsigned long g
                               IA64_DOM0VP_add_physmap, gpfn, mfn, flags,
                               domid);
 }
+
 static inline unsigned long
 HYPERVISOR_add_physmap(unsigned long gpfn, unsigned long mfn,
                       unsigned int flags, domid_t domid)
@@ -477,13 +499,15 @@ HYPERVISOR_add_physmap(unsigned long gpf
        }
        return ret;
 }
+
+// for balloon driver
+#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+
 #else
-#define HYPERVISOR_ioremap(ioaddr, size)               ({ioaddr;})
-#define HYPERVISOR_phystomach(gpfn)                    ({gpfn;})
-#define HYPERVISOR_machtophys(mfn)                     ({mfn;})
-#define HYPERVISOR_populate_physmap(gpfn, extent_order, address_bits) \
-                                                       ({0;})
-#define HYPERVISOR_zap_physmap(gpfn, extent_order)     ({0;})
-#define HYPERVISOR_add_physmap(gpfn, mfn, flags)       ({0;})
+#define HYPERVISOR_ioremap(ioaddr, size)               (ioaddr)
+#define HYPERVISOR_phystomach(gpfn)                    (gpfn)
+#define HYPERVISOR_machtophys(mfn)                     (mfn)
+#define HYPERVISOR_zap_physmap(gpfn, extent_order)     (0)
+#define HYPERVISOR_add_physmap(gpfn, mfn, flags)       (0)
 #endif
 #endif /* __HYPERCALL_H__ */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Tue May 30 
14:30:34 2006 -0500
@@ -53,7 +53,7 @@ extern start_info_t *xen_start_info;
 
 void force_evtchn_callback(void);
 
-int xen_init(void);
+#define is_running_on_xen() running_on_xen
 
 /* Turn jiffies into Xen system time. XXX Implement me. */
 #define jiffies_to_st(j)       0
@@ -118,11 +118,22 @@ HYPERVISOR_poll(
 }
 
 // for drivers/xen/privcmd/privcmd.c
-#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
 #define machine_to_phys_mapping 0
 #ifndef CONFIG_XEN_IA64_DOM0_VP
+#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
 #define        pfn_to_mfn(x)   (x)
 #define        mfn_to_pfn(x)   (x)
+#else
+struct vm_area_struct;
+int direct_remap_pfn_range(struct vm_area_struct *vma,
+                          unsigned long address,
+                          unsigned long mfn,
+                          unsigned long size,
+                          pgprot_t prot,
+                          domid_t  domid);
+struct file;
+int privcmd_mmap(struct file * file, struct vm_area_struct * vma);
+#define HAVE_ARCH_PRIVCMD_MMAP
 #endif
 
 // for drivers/xen/balloon/balloon.c
@@ -147,7 +158,7 @@ xen_create_contiguous_region(unsigned lo
                              unsigned int order, unsigned int address_bits)
 {
        int ret = 0;
-       if (running_on_xen) {
+       if (is_running_on_xen()) {
                ret = __xen_create_contiguous_region(vstart, order,
                                                     address_bits);
        }
@@ -158,11 +169,24 @@ static inline void
 static inline void
 xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 {
-       if (running_on_xen)
+       if (is_running_on_xen())
                __xen_destroy_contiguous_region(vstart, order);
 }
+
+// for netfront.c, netback.c
+#define MULTI_UVMFLAGS_INDEX 0 //XXX any value
+
+static inline void
+MULTI_update_va_mapping(
+       multicall_entry_t *mcl, unsigned long va,
+       pte_t new_val, unsigned long flags)
+{
+       mcl->op = __HYPERVISOR_update_va_mapping;
+       mcl->result = 0;
+}
+
 #else
-#define xen_create_contiguous_region(vstart, order, address_bits)      ({0;})
+#define xen_create_contiguous_region(vstart, order, address_bits)      (0)
 #define xen_destroy_contiguous_region(vstart, order)   do {} while (0)
 #endif
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/page.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h      Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h      Tue May 30 14:30:34 
2006 -0500
@@ -234,6 +234,43 @@ get_order (unsigned long size)
 
 #include <linux/kernel.h>
 #include <asm/hypervisor.h>
+#include <xen/features.h>      // to compile netback, netfront
+typedef unsigned long maddr_t; // to compile netback, netfront
+
+// XXX hack!
+//     Linux/IA64 uses PG_arch_1.
+//     This hack will be removed once PG_foreign bit is taken.
+//#include <xen/foreign_page.h>
+#ifdef __ASM_XEN_FOREIGN_PAGE_H__
+# error "don't include include/xen/foreign_page.h!"
+#endif
+
+extern struct address_space xen_ia64_foreign_dummy_mapping;
+#define PageForeign(page)      \
+       ((page)->mapping == &xen_ia64_foreign_dummy_mapping)
+
+#define SetPageForeign(page, dtor) do {                                \
+       set_page_private((page), (unsigned long)(dtor));        \
+       (page)->mapping = &xen_ia64_foreign_dummy_mapping;      \
+       smp_rmb();                                              \
+} while (0)
+
+#define ClearPageForeign(page) do {    \
+       (page)->mapping = NULL;         \
+       smp_rmb();                      \
+       set_page_private((page), 0);    \
+} while (0)
+
+#define PageForeignDestructor(page)    \
+       ( (void (*) (struct page *)) page_private(page) )
+
+#define arch_free_page(_page,_order)                   \
+({      int foreign = PageForeign(_page);               \
+       if (foreign)                                    \
+               (PageForeignDestructor(_page))(_page);  \
+       foreign;                                        \
+})
+#define HAVE_ARCH_FREE_PAGE
 
 //XXX xen page size != page size
 
@@ -279,11 +316,14 @@ machine_to_phys_for_dma(unsigned long ma
 #define set_phys_to_machine(pfn, mfn) do { } while (0)
 #define xen_machphys_update(mfn, pfn) do { } while (0)
 
-#define mfn_to_pfn(mfn)                        ({(mfn);})
-#define mfn_to_virt(mfn)               ({__va((mfn) << PAGE_SHIFT);})
-#define pfn_to_mfn(pfn)                        ({(pfn);})
-#define virt_to_mfn(virt)              ({__pa(virt) >> PAGE_SHIFT;})
-#define virt_to_machine(virt)          ({__pa(virt);}) // for tpmfront.c
+//XXX to compile set_phys_to_machine(vaddr, FOREIGN_FRAME(m))
+#define FOREIGN_FRAME(m)        (INVALID_P2M_ENTRY)
+
+#define mfn_to_pfn(mfn)                        (mfn)
+#define mfn_to_virt(mfn)               (__va((mfn) << PAGE_SHIFT))
+#define pfn_to_mfn(pfn)                        (pfn)
+#define virt_to_mfn(virt)              (__pa(virt) >> PAGE_SHIFT)
+#define virt_to_machine(virt)          __pa(virt) // for tpmfront.c
 
 #endif /* CONFIG_XEN_IA64_DOM0_VP */
 #endif /* CONFIG_XEN */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/privop.h    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/privop.h    Tue May 30 14:30:34 
2006 -0500
@@ -44,13 +44,14 @@
 #define ia64_ptcl                      __ia64_ptcl
 #define ia64_ptri                      __ia64_ptri
 #define ia64_ptrd                      __ia64_ptrd
-#define        ia64_get_psr_i                  __ia64_get_psr_i
+#define ia64_get_psr_i                 __ia64_get_psr_i
 #define ia64_intrin_local_irq_restore  __ia64_intrin_local_irq_restore
 #define ia64_pal_halt_light            __ia64_pal_halt_light
-#define        ia64_leave_kernel               __ia64_leave_kernel
-#define        ia64_leave_syscall              __ia64_leave_syscall
-#define        ia64_switch_to                  __ia64_switch_to
-#define        ia64_pal_call_static            __ia64_pal_call_static
+#define ia64_leave_kernel              __ia64_leave_kernel
+#define ia64_leave_syscall             __ia64_leave_syscall
+#define ia64_trace_syscall             __ia64_trace_syscall
+#define ia64_switch_to                 __ia64_switch_to
+#define ia64_pal_call_static           __ia64_pal_call_static
 
 #endif /* !IA64_PARAVIRTUALIZED */
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Tue May 30 
14:30:34 2006 -0500
@@ -11,45 +11,41 @@
 
 
 #include <asm/xen/asm-xsi-offsets.h>
+#include <xen/interface/arch-ia64.h>
 
 #define IA64_PARAVIRTUALIZED
 
 #ifdef __ASSEMBLY__
-#define        XEN_HYPER_RFI                   break 0x1
-#define        XEN_HYPER_RSM_PSR_DT            break 0x2
-#define        XEN_HYPER_SSM_PSR_DT            break 0x3
-#define        XEN_HYPER_COVER                 break 0x4
-#define        XEN_HYPER_ITC_D                 break 0x5
-#define        XEN_HYPER_ITC_I                 break 0x6
-#define        XEN_HYPER_SSM_I                 break 0x7
-#define        XEN_HYPER_GET_IVR               break 0x8
-#define        XEN_HYPER_GET_TPR               break 0x9
-#define        XEN_HYPER_SET_TPR               break 0xa
-#define        XEN_HYPER_EOI                   break 0xb
-#define        XEN_HYPER_SET_ITM               break 0xc
-#define        XEN_HYPER_THASH                 break 0xd
-#define        XEN_HYPER_PTC_GA                break 0xe
-#define        XEN_HYPER_ITR_D                 break 0xf
-#define        XEN_HYPER_GET_RR                break 0x10
-#define        XEN_HYPER_SET_RR                break 0x11
-#define        XEN_HYPER_SET_KR                break 0x12
-#define        XEN_HYPER_FC                    break 0x13
-#define        XEN_HYPER_GET_CPUID             break 0x14
-#define        XEN_HYPER_GET_PMD               break 0x15
-#define        XEN_HYPER_GET_EFLAG             break 0x16
-#define        XEN_HYPER_SET_EFLAG             break 0x17
+#define        XEN_HYPER_RFI                   break HYPERPRIVOP_RFI
+#define        XEN_HYPER_RSM_PSR_DT            break HYPERPRIVOP_RSM_DT
+#define        XEN_HYPER_SSM_PSR_DT            break HYPERPRIVOP_SSM_DT
+#define        XEN_HYPER_COVER                 break HYPERPRIVOP_COVER
+#define        XEN_HYPER_ITC_D                 break HYPERPRIVOP_ITC_D
+#define        XEN_HYPER_ITC_I                 break HYPERPRIVOP_ITC_I
+#define        XEN_HYPER_SSM_I                 break HYPERPRIVOP_SSM_I
+#define        XEN_HYPER_GET_IVR               break HYPERPRIVOP_GET_IVR
+#define        XEN_HYPER_GET_TPR               break HYPERPRIVOP_GET_TPR
+#define        XEN_HYPER_SET_TPR               break HYPERPRIVOP_SET_TPR
+#define        XEN_HYPER_EOI                   break HYPERPRIVOP_EOI
+#define        XEN_HYPER_SET_ITM               break HYPERPRIVOP_SET_ITM
+#define        XEN_HYPER_THASH                 break HYPERPRIVOP_THASH
+#define        XEN_HYPER_PTC_GA                break HYPERPRIVOP_PTC_GA
+#define        XEN_HYPER_ITR_D                 break HYPERPRIVOP_ITR_D
+#define        XEN_HYPER_GET_RR                break HYPERPRIVOP_GET_RR
+#define        XEN_HYPER_SET_RR                break HYPERPRIVOP_SET_RR
+#define        XEN_HYPER_SET_KR                break HYPERPRIVOP_SET_KR
+#define        XEN_HYPER_FC                    break HYPERPRIVOP_FC
+#define        XEN_HYPER_GET_CPUID             break HYPERPRIVOP_GET_CPUID
+#define        XEN_HYPER_GET_PMD               break HYPERPRIVOP_GET_PMD
+#define        XEN_HYPER_GET_EFLAG             break HYPERPRIVOP_GET_EFLAG
+#define        XEN_HYPER_SET_EFLAG             break HYPERPRIVOP_SET_EFLAG
 #endif
 
 #ifndef __ASSEMBLY__
-#ifdef MODULE
-extern int is_running_on_xen(void);
-#define running_on_xen (is_running_on_xen())
-#else
 extern int running_on_xen;
-#endif
-
-#define        XEN_HYPER_SSM_I                 asm("break 0x7");
-#define        XEN_HYPER_GET_IVR               asm("break 0x8");
+
+#define        XEN_HYPER_SSM_I         asm("break %0" : : "i" 
(HYPERPRIVOP_SSM_I))
+#define        XEN_HYPER_GET_IVR       asm("break %0" : : "i" 
(HYPERPRIVOP_GET_IVR))
 
 /************************************************/
 /* Instructions paravirtualized for correctness */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 14:30:34 2006 -0500
@@ -75,7 +75,7 @@ struct xenbus_device {
        int otherend_id;
        struct xenbus_watch otherend_watch;
        struct device dev;
-       XenbusState state;
+       enum xenbus_state state;
        void *data;
 };
 
@@ -98,7 +98,7 @@ struct xenbus_driver {
        int (*probe)(struct xenbus_device *dev,
                     const struct xenbus_device_id *id);
        void (*otherend_changed)(struct xenbus_device *dev,
-                                XenbusState backend_state);
+                                enum xenbus_state backend_state);
        int (*remove)(struct xenbus_device *dev);
        int (*suspend)(struct xenbus_device *dev);
        int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
  * Return 0 on success, or -errno on error.  On error, the device will switch
  * to XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state 
new_state);
 
 
 /**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
  * Return the state of the driver rooted at the given store path, or
  * XenbusStateClosed if no state can be read.
  */
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
 
 
 /***
diff -r e74246451527 -r f54d38cea8ac tools/examples/network-bridge
--- a/tools/examples/network-bridge     Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/network-bridge     Tue May 30 14:30:34 2006 -0500
@@ -59,7 +59,7 @@ findCommand "$@"
 findCommand "$@"
 evalVariables "$@"
 
-vifnum=${vifnum:-$(ip route list | awk '/^default / { 
sub(/^(eth|xenbr)/,"",$NF); print $NF }')}
+vifnum=${vifnum:-$(ip route list | awk '/^default / { print $NF }' | sed 
's/^[^0-9]*//')}
 bridge=${bridge:-xenbr${vifnum}}
 netdev=${netdev:-eth${vifnum}}
 antispoof=${antispoof:-no}
diff -r e74246451527 -r f54d38cea8ac tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti      Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/xmexample.vti      Tue May 30 14:30:34 2006 -0500
@@ -36,7 +36,7 @@ name = "ExampleVTIDomain"
 # Random MACs are assigned if not given.
 #vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ]
 # type=ioemu specify the NIC is an ioemu device not netfront
-#vif = [ 'type=ioemu, bridge=xenbr0' ]
+vif = [ 'type=ioemu, bridge=xenbr0' ]
 # for multiple NICs in device model, 3 in this example
 #vif = [ 'type=ioemu, bridge=xenbr0', 'type=ioemu', 'type=ioemu']
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/Makefile
--- a/tools/libxc/Makefile      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/Makefile      Tue May 30 14:30:34 2006 -0500
@@ -20,6 +20,7 @@ SRCS       += xc_physdev.c
 SRCS       += xc_physdev.c
 SRCS       += xc_private.c
 SRCS       += xc_sedf.c
+SRCS       += xc_csched.c
 SRCS       += xc_tbuf.c
 
 ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_acm.c
--- a/tools/libxc/xc_acm.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_acm.c      Tue May 30 14:30:34 2006 -0500
@@ -1,13 +1,10 @@
 /******************************************************************************
+ * xc_acm.c
  *
- * Copyright (C) 2005 IBM Corporation
+ * Copyright (C) 2005, 2006 IBM Corporation, R Sailer
  *
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- *
- * Authors:
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Stefan Berger <stefanb@xxxxxxxxxxxxxx>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -17,29 +14,23 @@
 
 #include "xc_private.h"
 
-int xc_acm_op(int xc_handle, struct acm_op *op)
+
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size)
 {
     int ret = -1;
     DECLARE_HYPERCALL;
 
-    op->interface_version = ACM_INTERFACE_VERSION;
+    hypercall.op = __HYPERVISOR_acm_op;
+    hypercall.arg[0] = cmd;
+    hypercall.arg[1] = (unsigned long) arg;
 
-    hypercall.op = __HYPERVISOR_acm_op;
-    hypercall.arg[0] = (unsigned long) op;
-
-    if (mlock(op, sizeof(*op)) != 0) {
-        PERROR("Could not lock memory for Xen policy hypercall");
-        goto out1;
+    if (mlock(arg, arg_size) != 0) {
+        PERROR("xc_acm_op: arg mlock failed");
+        goto out;
     }
-
     ret = do_xen_hypercall(xc_handle, &hypercall);
-    ret = ioctl(xc_handle, IOCTL_PRIVCMD_HYPERCALL, &hypercall);
-    if (ret < 0) {
-        goto out2;
-    }
- out2:
-    safe_munlock(op, sizeof(*op));
- out1:
+    safe_munlock(arg, arg_size);
+ out:
     return ret;
 }
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ia64_stubs.c       Tue May 30 14:30:34 2006 -0500
@@ -50,7 +50,7 @@ xc_plan9_build(int xc_handle,
 }
 /*  
     VMM uses put_user to copy pfn_list to guest buffer, this maybe fail,
-    VMM don't handle this now.
+    VMM doesn't handle this now.
     This method will touch guest buffer to make sure the buffer's mapping
     is tracked by VMM,
  */
@@ -66,6 +66,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
     unsigned int __start_page, __nr_pages;
     unsigned long max_pfns;
     unsigned long *__pfn_buf;
+
     __start_page = start_page;
     __nr_pages = nr_pages;
     __pfn_buf = pfn_buf;
@@ -75,6 +76,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         op.cmd = DOM0_GETMEMLIST;
         op.u.getmemlist.domain   = (domid_t)domid;
         op.u.getmemlist.max_pfns = max_pfns;
+        op.u.getmemlist.num_pfns = 0;
         set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
 
         if ( (max_pfns != -1UL)
@@ -723,7 +725,7 @@ int xc_hvm_build(int xc_handle,
 
     ctxt->flags = VGCF_VMX_GUEST;
     ctxt->regs.cr_iip = 0x80000000ffffffb0UL;
-    ctxt->vcpu.privregs = 0;
+    ctxt->privregs = 0;
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_build.c      Tue May 30 14:30:34 2006 -0500
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count, nmfn;
+    unsigned long ppt_alloc, count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
@@ -327,6 +316,13 @@ static int setup_pg_tables_pae(int xc_ha
                 *vl1e &= ~_PAGE_RW;
         }
         vl1e++;
+    }
+
+    /* Xen requires a mid-level pgdir mapping 0xC0000000 region. */
+    if ( (vl3tab[3] & _PAGE_PRESENT) == 0 )
+    {
+        alloc_pt(l2tab, vl2tab, pl2tab);
+        vl3tab[3] = l2tab | L3_PROT;
     }
 
     munmap(vl1tab, PAGE_SIZE);
@@ -727,25 +723,28 @@ static int setup_guest(int xc_handle,
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
-        if ( dsi.pae_kernel )
-        {
-            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
-            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
-                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
-                break;
-        }
-        else
-        {
-            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
-                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
-                break;
-        }
-#endif
-#if defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
     ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+#if defined(__i386__)
+        if ( dsi.pae_kernel )
+        {
+            if ( (1 + /* # L3 */
+                  NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
+                  NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT_PAE) + /* # L1 */
+                  /* Include a fourth mid-level page directory for Xen. */
+                  (v_end <= (3 << L3_PAGETABLE_SHIFT_PAE)))
+                  <= nr_pt_pages )
+                break;
+        }
+        else
+        {
+            if ( (1 + /* # L2 */
+                  NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+                 <= nr_pt_pages )
+                break;
+        }
+#elif defined(__x86_64__)
         if ( (1 + /* # L4 */
               NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
               NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
@@ -1116,7 +1115,7 @@ static int xc_linux_build_internal(int x
     ctxt->regs.ar_fpsr = xc_ia64_fpsr_default();
     /* currently done by hypervisor, should move here */
     /* ctxt->regs.r28 = dom_fw_setup(); */
-    ctxt->vcpu.privregs = 0;
+    ctxt->privregs = 0;
     ctxt->sys_pgnr = 3;
     i = 0; /* silence unused variable warning */
 #else /* x86 */
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_restore.c    Tue May 30 14:30:34 2006 -0500
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
-                **
-                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-                ** so we may need to update the p2m after the main loop.
-                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(pt_levels != 3 || pagetype != L1TAB) {
-
-                    if(!uncanonicalize_pagetable(pagetype, page)) {
-                        /*
-                        ** Failing to uncanonicalize a page table can be ok
-                        ** under live migration since the pages type may have
-                        ** changed by now (and we'll get an update later).
-                        */
-                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                                pagetype >> 28, pfn, mfn);
-                        nraces++;
-                        continue;
-                    }
-
+                if(!uncanonicalize_pagetable(pagetype, page)) {
+                    /*
+                    ** Failing to uncanonicalize a page table can be ok
+                    ** under live migration since the pages type may have
+                    ** changed by now (and we'll get an update later).
+                    */
+                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                            pagetype >> 28, pfn, mfn);
+                    nraces++;
+                    continue;
                 }
 
             } else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if(pt_levels == 3) {
-
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for (i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
-                    ERR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                p2m[i] = new_mfn;
-                if (xc_add_mmu_update(xc_handle, mmu,
-                                      (((unsigned long long)new_mfn)
-                                       << PAGE_SHIFT) |
-                                      MMU_MACHPHYS_UPDATE, i)) {
-                    ERR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for(i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
-                region_mfn[j] = p2m[i];
-                j++;
-            }
-
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
-                if (!(region_base = xc_map_foreign_batch(
-                          xc_handle, dom, PROT_READ | PROT_WRITE,
-                          region_mfn, j))) {
-                    ERR("map batch failed");
-                    goto out;
-                }
-
-                for(k = 0; k < j; k++) {
-                    if(!uncanonicalize_pagetable(L1TAB,
-                                                 region_base + k*PAGE_SIZE)) {
-                        ERR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_load_elf.c Tue May 30 14:30:34 2006 -0500
@@ -59,10 +59,10 @@ static int parseelfimage(const char *ima
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
     const char *shstrtab;
     char *guestinfo=NULL, *p;
-    int h;
+    int h, virt_base_defined, elf_pa_off_defined;
 
     if ( !IS_ELF(*ehdr) )
     {
@@ -164,34 +164,40 @@ static int parseelfimage(const char *ima
 
     dsi->xen_guest_string = guestinfo;
 
-    if ( (p = strstr(guestinfo, "VIRT_BASE=")) == NULL )
-    {
-        ERROR("Malformed ELF image. No VIRT_BASE specified");
-        return -EINVAL;
-    }
-
-    virt_base = strtoul(p+10, &p, 0);
-
-    dsi->elf_paddr_offset = virt_base;
-    if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
-        dsi->elf_paddr_offset = strtoul(p+17, &p, 0);
+    /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+    p = strstr(guestinfo, "VIRT_BASE=");
+    virt_base_defined = (p != NULL);
+    virt_base = virt_base_defined ? strtoul(p+10, &p, 0) : 0;
+
+    /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+    p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+    elf_pa_off_defined = (p != NULL);
+    elf_pa_off = elf_pa_off_defined ? strtoul(p+17, &p, 0) : virt_base;
+
+    if ( elf_pa_off_defined && !virt_base_defined )
+        goto bad_image;
 
     for ( h = 0; h < ehdr->e_phnum; h++ )
     {
         phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+        vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+        if ( (vaddr + phdr->p_memsz) < vaddr )
+            goto bad_image;
         if ( vaddr < kernstart )
             kernstart = vaddr;
         if ( (vaddr + phdr->p_memsz) > kernend )
             kernend = vaddr + phdr->p_memsz;
     }
 
-    if ( virt_base )
-        dsi->v_start = virt_base;
-    else
-        dsi->v_start = kernstart;
+    /*
+     * Legacy compatibility and images with no __xen_guest section: assume
+     * header addresses are virtual addresses, and that guest memory should be
+     * mapped starting at kernel load address.
+     */
+    dsi->v_start          = virt_base_defined  ? virt_base  : kernstart;
+    dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
 
     dsi->v_kernentry = ehdr->e_entry;
     if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -199,11 +205,9 @@ static int parseelfimage(const char *ima
 
     if ( (kernstart > kernend) ||
          (dsi->v_kernentry < kernstart) ||
-         (dsi->v_kernentry > kernend) )
-    {
-        ERROR("Malformed ELF image.");
-        return -EINVAL;
-    }
+         (dsi->v_kernentry > kernend) ||
+         (dsi->v_start > kernstart) )
+        goto bad_image;
 
     if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
         dsi->load_symtab = 1;
@@ -215,6 +219,10 @@ static int parseelfimage(const char *ima
     loadelfsymtab(image, 0, 0, NULL, dsi);
 
     return 0;
+
+ bad_image:
+    ERROR("Malformed ELF image.");
+    return -EINVAL;
 }
 
 static int
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_private.c  Tue May 30 14:30:34 2006 -0500
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
     return rc;
 }
 
-unsigned long xc_make_page_below_4G(
-    int xc_handle, uint32_t domid, unsigned long mfn)
-{
-    unsigned long new_mfn;
-
-    if ( xc_domain_memory_decrease_reservation(
-        xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    if ( xc_domain_memory_increase_reservation(
-        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    return new_mfn;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.c   Tue May 30 14:30:34 2006 -0500
@@ -185,61 +185,36 @@ map_domain_va_32(
     void *guest_va,
     int perm)
 {
-    unsigned long pde, page;
-    unsigned long va = (unsigned long)guest_va;
-
-    static unsigned long  cr3_phys[MAX_VIRT_CPUS];
-    static uint32_t *cr3_virt[MAX_VIRT_CPUS];
-    static unsigned long  pde_phys[MAX_VIRT_CPUS];
-    static uint32_t *pde_virt[MAX_VIRT_CPUS];
-    static unsigned long  page_phys[MAX_VIRT_CPUS];
-    static uint32_t *page_virt[MAX_VIRT_CPUS];
-    static int            prev_perm[MAX_VIRT_CPUS];
-
-   if (ctxt[cpu].ctrlreg[3] == 0)
-       return NULL;
-   if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] )
-    {
-        cr3_phys[cpu] = ctxt[cpu].ctrlreg[3];
-        if ( cr3_virt[cpu] )
-            munmap(cr3_virt[cpu], PAGE_SIZE);
-        cr3_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, PROT_READ,
-            cr3_phys[cpu] >> PAGE_SHIFT);
-        if ( cr3_virt[cpu] == NULL )
-            return NULL;
-    }
-    pde = to_ma(cpu, cr3_virt[cpu][vtopdi(va)]);
-    if ( pde != pde_phys[cpu] )
-    {
-        pde_phys[cpu] = pde;
-        if ( pde_virt[cpu] )
-            munmap(pde_virt[cpu], PAGE_SIZE);
-        pde_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, PROT_READ,
-            pde_phys[cpu] >> PAGE_SHIFT);
-        if ( pde_virt[cpu] == NULL )
-            return NULL;
-    }
-    page = to_ma(cpu, pde_virt[cpu][vtopti(va)]);
-
-    if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
-    {
-        page_phys[cpu] = page;
-        if ( page_virt[cpu] )
-            munmap(page_virt[cpu], PAGE_SIZE);
-        page_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, perm,
-            page_phys[cpu] >> PAGE_SHIFT);
-        if ( page_virt[cpu] == NULL )
-        {
-            page_phys[cpu] = 0;
-            return NULL;
-        }
-        prev_perm[cpu] = perm;
-    }
-
-    return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
+    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
+    uint32_t *l2, *l1;
+    static void *v[MAX_VIRT_CPUS];
+
+    l2 = xc_map_foreign_range(
+         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
+    if ( l2 == NULL )
+        return NULL;
+
+    l2e = l2[l2_table_offset_i386(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l1p >> PAGE_SHIFT);
+    if ( l1 == NULL )
+        return NULL;
+
+    l1e = l1[l1_table_offset_i386(va)];
+    munmap(l1, PAGE_SIZE);
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 
 
@@ -250,36 +225,45 @@ map_domain_va_pae(
     void *guest_va,
     int perm)
 {
-    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l3, *l2, *l1;
-    static void *v;
+    static void *v[MAX_VIRT_CPUS];
 
     l3 = xc_map_foreign_range(
         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+    l3e = l3[l3_table_offset_pae(va)];
+    munmap(l3, PAGE_SIZE);
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
-    munmap(l3, PAGE_SIZE);
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+    l2e = l2[l2_table_offset_pae(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p 
>> PAGE_SHIFT);
-    munmap(l2, PAGE_SIZE);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
-    if ( v != NULL )
-        munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> 
PAGE_SHIFT);
+    l1e = l1[l1_table_offset_pae(va)];
     munmap(l1, PAGE_SIZE);
-    if ( v == NULL )
-        return NULL;
-
-    return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 
 #ifdef __x86_64__
@@ -290,9 +274,10 @@ map_domain_va_64(
     void *guest_va,
     int perm)
 {
-    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned 
long)guest_va;
     uint64_t *l4, *l3, *l2, *l1;
-    static void *v;
+    static void *v[MAX_VIRT_CPUS];
+
 
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -302,41 +287,51 @@ map_domain_va_64(
     if ( l4 == NULL )
         return NULL;
 
-    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+    l4e = l4[l4_table_offset(va)];
+    munmap(l4, PAGE_SIZE);
+    if ( !(l4e & _PAGE_PRESENT) )
+        return NULL;
+    l3p = to_ma(cpu, l4e);
     l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l3p >> PAGE_SHIFT);
-    munmap(l4, PAGE_SIZE);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+    l3e = l3[l3_table_offset(va)];
+    munmap(l3, PAGE_SIZE);
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
-    munmap(l3, PAGE_SIZE);
     if ( l2 == NULL )
         return NULL;
 
     l1 = NULL;
-    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
-    l1p = l1e >> PAGE_SHIFT;
-    if (l1e & 0x80)  { /* 2M pages */
+    l2e = l2[l2_table_offset(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    if (l2e & 0x80)  { /* 2M pages */
         p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
     } else { /* 4K pages */
-        //l1p = to_ma(cpu, l1e[l1_table_offset(va)]);
         l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, 
l1p >> PAGE_SHIFT);
-        munmap(l2, PAGE_SIZE);
         if ( l1 == NULL )
             return NULL;
 
-        p = to_ma(cpu, l1[l1_table_offset(va)]);
-    }
-    if ( v != NULL )
-        munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> 
PAGE_SHIFT);
+        l1e = l1[l1_table_offset(va)];
+        if ( !(l1e & _PAGE_PRESENT) )
+            return NULL;
+        p = to_ma(cpu, l1e);
+    }
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
     if (l1)
         munmap(l1, PAGE_SIZE);
-    if ( v == NULL )
-        return NULL;
-
-    return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 #endif
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h   Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.h   Tue May 30 14:30:34 2006 -0500
@@ -7,7 +7,6 @@
 #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 #define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT        22
 #define PSL_T  0x00000100 /* trace enable bit */
 
 #ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
 #endif
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 #endif
 
 typedef void (*thr_ev_handler_t)(long);
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace_core.c      Tue May 30 14:30:34 2006 -0500
@@ -3,6 +3,7 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include "xc_ptrace.h"
 #include <time.h>
 
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, 
         }
         cr3_virt[cpu] = v;
     }
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, 
             return NULL;
         pde_virt[cpu] = v;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, 
             map_mtop_offset(page_phys[cpu]));
         if (v == MAP_FAILED)
         {
-            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
vtopti(va));
+            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
l1_table_offset_i386(va));
             page_phys[cpu] = 0;
             return NULL;
         }
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c     Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_tbuf.c     Tue May 30 14:30:34 2006 -0500
@@ -18,53 +18,57 @@
 
 static int tbuf_enable(int xc_handle, int enable)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  if (enable)
-    op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
-  else
-    op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (enable)
+        op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
+    else
+        op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_set_size(int xc_handle, unsigned long size)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
-  op.u.tbufcontrol.size = size;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
+    op.u.tbufcontrol.size = size;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_get_size(int xc_handle, unsigned long *size)
 {
-  int rc;
-  DECLARE_DOM0_OP;
+    int rc;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
 
-  rc = xc_dom0_op(xc_handle, &op);
-  if (rc == 0)
-    *size = op.u.tbufcontrol.size;
-  return rc;
+    rc = xc_dom0_op(xc_handle, &op);
+    if (rc == 0)
+        *size = op.u.tbufcontrol.size;
+    return rc;
 }
 
 int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
-    unsigned long *size)
+                   unsigned long *size)
 {
     DECLARE_DOM0_OP;
     int rc;
 
-    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
-        return -1;
+    /*
+     * Ignore errors (at least for now) as we get an error if size is already
+     * set (since trace buffers cannot be reallocated). If we really have no
+     * buffers at all then tbuf_enable() will fail, so this is safe.
+     */
+    (void)xc_tbuf_set_size(xc_handle, cnt);
 
     if ( tbuf_enable(xc_handle, 1) != 0 )
         return -1;
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xenctrl.h     Tue May 30 14:30:34 2006 -0500
@@ -359,6 +359,14 @@ int xc_sedf_domain_get(int xc_handle,
                        uint64_t *latency, uint16_t *extratime,
                        uint16_t *weight);
 
+int xc_sched_credit_domain_set(int xc_handle,
+                               uint32_t domid,
+                               struct sched_credit_adjdom *sdom);
+
+int xc_sched_credit_domain_get(int xc_handle,
+                               uint32_t domid,
+                               struct sched_credit_adjdom *sdom);
+
 typedef evtchn_status_t xc_evtchn_status_t;
 
 /*
@@ -449,9 +457,6 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long first_mfn,
                                unsigned long nr_mfns,
                                uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
-                                    unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
@@ -599,6 +604,6 @@ int xc_add_mmu_update(int xc_handle, xc_
                    unsigned long long ptr, unsigned long long val);
 int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
 
-int xc_acm_op(int xc_handle, struct acm_op *op);
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
 
 #endif
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xg_private.h  Tue May 30 14:30:34 2006 -0500
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
 #define L2_PAGETABLE_SHIFT_PAE   21
 #define L3_PAGETABLE_SHIFT_PAE   30
 
+#define L2_PAGETABLE_SHIFT_I386  22
+
 #if defined(__i386__)
 #define L1_PAGETABLE_SHIFT       12
 #define L2_PAGETABLE_SHIFT       22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
 #define L1_PAGETABLE_ENTRIES_PAE  512
 #define L2_PAGETABLE_ENTRIES_PAE  512
 #define L3_PAGETABLE_ENTRIES_PAE    4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
 
 #if defined(__i386__)
 #define L1_PAGETABLE_ENTRIES   1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
 #define l3_table_offset_pae(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
 
+#define l1_table_offset_i386(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
 #if defined(__i386__)
 #define l1_table_offset(_a) \
           (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/acm/acm.c
--- a/tools/python/xen/lowlevel/acm/acm.c       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/acm/acm.c       Tue May 30 14:30:34 2006 -0500
@@ -38,7 +38,7 @@ fprintf(stderr, "ERROR: " _m " (%d = %s)
 /* generic shared function */
 void * __getssid(int domid, uint32_t *buflen)
 {
-    struct acm_op op;
+    struct acm_getssid getssid;
     int xc_handle;
     #define SSID_BUFFER_SIZE    4096
     void *buf = NULL;
@@ -51,14 +51,13 @@ void * __getssid(int domid, uint32_t *bu
         goto out2;
     }
     memset(buf, 0, SSID_BUFFER_SIZE);
-    op.cmd = ACM_GETSSID;
-    op.interface_version = ACM_INTERFACE_VERSION;
-    op.u.getssid.ssidbuf = buf;
-    op.u.getssid.ssidbuf_size = SSID_BUFFER_SIZE;
-    op.u.getssid.get_ssid_by = DOMAINID;
-    op.u.getssid.id.domainid = domid;
-
-    if (xc_acm_op(xc_handle, &op) < 0) {
+    getssid.interface_version = ACM_INTERFACE_VERSION;
+    getssid.ssidbuf = buf;
+    getssid.ssidbuf_size = SSID_BUFFER_SIZE;
+    getssid.get_ssid_by = DOMAINID;
+    getssid.id.domainid = domid;
+
+    if (xc_acm_op(xc_handle, ACMOP_getssid, &getssid, sizeof(getssid)) < 0) {
         if (errno == EACCES)
             PERROR("ACM operation failed.");
         free(buf);
@@ -147,7 +146,7 @@ static PyObject *getdecision(PyObject * 
 static PyObject *getdecision(PyObject * self, PyObject * args)
 {
     char *arg1_name, *arg1, *arg2_name, *arg2, *decision = NULL;
-    struct acm_op op;
+    struct acm_getdecision getdecision;
     int xc_handle;
 
     if (!PyArg_ParseTuple(args, "ssss", &arg1_name, &arg1, &arg2_name, &arg2)) 
{
@@ -163,34 +162,33 @@ static PyObject *getdecision(PyObject * 
     (strcmp(arg2_name, "domid") && strcmp(arg2_name, "ssidref")))
         return NULL;
 
-    op.cmd = ACM_GETDECISION;
-    op.interface_version = ACM_INTERFACE_VERSION;
-    op.u.getdecision.hook = SHARING;
+    getdecision.interface_version = ACM_INTERFACE_VERSION;
+    getdecision.hook = SHARING;
     if (!strcmp(arg1_name, "domid")) {
-        op.u.getdecision.get_decision_by1 = DOMAINID;
-        op.u.getdecision.id1.domainid = atoi(arg1);
-    } else {
-        op.u.getdecision.get_decision_by1 = SSIDREF;
-        op.u.getdecision.id1.ssidref = atol(arg1);
+        getdecision.get_decision_by1 = DOMAINID;
+        getdecision.id1.domainid = atoi(arg1);
+    } else {
+        getdecision.get_decision_by1 = SSIDREF;
+        getdecision.id1.ssidref = atol(arg1);
     }
     if (!strcmp(arg2_name, "domid")) {
-        op.u.getdecision.get_decision_by2 = DOMAINID;
-        op.u.getdecision.id2.domainid = atoi(arg2);
-    } else {
-        op.u.getdecision.get_decision_by2 = SSIDREF;
-        op.u.getdecision.id2.ssidref = atol(arg2);
-    }
-
-    if (xc_acm_op(xc_handle, &op) < 0) {
+        getdecision.get_decision_by2 = DOMAINID;
+        getdecision.id2.domainid = atoi(arg2);
+    } else {
+        getdecision.get_decision_by2 = SSIDREF;
+        getdecision.id2.ssidref = atol(arg2);
+    }
+
+    if (xc_acm_op(xc_handle, ACMOP_getdecision, &getdecision, 
sizeof(getdecision)) < 0) {
         if (errno == EACCES)
             PERROR("ACM operation failed.");
     }
 
     xc_interface_close(xc_handle);
 
-    if (op.u.getdecision.acm_decision == ACM_ACCESS_PERMITTED)
+    if (getdecision.acm_decision == ACM_ACCESS_PERMITTED)
         decision = "PERMITTED";
-    else if (op.u.getdecision.acm_decision == ACM_ACCESS_DENIED)
+    else if (getdecision.acm_decision == ACM_ACCESS_DENIED)
         decision = "DENIED";
 
     return Py_BuildValue("s", decision);
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue May 30 14:30:34 2006 -0500
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_sched_credit_domain_set(XcObject *self,
+                                              PyObject *args,
+                                              PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+    static char kwd_type[] = "I|HH";
+    struct sched_credit_adjdom sdom;
+    
+    weight = 0;
+    cap = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
+                                     &domid, &weight, &cap) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+
+    if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct sched_credit_adjdom sdom;
+    
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+    
+    if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:H,s:H}",
+                         "weight",  sdom.weight,
+                         "cap",     sdom.cap);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
       " slice     [long]: CPU reservation per period\n"
       " latency   [long]: domain's wakeup latency hint\n"
       " extratime [int]:  domain aware of extratime?\n"},
+    
+    { "sched_credit_domain_set",
+      (PyCFunction)pyxc_sched_credit_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to set\n"
+      " weight    [short]: domain's scheduling weight\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "sched_credit_domain_get",
+      (PyCFunction)pyxc_sched_credit_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " weight    [short]: domain's scheduling weight\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
@@ -1172,7 +1233,7 @@ PyXc_init(XcObject *self, PyObject *args
 PyXc_init(XcObject *self, PyObject *args, PyObject *kwds)
 {
     if ((self->xc_handle = xc_interface_open()) == -1) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
+        PyErr_SetFromErrno(xc_error);
         return -1;
     }
 
@@ -1245,7 +1306,7 @@ PyMODINIT_FUNC initxc(void)
     if (m == NULL)
       return;
 
-    xc_error = PyErr_NewException(PKG ".error", NULL, NULL);
+    xc_error = PyErr_NewException(PKG ".Error", PyExc_RuntimeError, NULL);
     zero = PyInt_FromLong(0);
 
     /* KAF: This ensures that we get debug output in a timely manner. */
@@ -1254,6 +1315,9 @@ PyMODINIT_FUNC initxc(void)
 
     Py_INCREF(&PyXcType);
     PyModule_AddObject(m, CLS, (PyObject *)&PyXcType);
+
+    Py_INCREF(xc_error);
+    PyModule_AddObject(m, "Error", xc_error);
 }
 
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xs/xs.c Tue May 30 14:30:34 2006 -0500
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
 
     if (perms) {
         PyObject *val = PyList_New(perms_n);
-        for (i = 0; i < perms_n; i++, perms++) {
-            PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
-                                        "dom",  perms->id,
-                                        "read", perms->perms & XS_PERM_READ,
-                                        "write",perms->perms & XS_PERM_WRITE);
+        for (i = 0; i < perms_n; i++) {
+            PyObject *p =
+                Py_BuildValue("{s:i,s:i,s:i}",
+                              "dom",   perms[i].id,
+                              "read",  perms[i].perms & XS_PERM_READ,
+                              "write", perms[i].perms & XS_PERM_WRITE);
             PyList_SetItem(val, i, p);
         }
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomain.py       Tue May 30 14:30:34 2006 -0500
@@ -522,6 +522,28 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_sched_credit_get(self, domid):
+        """Get credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.sched_credit_domain_get(dominfo.getDomid())
+        except Exception, ex:
+            raise XendError(str(ex))
+    
+    def domain_sched_credit_set(self, domid, weight, cap):
+        """Set credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+        except Exception, ex:
+            raise XendError(str(ex))
+
     def domain_maxmem_set(self, domid, mem):
         """Set the memory limit for a domain.
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue May 30 14:30:34 2006 -0500
@@ -29,6 +29,7 @@ import string
 import string
 import time
 import threading
+import os
 
 import xen.lowlevel.xc
 from xen.util import asserts
@@ -700,6 +701,16 @@ class XendDomainInfo:
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
+        self.setVmPermissions()
+
+
+    def setVmPermissions(self):
+        """Allow the guest domain to read its UUID.  We don't allow it to
+        access any other entry, for security."""
+        xstransact.SetPermissions('%s/uuid' % self.vmpath,
+                                  { 'dom' : self.domid,
+                                    'read' : True,
+                                    'write' : False })
 
 
     def storeDomDetails(self):
@@ -1264,7 +1275,14 @@ class XendDomainInfo:
             m = self.image.getDomainMemory(self.info['memory'] * 1024)
             balloon.free(m)
             xc.domain_setmaxmem(self.domid, m)
-            xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
+
+            init_reservation = self.info['memory'] * 1024
+            if os.uname()[4] == 'ia64':
+                # Workaround until ia64 properly supports ballooning.
+                init_reservation = m
+
+            xc.domain_memory_increase_reservation(self.domid, init_reservation,
+                                                  0, 0)
 
             self.createChannels()
 
@@ -1527,6 +1545,10 @@ class XendDomainInfo:
 
         self.configure_bootloader()
         config = self.sxpr()
+
+        if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+            config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+                                          self.info['cpus'])])
 
         if self.readVm(RESTART_IN_PROGRESS):
             log.error('Xend failed during restart of domain %d.  '
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/balloon.py  Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB.  We need th
 BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
                       # rounded.
 RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
 ##
 # The time to sleep between retries grows linearly, using this value (in
 # seconds).  When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
         last_new_alloc = None
-        while retries < RETRY_LIMIT:
+        rlimit = RETRY_LIMIT
+        while retries < rlimit:
             free_mem = xc.physinfo()['free_memory']
 
             if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
                 return
 
             if retries == 0:
-                log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+                log.debug("Balloon: free %d; need %d; retries: %d.", 
+                          free_mem, need_mem, rlimit)
 
             if dom0_min_mem > 0:
                 dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
                     # Continue to retry, waiting for ballooning.
 
             time.sleep(sleep_time)
+            if retries < 2 * RETRY_LIMIT:
+                sleep_time += SLEEP_TIME_GROWTH
             retries += 1
-            sleep_time += SLEEP_TIME_GROWTH
 
         # Not enough memory; diagnose the problem.
         if dom0_min_mem == 0:
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/image.py    Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,7 @@
 
 import os, string
 import re
+import math
 
 import xen.lowlevel.xc
 from xen.xend import sxp
@@ -141,11 +142,16 @@ class ImageHandler:
                           % (self.ostype, self.vm.getDomid(), str(result)))
 
 
-    def getDomainMemory(self, mem):
+    def getDomainMemory(self, mem_kb):
         """@return The memory required, in KiB, by the domain to store the
-        given amount, also in KiB.  This is normally just mem, but HVM domains
-        have overheads to account for."""
-        return mem
+        given amount, also in KiB."""
+        if os.uname()[4] != 'ia64':
+            # A little extra because auto-ballooning is broken w.r.t. HVM
+            # guests. Also, slack is necessary for live migration since that
+            # uses shadow page tables.
+            if 'hvm' in xc.xeninfo()['xen_caps']:
+                mem_kb += 4*1024;
+        return mem_kb
 
     def buildDomain(self):
         """Build the domain. Define in subclass."""
@@ -377,15 +383,20 @@ class HVMImageHandler(ImageHandler):
         os.waitpid(self.pid, 0)
         self.pid = 0
 
-    def getDomainMemory(self, mem):
+    def getDomainMemory(self, mem_kb):
         """@see ImageHandler.getDomainMemory"""
-        page_kb = 4
-        extra_pages = 0
         if os.uname()[4] == 'ia64':
             page_kb = 16
             # ROM size for guest firmware, ioreq page and xenstore page
             extra_pages = 1024 + 2
-        return mem + extra_pages * page_kb
+        else:
+            page_kb = 4
+            # This was derived emperically:
+            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
+            #   + 4 to avoid low-memory condition
+            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+        return mem_kb + extra_pages * page_kb
 
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/server/SrvDomain.py Tue May 30 14:30:34 2006 -0500
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
                     ['latency', 'int'],
                     ['extratime', 'int'],
                     ['weight', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+    
+    def op_domain_sched_credit_get(self, _, req):
+        fn = FormFn(self.xd.domain_sched_credit_get,
+                    [['dom', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+
+
+    def op_domain_sched_credit_set(self, _, req):
+        fn = FormFn(self.xd.domain_sched_credit_set,
+                    [['dom', 'int'],
+                     ['weight', 'int']])
         val = fn(req.args, {'dom': self.dom.domid})
         return val
 
diff -r e74246451527 -r f54d38cea8ac 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Tue May 30 12:52:02 
2006 -0500
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Tue May 30 14:30:34 
2006 -0500
@@ -221,6 +221,34 @@ class xstransact:
                 xshandle().mkdir(self.transaction, self.prependPath(key))
 
 
+    def get_permissions(self, *args):
+        """If no arguments are given, return the permissions at this
+        transaction's path.  If one argument is given, treat that argument as
+        a subpath to this transaction's path, and return the permissions at
+        that path.  Otherwise, treat each argument as a subpath to this
+        transaction's path, and return a list composed of the permissions at
+        each of those instead.
+        """
+        if len(args) == 0:
+            return xshandle().get_permissions(self.transaction, self.path)
+        if len(args) == 1:
+            return self._get_permissions(args[0])
+        ret = []
+        for key in args:
+            ret.append(self._get_permissions(key))
+        return ret
+
+
+    def _get_permissions(self, key):
+        path = self.prependPath(key)
+        try:
+            return xshandle().get_permissions(self.transaction, path)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               '%s, while getting permissions from %s' %
+                               (ex.args[1], path))
+
+
     def set_permissions(self, *args):
         if len(args) == 0:
             raise TypeError
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xm/main.py       Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
                                     specifies another way of setting a 
domain's\n\
                                     cpu period/slice."
 
+sched_credit_help = "sched-credit                           Set or get credit 
scheduler parameters"
 block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
                 [BackDomId]         Create a new virtual block device"""
 block_detach_help = """block-detach  <DomId> <DevId>    Destroy a domain's 
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
     ]
 
 scheduler_commands = [
+    "sched-credit",
     "sched-bvt",
     "sched-bvt-ctxallow",
     "sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
         else:
             print_sedf(sedf_info)
 
+def xm_sched_credit(args):
+    usage_msg = """sched-credit:     Set or get credit scheduler parameters
+ Usage:
+
+        sched-credit -d domain [-w weight] [-c cap]
+    """
+    try:
+        opts, args = getopt.getopt(args[0:], "d:w:c:",
+            ["domain=", "weight=", "cap="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print usage_msg
+        sys.exit(1)
+
+    domain = None
+    weight = None
+    cap = None
+
+    for o, a in opts:
+        if o == "-d":
+            domain = a
+        elif o == "-w":
+            weight = int(a)
+        elif o == "-c":
+            cap = int(a);
+
+    if domain is None:
+        # place holder for system-wide scheduler parameters
+        print usage_msg
+        sys.exit(1)
+
+    if weight is None and cap is None:
+        print server.xend.domain.sched_credit_get(domain)
+    else:
+        if weight is None:
+            weight = int(0)
+        if cap is None:
+            cap = int(~0)
+
+        err = server.xend.domain.sched_credit_set(domain, weight, cap)
+        if err != 0:
+            print err
 
 def xm_info(args):
     arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
     "sched-sedf": xm_sched_sedf,
+    "sched-credit": xm_sched_credit,
     # block
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
diff -r e74246451527 -r f54d38cea8ac tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/security/secpol_tool.c      Tue May 30 14:30:34 2006 -0500
@@ -231,14 +231,16 @@ uint8_t pull_buffer[PULL_CACHE_SIZE];
 uint8_t pull_buffer[PULL_CACHE_SIZE];
 int acm_domain_getpolicy(int xc_handle)
 {
-    struct acm_op op;
+    struct acm_getpolicy getpolicy;
     int ret;
 
     memset(pull_buffer, 0x00, sizeof(pull_buffer));
-    op.cmd = ACM_GETPOLICY;
-    op.u.getpolicy.pullcache = (void *) pull_buffer;
-    op.u.getpolicy.pullcache_size = sizeof(pull_buffer);
-    if ((ret = xc_acm_op(xc_handle, &op)) < 0) {
+    getpolicy.interface_version = ACM_INTERFACE_VERSION;
+    getpolicy.pullcache = (void *) pull_buffer;
+    getpolicy.pullcache_size = sizeof(pull_buffer);
+    ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
+
+    if (ret < 0) {
         printf("ACM operation failed: errno=%d\n", errno);
         if (errno == EACCES)
             fprintf(stderr, "ACM operation failed -- need to"
@@ -275,13 +277,13 @@ int acm_domain_loadpolicy(int xc_handle,
         goto free_out;
     }
     if (len == read(fd, buffer, len)) {
-        struct acm_op op;
+        struct acm_setpolicy setpolicy;
         /* dump it and then push it down into xen/acm */
         acm_dump_policy_buffer(buffer, len);
-        op.cmd = ACM_SETPOLICY;
-        op.u.setpolicy.pushcache = (void *) buffer;
-        op.u.setpolicy.pushcache_size = len;
-        ret = xc_acm_op(xc_handle, &op);
+        setpolicy.interface_version = ACM_INTERFACE_VERSION;
+        setpolicy.pushcache = (void *) buffer;
+        setpolicy.pushcache_size = len;
+        ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, 
sizeof(setpolicy));
 
         if (ret)
             printf
@@ -322,15 +324,15 @@ int acm_domain_dumpstats(int xc_handle)
 int acm_domain_dumpstats(int xc_handle)
 {
     uint8_t stats_buffer[PULL_STATS_SIZE];
-    struct acm_op op;
+    struct acm_dumpstats dumpstats;
     int ret;
     struct acm_stats_buffer *stats;
 
     memset(stats_buffer, 0x00, sizeof(stats_buffer));
-    op.cmd = ACM_DUMPSTATS;
-    op.u.dumpstats.pullcache = (void *) stats_buffer;
-    op.u.dumpstats.pullcache_size = sizeof(stats_buffer);
-    ret = xc_acm_op(xc_handle, &op);
+    dumpstats.interface_version = ACM_INTERFACE_VERSION;
+    dumpstats.pullcache = (void *) stats_buffer;
+    dumpstats.pullcache_size = sizeof(stats_buffer);
+    ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
 
     if (ret < 0) {
         printf
diff -r e74246451527 -r f54d38cea8ac tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/tests/test_x86_emulator.c   Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,8 @@ static int read_any(
 static int read_any(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -32,7 +33,8 @@ static int write_any(
 static int write_any(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -48,7 +50,8 @@ static int cmpxchg_any(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
     unsigned long old_lo,
     unsigned long old_hi,
     unsigned long new_lo,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     ((unsigned long *)addr)[0] = new_lo;
     ((unsigned long *)addr)[1] = new_hi;
     return X86EMUL_CONTINUE;
 }
 
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
     read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
 };
 
 int main(int argc, char **argv)
 {
+    struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
     unsigned int res = 0x7FFFFFFF;
     u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
-    unsigned long cr2;
     int rc;
+
+    ctxt.regs = &regs;
+    ctxt.mode = X86EMUL_MODE_PROT32;
 
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
     regs.esi    = (unsigned long)&res + 0;
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
-    cr2         = regs.esi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.esi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/Makefile   Tue May 30 14:30:34 2006 -0500
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
 CLIENTS += xenstore-write
 CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
 
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
 .PHONY: all
 all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control 
xenstore-ls
 
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
 .PHONY: testcode
 testcode: xs_test xenstored_test xs_random
 
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.c   Tue May 30 14:30:34 2006 -0500
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
 
 static bool write_node(struct connection *conn, const struct node *node)
 {
+       /*
+        * conn will be null when this is called from manual_node.
+        * tdb_context copes with this.
+        */
+
        TDB_DATA key, data;
        void *p;
 
@@ -478,7 +483,7 @@ static bool write_node(struct connection
 
        /* TDB should set errno, but doesn't even set ecode AFAICT. */
        if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
-               corrupt(conn, "Write of %s = %s failed", key, data);
+               corrupt(conn, "Write of %s failed", key.dptr);
                goto error;
        }
        return true;
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.h   Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,8 @@
 
 #ifndef _XENSTORED_CORE_H
 #define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
 
 #include <sys/types.h>
 #include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
 
 extern int event_fd;
 
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
 #endif /* _XENSTORED_CORE_H */
 
 /*
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_domain.c Tue May 30 14:30:34 2006 -0500
@@ -33,12 +33,11 @@
 #include "talloc.h"
 #include "xenstored_core.h"
 #include "xenstored_domain.h"
-#include "xenstored_proc.h"
 #include "xenstored_watch.h"
 #include "xenstored_test.h"
 
 #include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
 
 static int *xc_handle;
 static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
 
 static int dom0_init(void) 
 { 
-       int rc, fd;
-       evtchn_port_t port; 
-       char str[20]; 
-       struct domain *dom0; 
-
-       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
-       if (fd == -1)
+       evtchn_port_t port;
+       struct domain *dom0;
+
+       port = xenbus_evtchn();
+       if (port == -1)
                return -1;
 
-       rc = read(fd, str, sizeof(str)); 
-       if (rc == -1)
-               goto outfd;
-       str[rc] = '\0'; 
-       port = strtoul(str, NULL, 0); 
-
-       close(fd); 
-
        dom0 = new_domain(NULL, 0, port); 
 
-       fd = open(XENSTORED_PROC_KVA, O_RDWR);
-       if (fd == -1)
+       dom0->interface = xenbus_map();
+       if (dom0->interface == NULL)
                return -1;
 
-       dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
-                              MAP_SHARED, fd, 0);
-       if (dom0->interface == MAP_FAILED)
-               goto outfd;
-
-       close(fd);
-
        talloc_steal(dom0->conn, dom0); 
 
        evtchn_notify(dom0->port); 
 
        return 0; 
-outfd:
-       close(fd);
-       return -1;
 }
 
 
diff -r e74246451527 -r f54d38cea8ac tools/xentrace/xentrace_format
--- a/tools/xentrace/xentrace_format    Tue May 30 12:52:02 2006 -0500
+++ b/tools/xentrace/xentrace_format    Tue May 30 14:30:34 2006 -0500
@@ -89,7 +89,7 @@ CPUREC = "I"
 CPUREC = "I"
 TRCREC = "QLLLLLL"
 
-last_tsc = [0,0,0,0,0,0,0,0]
+last_tsc = [0]
 
 i=0
 
@@ -111,7 +111,9 @@ while not interrupted:
 
        #print i, tsc
 
-       if tsc < last_tsc[cpu]:
+        if cpu >= len(last_tsc):
+            last_tsc += [0] * (cpu - len(last_tsc) + 1)
+       elif tsc < last_tsc[cpu]:
            print "TSC stepped backward cpu %d !  %d %d" % 
(cpu,tsc,last_tsc[cpu])
 
        last_tsc[cpu] = tsc
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac        Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/configure.ac        Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ AC_CONFIG_FILES([
     tests/block-list/Makefile
     tests/block-create/Makefile
     tests/block-destroy/Makefile
+    tests/block-integrity/Makefile
     tests/console/Makefile
     tests/create/Makefile
     tests/destroy/Makefile
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/ramdisk/bin/create_disk_image
--- a/tools/xm-test/ramdisk/bin/create_disk_image       Tue May 30 12:52:02 
2006 -0500
+++ b/tools/xm-test/ramdisk/bin/create_disk_image       Tue May 30 14:30:34 
2006 -0500
@@ -207,6 +207,13 @@ function dd_rootfs_to_image()
        dd if="$ROOTFS" of="$LOOPP" > /dev/null 2>&1
        if [ $? -ne 0 ]; then
                die "Failed to dd $ROOTFS to $LOOPP."
+       fi
+
+       # Resize fs to use full partition
+       e2fsck -f $LOOPP 
+       resize2fs $LOOPP
+       if [ $? -ne 0 ]; then
+               die "Failed to resize rootfs on $LOOPP."
        fi
 }
 
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/Makefile.am
--- a/tools/xm-test/tests/Makefile.am   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/Makefile.am   Tue May 30 14:30:34 2006 -0500
@@ -1,14 +1,15 @@ SUBDIRS =                     \
 SUBDIRS =                      \
                block-create    \
-               block-list      \
-                block-destroy   \
+               block-list      \
+               block-destroy   \
+               block-integrity \
                console         \
                create          \
                destroy         \
                dmesg           \
                domid           \
                domname         \
-               help            \
+               help            \
                info            \
                list            \
                memmax          \
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py
--- a/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py   
Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py   
Tue May 30 14:30:34 2006 -0500
@@ -65,13 +65,24 @@ if check_status and status != 0:
         FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
 # 5) check /proc/cpuinfo for cpu count
-cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
-status, output = traceCommand(cmd)
-if check_status and status != 0:
-    os.unsetenv("XEND_CONFIG")
-    restartXend()
-    FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
+# It takes some time for the CPU count to change, on multi-proc systems, so 
check the number of procs in a loop for 20 seconds. 
+#Sleep inside the loop for a second each time.
+timeout = 20
+starttime = time.time()
+while timeout + starttime > time.time():
+# Check /proc/cpuinfo
+    cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
+    status, output = traceCommand(cmd)
+    if check_status and status != 0:
+        os.unsetenv("XEND_CONFIG")
+        restartXend()
+        FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
+# Has it succeeded? If so, we can leave the loop
+    if output == str(enforce_dom0_cpus):
+        break
+# Sleep for 1 second before trying again
+    time.sleep(1)
 if output != str(enforce_dom0_cpus):
     os.unsetenv("XEND_CONFIG")
     restartXend()
@@ -94,7 +105,14 @@ if check_status and status != 0:
     FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
 # check restore worked
-num_online = int(getDomInfo("Domain-0", "VCPUs"))
+# Since this also takes time, we will do it in a loop with a 20 second timeout.
+timeout=20
+starttime=time.time()
+while timeout + starttime > time.time(): 
+    num_online = int(getDomInfo("Domain-0", "VCPUs"))
+    if num_online == dom0_online_vcpus:
+        break
+    time.sleep(1)
 if num_online != dom0_online_vcpus:
     os.unsetenv("XEND_CONFIG")
     restartXend()
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/03_network_local_tcp_pos.py
--- a/tools/xm-test/tests/network/03_network_local_tcp_pos.py   Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/03_network_local_tcp_pos.py   Tue May 30 
14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
     lofails=""
     for size in trysizes:
         out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -q -c 20 " 
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             lofails += " " + str(size)
 
@@ -54,7 +54,7 @@ try:
     ip = netdev.getNetDevIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + ip + " -E /dev/urandom -q -c 20 "
-              + "--fast -d "+ str(size))
+              + "--fast -d "+ str(size) + " -N " + str(size))
         if out["return"]:
             eth0fails += " " + str(size) 
 except ConsoleError, e:
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/04_network_local_udp_pos.py
--- a/tools/xm-test/tests/network/04_network_local_udp_pos.py   Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/04_network_local_udp_pos.py   Tue May 30 
14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
     lofails=""
     for size in trysizes:
         out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -2 -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             lofails += " " + str(size)
             print out["output"]
@@ -54,7 +54,7 @@ try:
     ip = netdev.getNetDevIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + ip + " -E /dev/urandom -2 -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             eth0fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/06_network_dom0_tcp_pos.py
--- a/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
     dom0ip = netdev.getDom0AliasIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/07_network_dom0_udp_pos.py
--- a/tools/xm-test/tests/network/07_network_dom0_udp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/07_network_dom0_udp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
     dom0ip = netdev.getDom0AliasIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -2 -q -c 
20"
-             + " --fast -d " + str(size))
+             + " --fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/12_network_domU_tcp_pos.py
--- a/tools/xm-test/tests/network/12_network_domU_tcp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/12_network_domU_tcp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
     ip2 = dst_netdev.getNetDevIP()
     for size in pingsizes:
         out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/13_network_domU_udp_pos.py
--- a/tools/xm-test/tests/network/13_network_domU_udp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/13_network_domU_udp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
     ip2 = dst_netdev.getNetDevIP()
     for size in pingsizes:
         out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -2 -q "
-              + "-c 20 --fast -d " + str(size))
+              + "-c 20 --fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk    Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/Rules.mk    Tue May 30 14:30:34 2006 -0500
@@ -3,31 +3,31 @@
 
 HAS_ACPI := y
 VALIDATE_VT    ?= n
-xen_ia64_dom0_virtual_physical ?= n
+xen_ia64_dom0_virtual_physical ?= y
+no_warns ?= n
 
 ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
 CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
 endif
-AFLAGS  += -D__ASSEMBLY__ -nostdinc $(CPPFLAGS)
-AFLAGS  += -mconstant-gp
-CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64        \
-             -I$(BASEDIR)/include/asm-ia64/linux                       \
-            -I$(BASEDIR)/include/asm-ia64/linux-xen                    \
-            -I$(BASEDIR)/include/asm-ia64/linux-null                   \
-             -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+
+# Used only by linux/Makefile.
+AFLAGS_KERNEL  += -mconstant-gp
+
+# Note: .S -> .o rule uses AFLAGS and CFLAGS.
 
 CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
 CFLAGS  += -mconstant-gp
 #CFLAGS  += -O3                # -O3 over-inlines making debugging tough!
 CFLAGS  += -O2         # but no optimization causes compile errors!
-#CFLAGS  += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
-CFLAGS  += -iwithprefix include -Wall
-CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
-CFLAGS  += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \
+CFLAGS  += -fomit-frame-pointer -D__KERNEL__
+CFLAGS  += -iwithprefix include
+CPPFLAGS+= -I$(BASEDIR)/include                                         \
+           -I$(BASEDIR)/include/asm-ia64                                \
            -I$(BASEDIR)/include/asm-ia64/linux                                 
\
            -I$(BASEDIR)/include/asm-ia64/linux-xen                     \
           -I$(BASEDIR)/include/asm-ia64/linux-null                     \
            -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+CFLAGS += $(CPPFLAGS)
 #CFLAGS  += -Wno-pointer-arith -Wredundant-decls
 CFLAGS  += -DIA64 -DXEN -DLINUX_2_6 -DV_IOSAPIC_READY
 CFLAGS += -ffixed-r13 -mfixed-range=f2-f5,f12-f127
@@ -39,4 +39,8 @@ ifeq ($(xen_ia64_dom0_virtual_physical),
 ifeq ($(xen_ia64_dom0_virtual_physical),y)
 CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP
 endif
+ifeq ($(no_warns),y)
+CFLAGS += -Wa,--fatal-warnings
+endif
+
 LDFLAGS := -g
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/asm-offsets.c       Tue May 30 14:30:34 2006 -0500
@@ -50,8 +50,6 @@ void foo(void)
        DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, 
arch.metaphysical_saved_rr0));
        DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, 
arch.breakimm));
        DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
-       DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, 
arch.dtlb_pte));
-       DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, 
arch.itlb_pte));
        DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
        DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
        DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/setup.c   Tue May 30 14:30:34 2006 -0500
@@ -800,8 +800,7 @@ cpu_init (void)
        cpu_data = per_cpu_init();
 
 #ifdef XEN
-       printf ("cpu_init: current=%p, current->domain->arch.mm=%p\n",
-               current, current->domain->arch.mm);
+       printf ("cpu_init: current=%p\n", current);
 #endif
 
        /*
@@ -872,12 +871,11 @@ cpu_init (void)
 #ifndef XEN
        current->active_mm = &init_mm;
 #endif
-#ifdef XEN
-       if (current->domain->arch.mm)
-#else
+#ifndef XEN
        if (current->mm)
-#endif
                BUG();
+#endif
+
 
 #ifdef XEN
        ia64_fph_enable();
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/smp.c     Tue May 30 14:30:34 2006 -0500
@@ -53,28 +53,6 @@
 #endif
 
 #ifdef XEN
-// FIXME: MOVE ELSEWHERE
-//Huh? This seems to be used on ia64 even if !CONFIG_SMP
-void flush_tlb_mask(cpumask_t mask)
-{
-    int cpu;
-
-    cpu = smp_processor_id();
-    if (cpu_isset (cpu, mask)) {
-        cpu_clear(cpu, mask);
-       local_flush_tlb_all ();
-    }
-
-#ifdef CONFIG_SMP
-    if (cpus_empty(mask))
-        return;
-
-    for (cpu = 0; cpu < NR_CPUS; ++cpu)
-        if (cpu_isset(cpu, mask))
-          smp_call_function_single
-            (cpu, (void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
-#endif
-}
 //#if CONFIG_SMP || IA64
 #if CONFIG_SMP
 //Huh? This seems to be used on ia64 even if !CONFIG_SMP
@@ -276,7 +254,6 @@ smp_send_reschedule (int cpu)
 {
        platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
-#endif
 
 void
 smp_flush_tlb_all (void)
@@ -284,15 +261,6 @@ smp_flush_tlb_all (void)
        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
 }
 
-#ifdef XEN
-void
-smp_vhpt_flush_all(void)
-{
-       on_each_cpu((void (*)(void *))vhpt_flush, NULL, 1, 1);
-}
-#endif
-
-#ifndef XEN
 void
 smp_flush_tlb_mm (struct mm_struct *mm)
 {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/unaligned.c
--- a/xen/arch/ia64/linux-xen/unaligned.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/unaligned.c       Tue May 30 14:30:34 2006 -0500
@@ -377,7 +377,7 @@ get_rse_reg (struct pt_regs *regs, unsig
     if (ridx >= sof) {
         /* read of out-of-frame register returns an undefined value; 0 in our 
case.  */
         DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", 
r1, sof);
-        panic("wrong stack register number (iip=%p)\n", regs->cr_iip);
+        panic("wrong stack register number (iip=%lx)\n", regs->cr_iip);
     }
 
     if (ridx < sor)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/pal_emul.c
--- a/xen/arch/ia64/vmx/pal_emul.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/pal_emul.c      Tue May 30 14:30:34 2006 -0500
@@ -62,8 +62,8 @@ pal_cache_flush (VCPU *vcpu) {
 //             ia64_pal_call_static(gr28 ,gr29, gr30, 
 //                             result.v1,1LL);
 //     }
-       while (result.status != 0) {
-        panic("PAL_CACHE_FLUSH ERROR, status %ld", result.status);
+       if(result.status != 0) {
+               panic_domain(vcpu_regs(vcpu),"PAL_CACHE_FLUSH ERROR, status 
%ld", result.status);
        }
 
        return result;
@@ -445,7 +445,7 @@ pal_emul( VCPU *vcpu) {
                        break;
 
                default:
-                       panic("pal_emul(): guest call unsupported pal" );
+                       panic_domain(vcpu_regs(vcpu),"pal_emul(): guest call 
unsupported pal" );
   }
                set_pal_result (vcpu, result);
 }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vlsapic.c       Tue May 30 14:30:34 2006 -0500
@@ -568,7 +568,7 @@ int vmx_check_pending_irq(VCPU *vcpu)
     if (  vpsr.i && IRQ_NO_MASKED == mask ) {
         isr = vpsr.val & IA64_PSR_RI;
         if ( !vpsr.ic )
-            panic("Interrupt when IC=0\n");
+            panic_domain(regs,"Interrupt when IC=0\n");
         vmx_reflect_interruption(0,isr,0, 12, regs ); // EXT IRQ
         injected = 1;
     }
@@ -595,7 +595,8 @@ void guest_write_eoi(VCPU *vcpu)
     uint64_t  spsr;
 
     vec = highest_inservice_irq(vcpu);
-    if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
+    if ( vec == NULL_VECTOR ) 
+       panic_domain(vcpu_regs(vcpu),"Wrong vector to EOI\n");
     local_irq_save(spsr);
     VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
     local_irq_restore(spsr);
@@ -634,7 +635,7 @@ static void generate_exirq(VCPU *vcpu)
     update_vhpi(vcpu, NULL_VECTOR);
     isr = vpsr.val & IA64_PSR_RI;
     if ( !vpsr.ic )
-        panic("Interrupt when IC=0\n");
+        panic_domain(regs,"Interrupt when IC=0\n");
     vmx_reflect_interruption(0,isr,0, 12, regs); // EXT IRQ
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmmu.c  Tue May 30 14:30:34 2006 -0500
@@ -134,11 +134,11 @@ static void init_domain_vhpt(struct vcpu
     void * vbase;
     page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0);
     if ( page == NULL ) {
-        panic("No enough contiguous memory for init_domain_vhpt\n");
+        panic_domain(vcpu_regs(v),"No enough contiguous memory for 
init_domain_vhpt\n");
     }
     vbase = page_to_virt(page);
     memset(vbase, 0, VCPU_VHPT_SIZE);
-    printk("Allocate domain tlb at 0x%p\n", vbase);
+    printk("Allocate domain vhpt at 0x%p\n", vbase);
     
     VHPT(v,hash) = vbase;
     VHPT(v,hash_sz) = VCPU_VHPT_SIZE/2;
@@ -157,11 +157,11 @@ void init_domain_tlb(struct vcpu *v)
     init_domain_vhpt(v);
     page = alloc_domheap_pages (NULL, VCPU_VTLB_ORDER, 0);
     if ( page == NULL ) {
-        panic("No enough contiguous memory for init_domain_tlb\n");
+        panic_domain(vcpu_regs(v),"No enough contiguous memory for 
init_domain_tlb\n");
     }
     vbase = page_to_virt(page);
     memset(vbase, 0, VCPU_VTLB_SIZE);
-    printk("Allocate domain tlb at 0x%p\n", vbase);
+    printk("Allocate domain vtlb at 0x%p\n", vbase);
     
     VTLB(v,hash) = vbase;
     VTLB(v,hash_sz) = VCPU_VTLB_SIZE/2;
@@ -202,7 +202,7 @@ void machine_tlb_insert(struct vcpu *d, 
     mtlb.ppn = get_mfn(d->domain,tlb->ppn);
     mtlb_ppn=mtlb.ppn;
     if (mtlb_ppn == INVALID_MFN)
-    panic("Machine tlb insert with invalid mfn number.\n");
+        panic_domain(vcpu_regs(d),"Machine tlb insert with invalid mfn 
number.\n");
 
     psr = ia64_clear_ic();
     if ( cl == ISIDE_TLB ) {
@@ -325,12 +325,12 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod
     }
     if( gpip){
         mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
-       if( mfn == INVALID_MFN )  panic("fetch_code: invalid memory\n");
+       if( mfn == INVALID_MFN )  panic_domain(vcpu_regs(vcpu),"fetch_code: 
invalid memory\n");
        vpa =(u64 *)__va( (gip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT));
     }else{
        tlb = vhpt_lookup(gip);
        if( tlb == NULL)
-           panic("No entry found in ITLB and DTLB\n");
+           panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n");
        vpa =(u64 
*)__va((tlb->ppn>>(PAGE_SHIFT-ARCH_PAGE_SHIFT)<<PAGE_SHIFT)|(gip&(PAGE_SIZE-1)));
     }
     *code1 = *vpa++;
@@ -347,7 +347,7 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UIN
     slot = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
     if (slot >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_and_insert(vcpu, pte, itir, ifa);
@@ -363,7 +363,7 @@ IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UIN
     slot = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
     if (slot >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT;
@@ -385,7 +385,7 @@ IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64
     index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
     if (index >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_entries(vcpu, va, ps);
@@ -407,7 +407,7 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64
     index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
     if (index>=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_entries(vcpu, va, ps);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_entry.S
--- a/xen/arch/ia64/vmx/vmx_entry.S     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_entry.S     Tue May 30 14:30:34 2006 -0500
@@ -290,10 +290,59 @@ GLOBAL_ENTRY(ia64_leave_hypervisor)
     mov ar.ccv=r18
     ;;
 //rbs_switch
-    // loadrs has already been shifted
+    
+    shr.u r18=r20,16
+    ;;
+    movl r19= THIS_CPU(ia64_phys_stacked_size_p8)
+    ;;
+    ld4 r19=[r19]
+     
+vmx_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * (Clearing proceeds at roughly 5 registers/cycle on McKinley.)
+    */
+#   define pRecurse    p6
+#   define pReturn     p7
+#   define Nregs       14
+    
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    shr.u loc1=r18,9           // RNaTslots <= floor(dirtySize / (64*8))
+    sub r19=r19,r18                    // r19 = (physStackedSize + 8) - 
dirtySize
+    ;;
+    mov ar.rsc=r20                     // load ar.rsc to be used for "loadrs"
+    shladd in0=loc1,3,r19
+    mov in1=0
+    ;;
+    TEXT_ALIGN(32)
+vmx_rse_clear_invalid:
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    cmp.lt pRecurse,p0=Nregs*8,in0     // if more than Nregs regs left to 
clear, (re)curse
+    add out0=-Nregs*8,in0
+    add out1=1,in1                     // increment recursion count
+    mov loc1=0
+    mov loc2=0
+    ;;
+    mov loc3=0
+    mov loc4=0
+    mov loc5=0
+    mov loc6=0
+    mov loc7=0
+(pRecurse) br.call.dptk.few b0=vmx_rse_clear_invalid
+    ;;
+    mov loc8=0
+    mov loc9=0
+    cmp.ne pReturn,p0=r0,in1   // if recursion count != 0, we need to do a 
br.ret
+    mov loc10=0
+    mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#      undef pRecurse
+#      undef pReturn
+
+// loadrs has already been shifted
     alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-    ;;
-    mov ar.rsc=r20
     ;;
     loadrs
     ;;
@@ -315,7 +364,9 @@ vmx_dorfirfi_back:
     adds r18=IA64_VPD_BASE_OFFSET,r21
     ;;
     ld8 r18=[r18]   //vpd
-    ;;
+    adds r17=IA64_VCPU_ISR_OFFSET,r21
+    ;;
+    ld8 r17=[r17]
     adds r19=VPD(VPSR),r18
     ;;
     ld8 r19=[r19]        //vpsr
@@ -331,12 +382,14 @@ vmx_dorfirfi_back:
     mov b0=r16
     br.cond.sptk b0         // call the service
     ;;
+END(ia64_leave_hypervisor)
 switch_rr7:
 // fall through
 GLOBAL_ENTRY(ia64_vmm_entry)
 /*
  *  must be at bank 0
  *  parameter:
+ *  r17:cr.isr
  *  r18:vpd
  *  r19:vpsr
  *  r20:__vsa_base
@@ -348,13 +401,19 @@ GLOBAL_ENTRY(ia64_vmm_entry)
     tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
     ;;
     (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p1) br.sptk.many ia64_vmm_entry_out
+    ;;
+    tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT                //p1=cr.isr.ir
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
     (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
     ;;
+ia64_vmm_entry_out:    
     mov pr=r23,-2
     mov b0=r29
     ;;
     br.cond.sptk b0             // call pal service
-END(ia64_leave_hypervisor)
+END(ia64_vmm_entry)
 
 //r24 rfi_pfs
 //r17 address of rfi_pfs
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_init.c      Tue May 30 14:30:34 2006 -0500
@@ -208,8 +208,9 @@ vmx_create_vp(struct vcpu *v)
        ivt_base = (u64) &vmx_ia64_ivt;
        printk("ivt_base: 0x%lx\n", ivt_base);
        ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
-       if (ret != PAL_STATUS_SUCCESS)
-               panic("ia64_pal_vp_create failed. \n");
+       if (ret != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"ia64_pal_vp_create failed. \n");
+       }
 }
 
 /* Other non-context related tasks can be done in context switch */
@@ -220,8 +221,9 @@ vmx_save_state(struct vcpu *v)
 
        /* FIXME: about setting of pal_proc_vector... time consuming */
        status = ia64_pal_vp_save((u64 *)v->arch.privregs, 0);
-       if (status != PAL_STATUS_SUCCESS)
-               panic("Save vp status failed\n");
+       if (status != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"Save vp status failed\n");
+       }
 
 
        /* Need to save KR when domain switch, though HV itself doesn;t
@@ -244,8 +246,9 @@ vmx_load_state(struct vcpu *v)
        u64 status;
 
        status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0);
-       if (status != PAL_STATUS_SUCCESS)
-               panic("Restore vp status failed\n");
+       if (status != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"Restore vp status failed\n");
+       }
 
        ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
        ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
@@ -343,17 +346,18 @@ int vmx_build_physmap_table(struct domai
            for (j = io_ranges[i].start;
                 j < io_ranges[i].start + io_ranges[i].size;
                 j += PAGE_SIZE)
-               assign_domain_page(d, j, io_ranges[i].type);
+               __assign_domain_page(d, j, io_ranges[i].type);
        }
 
        /* Map normal memory below 3G */
        end = VMX_CONFIG_PAGES(d) << PAGE_SHIFT;
        tmp = end < MMIO_START ? end : MMIO_START;
        for (i = 0; (i < tmp) && (list_ent != &d->page_list); i += PAGE_SIZE) {
-           mfn = page_to_mfn(list_entry(
-               list_ent, struct page_info, list));
+           mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
+           list_ent = mfn_to_page(mfn)->list.next;
+           if (VGA_IO_START <= i && i < VGA_IO_START + VGA_IO_SIZE)
+               continue;
            assign_domain_page(d, i, mfn << PAGE_SHIFT);
-           list_ent = mfn_to_page(mfn)->list.next;
        }
        ASSERT(list_ent != &d->page_list);
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_interrupt.c
--- a/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 14:30:34 2006 -0500
@@ -91,8 +91,12 @@ inject_guest_interruption(VCPU *vcpu, u6
 {
     u64 viva;
     REGS *regs;
+    ISR pt_isr;
     regs=vcpu_regs(vcpu);
-
+    // clear cr.isr.ir
+    pt_isr.val = VMX(vcpu,cr_isr);
+    pt_isr.ir = 0;
+    VMX(vcpu,cr_isr) = pt_isr.val;
     collect_interruption(vcpu);
 
     vmx_vcpu_get_iva(vcpu,&viva);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_ivt.S       Tue May 30 14:30:34 2006 -0500
@@ -143,35 +143,62 @@ ENTRY(vmx_itlb_miss)
     thash r17 = r16
     ;;
     ttag r20 = r16
+    mov r18 = r17      
     ;;
 vmx_itlb_loop:
     cmp.eq p6,p0 = r0, r17
-(p6) br vmx_itlb_out
-    ;;
-    adds r22 = VLE_TITAG_OFFSET, r17
-    adds r23 = VLE_CCHAIN_OFFSET, r17
-    ;;
-    ld8 r24 = [r22]
-    ld8 r25 = [r23]
-    ;;
-    lfetch [r25]
-    cmp.eq  p6,p7 = r20, r24
-    ;;
-(p7)    mov r17 = r25;
-(p7)    br.sptk vmx_itlb_loop
+(p6)br vmx_itlb_out
+    ;;
+    adds r16 = VLE_TITAG_OFFSET, r17
+    adds r19 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r22 = [r16]
+    ld8 r23 = [r19]
+    ;;
+    lfetch [r23]
+    cmp.eq  p6,p7 = r20, r22
+    ;;
+(p7)mov r17 = r23;
+(p7)br.sptk vmx_itlb_loop
     ;;
     adds r23 = VLE_PGFLAGS_OFFSET, r17
     adds r24 = VLE_ITIR_OFFSET, r17
     ;;
-    ld8 r26 = [r23]
-    ld8 r25 = [r24]
-    ;;
-    mov cr.itir = r25
-    ;;
-    itc.i r26
+    ld8 r25 = [r23]
+    ld8 r26 = [r24]
+    ;;
+    cmp.eq p6,p7=r18,r17
+(p6) br vmx_itlb_loop1
+    ;;
+    ld8 r27 = [r18]
+    ;;
+    extr.u r19 = r27, 56, 8
+    extr.u r20 = r25, 56, 8
+    ;;
+    dep r27 = r20, r27, 56, 8
+    dep r25 = r19, r25, 56, 8
+    ;;
+    st8 [r18] = r25,8
+    st8 [r23] = r27
+    ;;
+    ld8 r28 = [r18]
+    ;;
+    st8 [r18] = r26,8
+    st8 [r24] = r28
+    ;;
+    ld8 r30 = [r18]
+    ;;
+    st8 [r18] = r22
+    st8 [r16] = r30 
+    ;;
+vmx_itlb_loop1:
+    mov cr.itir = r26
+    ;;
+    itc.i r25
     ;;
     srlz.i
     ;;
+    mov r17=cr.isr
     mov r23=r31
     mov r22=b0
     adds r16=IA64_VPD_BASE_OFFSET,r21
@@ -201,42 +228,68 @@ ENTRY(vmx_dtlb_miss)
     mov r29=cr.ipsr;
     ;;
     tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-    (p6)br.sptk vmx_alt_dtlb_miss_1
-//(p6)br.sptk vmx_fault_2
+(p6)br.sptk vmx_alt_dtlb_miss_1
     mov r16 = cr.ifa
     ;;
     thash r17 = r16
     ;;
     ttag r20 = r16
+    mov r18 = r17      
     ;;
 vmx_dtlb_loop:
     cmp.eq p6,p0 = r0, r17
 (p6)br vmx_dtlb_out
     ;;
-    adds r22 = VLE_TITAG_OFFSET, r17
-    adds r23 = VLE_CCHAIN_OFFSET, r17
-    ;;
-    ld8 r24 = [r22]
-    ld8 r25 = [r23]
-    ;;
-    lfetch [r25]
-    cmp.eq  p6,p7 = r20, r24
-    ;;
-(p7)mov r17 = r25;
+    adds r16 = VLE_TITAG_OFFSET, r17
+    adds r19 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r22 = [r16]
+    ld8 r23 = [r19]
+    ;;
+    lfetch [r23]
+    cmp.eq  p6,p7 = r20, r22
+    ;;
+(p7)mov r17 = r23;
 (p7)br.sptk vmx_dtlb_loop
     ;;
     adds r23 = VLE_PGFLAGS_OFFSET, r17
     adds r24 = VLE_ITIR_OFFSET, r17
     ;;
-    ld8 r26 = [r23]
-    ld8 r25 = [r24]
-    ;;
-    mov cr.itir = r25
-    ;;
-    itc.d r26
+    ld8 r25 = [r23]
+    ld8 r26 = [r24]
+    ;;
+    cmp.eq p6,p7=r18,r17
+(p6) br vmx_dtlb_loop1
+    ;;
+    ld8 r27 = [r18]
+    ;;
+    extr.u r19 = r27, 56, 8
+    extr.u r20 = r25, 56, 8
+    ;;
+    dep r27 = r20, r27, 56, 8
+    dep r25 = r19, r25, 56, 8
+    ;;
+    st8 [r18] = r25,8
+    st8 [r23] = r27
+    ;;
+    ld8 r28 = [r18]
+    ;;
+    st8 [r18] = r26,8
+    st8 [r24] = r28
+    ;;
+    ld8 r30 = [r18]
+    ;;
+    st8 [r18] = r22
+    st8 [r16] = r30 
+    ;;
+vmx_dtlb_loop1:
+    mov cr.itir = r26
+    ;;
+    itc.d r25
     ;;
     srlz.d;
     ;;
+    mov r17=cr.isr
     mov r23=r31
     mov r22=b0
     adds r16=IA64_VPD_BASE_OFFSET,r21
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx/vmx_phy_mode.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c  Tue May 30 14:30:34 2006 -0500
@@ -186,8 +186,10 @@ vmx_load_all_rr(VCPU *vcpu)
         * mode in same region
         */
        if (is_physical_mode(vcpu)) {
-               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-                       panic("Unexpected domain switch in phy emul\n");
+               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL){
+                       panic_domain(vcpu_regs(vcpu),
+                                    "Unexpected domain switch in phy emul\n");
+               }
                phy_rr.rrval = vcpu->arch.metaphysical_rr0;
                //phy_rr.ps = PAGE_SHIFT;
                phy_rr.ve = 1;
@@ -322,8 +324,7 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_
         break;
     default:
         /* Sanity check */
-    printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
-        panic("Unexpected virtual <--> physical mode transition");
+        panic_domain(vcpu_regs(vcpu),"Unexpected virtual <--> physical mode 
transition,old:%lx,new:%lx\n",old_psr.val,new_psr.val);
         break;
     }
     return;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_process.c   Tue May 30 14:30:34 2006 -0500
@@ -338,7 +338,7 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r
     }
     if(vec == 1) type = ISIDE_TLB;
     else if(vec == 2) type = DSIDE_TLB;
-    else panic("wrong vec\n");
+    else panic_domain(regs,"wrong vec:%0xlx\n",vec);
 
 //    prepare_if_physical_mode(v);
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_support.c   Tue May 30 14:30:34 2006 -0500
@@ -92,12 +92,12 @@ void vmx_io_assist(struct vcpu *v)
      */
     vio = get_vio(v->domain, v->vcpu_id);
     if (!vio)
-       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+       panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", 
(unsigned long)vio);
 
     p = &vio->vp_ioreq;
 
     if (p->state == STATE_IORESP_HOOK)
-       panic("Not supported: No hook available for DM request\n");
+       panic_domain(vcpu_regs(v),"Not supported: No hook available for DM 
request\n");
 
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
        if (p->state != STATE_IORESP_READY) {
@@ -135,7 +135,7 @@ void vmx_intr_assist(struct vcpu *v)
      * out of vmx_wait_io, when guest is still waiting for response.
      */
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
-       panic("!!!Bad resume to guest before I/O emulation is done.\n");
+       panic_domain(vcpu_regs(v),"!!!Bad resume to guest before I/O emulation 
is done.\n");
 
     /* Clear indicator specific to interrupt delivered from DM */
     if (test_and_clear_bit(port,
@@ -154,7 +154,7 @@ void vmx_intr_assist(struct vcpu *v)
      */
     vio = get_vio(v->domain, v->vcpu_id);
     if (!vio)
-       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+       panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", 
(unsigned long)vio);
 
 #ifdef V_IOSAPIC_READY
     /* Confirm virtual interrupt line signals, and set pending bits in vpd */
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_vcpu.c
--- a/xen/arch/ia64/vmx/vmx_vcpu.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c      Tue May 30 14:30:34 2006 -0500
@@ -91,7 +91,7 @@ vmx_vcpu_set_psr(VCPU *vcpu, unsigned lo
      * Otherwise panic
      */
     if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
-        panic ("Setting unsupport guest psr!");
+        panic_domain (regs,"Setting unsupport guest psr!");
     }
 
     /*
@@ -206,7 +206,7 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI
     vcpu_get_rr(vcpu, reg, &oldrr.rrval);
     newrr.rrval=val;
     if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits))
-        panic_domain (NULL, "use of invalid rid %lx\n", newrr.rid);
+        panic_domain (NULL, "use of invalid rid %x\n", newrr.rid);
     if(oldrr.ps!=newrr.ps){
         thash_purge_all(vcpu);
     }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_virt.c
--- a/xen/arch/ia64/vmx/vmx_virt.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_virt.c      Tue May 30 14:30:34 2006 -0500
@@ -182,8 +182,9 @@ IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu
 IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
 {
     UINT64 val;
+
     if(vcpu_get_gr_nat(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
-       panic(" get_psr nat bit fault\n");
+       panic_domain(vcpu_regs(vcpu),"get_psr nat bit fault\n");
 
        val = (val & MASK(0, 32)) | (VCPU(vcpu, vpsr) & MASK(32, 32));
 #if 0
@@ -216,7 +217,7 @@ IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST6
     regs=vcpu_regs(vcpu);
     vpsr.val=regs->cr_ipsr;
     if ( vpsr.is == 1 ) {
-        panic ("We do not support IA32 instruction yet");
+        panic_domain(regs,"We do not support IA32 instruction yet");
     }
 
     return vmx_vcpu_rfi(vcpu);
@@ -715,8 +716,9 @@ IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *v
 {
     // I27 and M30 are identical for these fields
     UINT64  imm;
+
     if(inst.M30.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than 
itc");
     }
 #ifdef  CHECK_FAULT
     IA64_PSR vpsr;
@@ -741,7 +743,7 @@ IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *v
     // I26 and M29 are identical for these fields
     u64 r2;
     if(inst.M29.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than 
itc");
     }
     if(vcpu_get_gr_nat(vcpu,inst.M29.r2,&r2)){
 #ifdef  CHECK_FAULT
@@ -769,7 +771,7 @@ IA64FAULT vmx_emul_mov_from_ar_reg(VCPU 
     // I27 and M30 are identical for these fields
     u64 r1;
     if(inst.M31.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than 
itc");
     }
 #ifdef  CHECK_FAULT
     if(check_target_register(vcpu,inst.M31.r1)){
@@ -1359,8 +1361,7 @@ if ( (cause == 0xff && opcode == 0x1e000
     slot_type = slot_types[bundle.template][slot];
     ia64_priv_decoder(slot_type, inst, &cause);
     if(cause==0){
-        printf("This instruction at 0x%lx slot %d can't be  virtualized", iip, 
slot);
-        panic("123456\n");
+        panic_domain(regs,"This instruction at 0x%lx slot %d can't be  
virtualized", iip, slot);
     }
 #else
     inst.inst=opcode;
@@ -1494,12 +1495,8 @@ if ( (cause == 0xff && opcode == 0x1e000
        status=IA64_FAULT;
         break;
     default:
-        printf("unknown cause %ld, iip: %lx, ipsr: %lx\n", 
cause,regs->cr_iip,regs->cr_ipsr);
-        while(1);
-       /* For unknown cause, let hardware to re-execute */
-       status=IA64_RETRY;
-        break;
-//        panic("unknown cause in virtualization intercept");
+        panic_domain(regs,"unknown cause %ld, iip: %lx, ipsr: %lx\n", 
cause,regs->cr_iip,regs->cr_ipsr);
+        break;
     };
 
 #if 0
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vtlb.c  Tue May 30 14:30:34 2006 -0500
@@ -274,36 +274,36 @@ static void vtlb_purge(thash_cb_t *hcb, 
 static void vtlb_purge(thash_cb_t *hcb, u64 va, u64 ps)
 {
     thash_data_t *hash_table, *prev, *next;
-    u64 start, end, size, tag, rid;
+    u64 start, end, size, tag, rid, def_size;
     ia64_rr vrr;
     vcpu_get_rr(current, va, &vrr.rrval);
     rid = vrr.rid;
     size = PSIZE(ps);
     start = va & (-size);
     end = start + size;
+    def_size = PSIZE(vrr.ps);
     while(start < end){
         hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag);
-//         tag = ia64_ttag(start);
         if(!INVALID_TLB(hash_table)){
-       if(hash_table->etag == tag){
-            __rem_hash_head(hcb, hash_table);
-       }
-           else{
-           prev=hash_table;
-               next=prev->next;
-               while(next){
-                       if(next->etag == tag){
-                           prev->next=next->next;
-                           cch_free(hcb,next);
-                           hash_table->len--;
-                           break;
-                       }
-                       prev=next;
-                   next=next->next;
-           }
-       }
-        }
-           start += PAGE_SIZE;
+            if(hash_table->etag == tag){
+                __rem_hash_head(hcb, hash_table);
+            }
+            else{
+                prev=hash_table;
+                next=prev->next;
+                while(next){
+                    if(next->etag == tag){
+                        prev->next=next->next;
+                        cch_free(hcb,next);
+                        hash_table->len--;
+                        break;
+                    }
+                    prev=next;
+                    next=next->next;
+                }
+            }
+        }
+        start += def_size;
     }
 //    machine_tlb_purge(va, ps);
 }
@@ -319,26 +319,26 @@ static void vhpt_purge(thash_cb_t *hcb, 
     start = va & (-size);
     end = start + size;
     while(start < end){
-       hash_table = (thash_data_t *)ia64_thash(start);
-           tag = ia64_ttag(start);
-       if(hash_table->etag == tag ){
+        hash_table = (thash_data_t *)ia64_thash(start);
+        tag = ia64_ttag(start);
+        if(hash_table->etag == tag ){
             __rem_hash_head(hcb, hash_table);
-       }
-           else{
-           prev=hash_table;
-               next=prev->next;
-               while(next){
-                       if(next->etag == tag){
-                           prev->next=next->next;
-                           cch_free(hcb,next);
-                           hash_table->len--;
-                           break;
-                       }
-                       prev=next;
-                   next=next->next;
-           }
-       }
-           start += PAGE_SIZE;
+        }
+        else{
+            prev=hash_table;
+            next=prev->next;
+            while(next){
+                if(next->etag == tag){
+                    prev->next=next->next;
+                    cch_free(hcb,next);
+                    hash_table->len--;
+                    break;
+                }
+                prev=next;
+                next=next->next;
+            }
+        }
+        start += PAGE_SIZE;
     }
     machine_tlb_purge(va, ps);
 }
@@ -390,9 +390,9 @@ void vtlb_insert(thash_cb_t *hcb, u64 pt
     vcpu_get_rr(current, va, &vrr.rrval);
     if (vrr.ps != ps) {
 //        machine_tlb_insert(hcb->vcpu, entry);
-       panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%d\n",
-                    va, vrr.ps, ps);
-       return;
+        panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d 
ps=%ld\n",
+                     va, vrr.ps, ps);
+        return;
     }
     hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
     if( INVALID_TLB(hash_table) ) {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/Makefile        Tue May 30 14:30:34 2006 -0500
@@ -2,6 +2,7 @@ obj-y += dom0_ops.o
 obj-y += dom0_ops.o
 obj-y += domain.o
 obj-y += dom_fw.o
+obj-y += efi_emul.o
 obj-y += hpsimserial.o
 obj-y += hypercall.o
 obj-y += hyperprivop.o
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom0_ops.c      Tue May 30 14:30:34 2006 -0500
@@ -151,10 +151,7 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
         put_domain(d);
     }
     break;
-    /*
-     * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
-     * it actually allocates and maps pages.
-     */
+
     case DOM0_GETMEMLIST:
     {
         unsigned long i = 0;
@@ -198,7 +195,8 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
                 ret = -ENOMEM;
 
             op->u.getmemlist.num_pfns = i - start_page;
-            copy_to_guest(u_dom0_op, op, 1);
+            if (copy_to_guest(u_dom0_op, op, 1))
+                ret = -EFAULT;
             
             put_domain(d);
         }
@@ -264,10 +262,6 @@ do_dom0vp_op(unsigned long cmd,
         }
         ret = get_gpfn_from_mfn(arg0);
         break;
-    case IA64_DOM0VP_populate_physmap:
-        ret = dom0vp_populate_physmap(d, arg0,
-                                      (unsigned int)arg1, (unsigned int)arg2);
-        break;
     case IA64_DOM0VP_zap_physmap:
         ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1);
         break;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom_fw.c        Tue May 30 14:30:34 2006 -0500
@@ -462,7 +462,7 @@ static void print_md(efi_memory_desc_t *
 static void print_md(efi_memory_desc_t *md)
 {
 #if 1
-       printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) 
(%luMB)\n",
+       printk("domain mem: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) 
(%luMB)\n",
                md->type, md->attribute, md->phys_addr,
                md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
                md->num_pages >> (20 - EFI_PAGE_SHIFT));
@@ -541,7 +541,7 @@ struct fake_acpi_tables {
        struct fadt_descriptor_rev2 fadt;
        struct facs_descriptor_rev2 facs;
        struct acpi_table_header dsdt;
-       u8 aml[16];
+       u8 aml[8 + 11 * MAX_VIRT_CPUS];
        struct acpi_table_madt madt;
        struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS];
        u8 pm1a_evt_blk[4];
@@ -561,6 +561,7 @@ dom_fw_fake_acpi(struct domain *d, struc
        struct acpi_table_madt *madt = &tables->madt;
        struct acpi_table_lsapic *lsapic = tables->lsapic;
        int i;
+       int aml_len;
 
        memset(tables, 0, sizeof(struct fake_acpi_tables));
 
@@ -629,7 +630,6 @@ dom_fw_fake_acpi(struct domain *d, struc
        /* setup DSDT with trivial namespace. */ 
        strncpy(dsdt->signature, DSDT_SIG, 4);
        dsdt->revision = 1;
-       dsdt->length = sizeof(struct acpi_table_header) + sizeof(tables->aml);
        strcpy(dsdt->oem_id, "XEN");
        strcpy(dsdt->oem_table_id, "Xen/ia64");
        strcpy(dsdt->asl_compiler_id, "XEN");
@@ -637,15 +637,33 @@ dom_fw_fake_acpi(struct domain *d, struc
 
        /* Trivial namespace, avoids ACPI CA complaints */
        tables->aml[0] = 0x10; /* Scope */
-       tables->aml[1] = 0x12; /* length/offset to next object */
-       strncpy((char *)&tables->aml[2], "_SB_", 4);
+       tables->aml[1] = 0x40; /* length/offset to next object (patched) */
+       tables->aml[2] = 0x00;
+       strncpy((char *)&tables->aml[3], "_SB_", 4);
 
        /* The processor object isn't absolutely necessary, revist for SMP */
-       tables->aml[6] = 0x5b; /* processor object */
-       tables->aml[7] = 0x83;
-       tables->aml[8] = 0x0b; /* next */
-       strncpy((char *)&tables->aml[9], "CPU0", 4);
-
+       aml_len = 7;
+       for (i = 0; i < 3; i++) {
+               unsigned char *p = tables->aml + aml_len;
+               p[0] = 0x5b; /* processor object */
+               p[1] = 0x83;
+               p[2] = 0x0b; /* next */
+               p[3] = 'C';
+               p[4] = 'P';
+               snprintf ((char *)p + 5, 3, "%02x", i);
+               if (i < 16)
+                       p[5] = 'U';
+               p[7] = i;       /* acpi_id */
+               p[8] = 0;       /* pblk_addr */
+               p[9] = 0;
+               p[10] = 0;
+               p[11] = 0;
+               p[12] = 0;      /* pblk_len */
+               aml_len += 13;
+       }
+       tables->aml[1] = 0x40 + ((aml_len - 1) & 0x0f);
+       tables->aml[2] = (aml_len - 1) >> 4;
+       dsdt->length = sizeof(struct acpi_table_header) + aml_len;
        dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length);
 
        /* setup MADT */
@@ -662,6 +680,7 @@ dom_fw_fake_acpi(struct domain *d, struc
        for (i = 0; i < MAX_VIRT_CPUS; i++) {
                lsapic[i].header.type = ACPI_MADT_LSAPIC;
                lsapic[i].header.length = sizeof(struct acpi_table_lsapic);
+               lsapic[i].acpi_id = i;
                lsapic[i].id = i;
                lsapic[i].eid = 0;
                lsapic[i].flags.enabled = (d->vcpu[i] != NULL);
@@ -798,6 +817,9 @@ dom_fw_init (struct domain *d, const cha
        pfn         = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
        cmd_line    = (void *) cp;
 
+       /* Initialise for EFI_SET_VIRTUAL_ADDRESS_MAP emulation */
+       d->arch.efi_runtime = efi_runtime;
+
        if (args) {
                if (arglen >= 1024)
                        arglen = 1023;
@@ -959,7 +981,7 @@ dom_fw_init (struct domain *d, const cha
                MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX
 #endif
                /* hypercall patches live here, masquerade as reserved PAL 
memory */
-               
MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+               
MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END,
 0);
                
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE,
 0);//XXX make sure this doesn't overlap on i/o, runtime area.
 #ifndef CONFIG_XEN_IA64_DOM0_VP
 /* hack */     
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1);
@@ -993,7 +1015,7 @@ dom_fw_init (struct domain *d, const cha
                MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1);
 #endif
                /* hypercall patches live here, masquerade as reserved PAL 
memory */
-               
MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1);
+               
MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END,
 1);
                
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1);
                /* Create a dummy entry for IO ports, so that IO accesses are
                   trapped by Xen.  */
@@ -1009,7 +1031,7 @@ dom_fw_init (struct domain *d, const cha
        BUG_ON(i > NUM_MEM_DESCS);
        bp->efi_memmap_size = i * sizeof(efi_memory_desc_t);
        bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
-       bp->efi_memdesc_version = 1;
+       bp->efi_memdesc_version = EFI_MEMDESC_VERSION;
        bp->command_line = dom_pa((unsigned long) cmd_line);
        bp->console_info.num_cols = 80;
        bp->console_info.num_rows = 25;
@@ -1019,7 +1041,8 @@ dom_fw_init (struct domain *d, const cha
        if (d == dom0) {
                // XXX CONFIG_XEN_IA64_DOM0_VP
                // initrd_start address is hard coded in start_kernel()
-               bp->initrd_start = ia64_boot_param->initrd_start;
+               bp->initrd_start = (dom0_start+dom0_size) -
+                 (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
                bp->initrd_size = ia64_boot_param->initrd_size;
        }
        else {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/domain.c        Tue May 30 14:30:34 2006 -0500
@@ -77,36 +77,19 @@ static void init_switch_stack(struct vcp
 static void init_switch_stack(struct vcpu *v);
 void build_physmap_table(struct domain *d);
 
+static void try_to_clear_PGC_allocate(struct domain* d,
+                                      struct page_info* page);
+
 /* this belongs in include/asm, but there doesn't seem to be a suitable place 
*/
 void arch_domain_destroy(struct domain *d)
 {
-       struct page_info *page;
-       struct list_head *ent, *prev;
-
-       if (d->arch.mm->pgd != NULL)
-       {
-               list_for_each ( ent, &d->arch.mm->pt_list )
-               {
-                       page = list_entry(ent, struct page_info, list);
-                       prev = ent->prev;
-                       list_del(ent);
-                       free_xenheap_page(page_to_virt(page));
-                       ent = prev;
-               }
-               pgd_free(d->arch.mm->pgd);
-       }
-       if (d->arch.mm != NULL)
-               xfree(d->arch.mm);
+       BUG_ON(d->arch.mm.pgd != NULL);
        if (d->shared_info != NULL)
                free_xenheap_page(d->shared_info);
 
+       domain_flush_destroy (d);
+
        deallocate_rid_range(d);
-
-       /* It is really good in this? */
-       flush_tlb_all();
-
-       /* It is really good in this? */
-       vhpt_flush_all();
 }
 
 static void default_idle(void)
@@ -179,7 +162,6 @@ struct vcpu *alloc_vcpu_struct(struct do
                memset(&d->shared_info->evtchn_mask[0], 0xff,
                    sizeof(d->shared_info->evtchn_mask));
 
-           v->vcpu_info = &(d->shared_info->vcpu_info[0]);
            v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
            v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
            v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
@@ -239,7 +221,8 @@ int arch_domain_create(struct domain *d)
        // the following will eventually need to be negotiated dynamically
        d->xen_vastart = XEN_START_ADDR;
        d->xen_vaend = XEN_END_ADDR;
-       d->shared_info_va = SHAREDINFO_ADDR;
+       d->arch.shared_info_va = SHAREDINFO_ADDR;
+       d->arch.breakimm = 0x1000;
 
        if (is_idle_domain(d))
            return 0;
@@ -255,26 +238,20 @@ int arch_domain_create(struct domain *d)
         */
        if (!allocate_rid_range(d,0))
                goto fail_nomem;
-       d->arch.breakimm = 0x1000;
        d->arch.sys_pgnr = 0;
 
-       if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL)
-           goto fail_nomem;
-       memset(d->arch.mm, 0, sizeof(*d->arch.mm));
-       INIT_LIST_HEAD(&d->arch.mm->pt_list);
+       memset(&d->arch.mm, 0, sizeof(d->arch.mm));
 
        d->arch.physmap_built = 0;
-       if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL)
+       if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
            goto fail_nomem;
 
        printf ("arch_domain_create: domain=%p\n", d);
        return 0;
 
 fail_nomem:
-       if (d->arch.mm->pgd != NULL)
-           pgd_free(d->arch.mm->pgd);
-       if (d->arch.mm != NULL)
-           xfree(d->arch.mm);
+       if (d->arch.mm.pgd != NULL)
+           pgd_free(d->arch.mm.pgd);
        if (d->shared_info != NULL)
            free_xenheap_page(d->shared_info);
        return -ENOMEM;
@@ -282,11 +259,7 @@ fail_nomem:
 
 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
 {
-       struct pt_regs *regs = vcpu_regs (v);
-
-       c->regs = *regs;
-       c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
-
+       c->regs = *vcpu_regs (v);
        c->shared = v->domain->shared_info->arch;
 }
 
@@ -325,11 +298,10 @@ int arch_set_info_guest(struct vcpu *v, 
        }
        new_thread(v, regs->cr_iip, 0, 0);
 
-       v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
-       if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
-                          c->vcpu.privregs, sizeof(mapped_regs_t))) {
+       if ( c->privregs && copy_from_user(v->arch.privregs,
+                          c->privregs, sizeof(mapped_regs_t))) {
            printk("Bad ctxt address in arch_set_info_guest: %p\n",
-                  c->vcpu.privregs);
+                  c->privregs);
            return -EFAULT;
        }
 
@@ -394,19 +366,129 @@ static void relinquish_memory(struct dom
 
         /* Follow the list chain and /then/ potentially free the page. */
         ent = ent->next;
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#if 1
+        BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
+#else
+        //XXX this should be done at traversing the P2M table.
+        if (page_get_owner(page) == d)
+            set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+#endif
+#endif
         put_page(page);
     }
 
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
+static void
+relinquish_pte(struct domain* d, pte_t* pte)
+{
+    unsigned long mfn = pte_pfn(*pte);
+    struct page_info* page;
+
+    // vmx domain use bit[58:56] to distinguish io region from memory.
+    // see vmx_build_physmap_table() in vmx_init.c
+    if (((mfn << PAGE_SHIFT) & GPFN_IO_MASK) != GPFN_MEM)
+        return;
+
+    // domain might map IO space or acpi table pages. check it.
+    if (!mfn_valid(mfn))
+        return;
+    page = mfn_to_page(mfn);
+    // struct page_info corresponding to mfn may exist or not depending
+    // on CONFIG_VIRTUAL_FRAME_TABLE.
+    // This check is too easy.
+    // The right way is to check whether this page belongs to an I/O area or the ACPI pages
+    if (page_get_owner(page) == NULL) {
+        BUG_ON(page->count_info != 0);
+        return;
+    }
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+    if (page_get_owner(page) == d) {
+        BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
+        set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+    }
+#endif
+    try_to_clear_PGC_allocate(d, page);
+    put_page(page);
+}
+
+static void
+relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
+{
+    unsigned long i;
+    pte_t* pte = pte_offset_map(pmd, offset);
+
+    for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+        if (!pte_present(*pte))
+            continue;
+        
+        relinquish_pte(d, pte);
+    }
+    pte_free_kernel(pte_offset_map(pmd, offset));
+}
+
+static void
+relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
+{
+    unsigned long i;
+    pmd_t *pmd = pmd_offset(pud, offset);
+    
+    for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+        if (!pmd_present(*pmd))
+            continue;
+        
+        relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
+    }
+    pmd_free(pmd_offset(pud, offset));
+}
+
+static void
+relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
+{
+    unsigned long i;
+    pud_t *pud = pud_offset(pgd, offset);
+
+    for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+        if (!pud_present(*pud))
+            continue;
+
+        relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
+    }
+    pud_free(pud_offset(pgd, offset));
+}
+
+static void
+relinquish_mm(struct domain* d)
+{
+    struct mm_struct* mm = &d->arch.mm;
+    unsigned long i;
+    pgd_t* pgd;
+
+    if (mm->pgd == NULL)
+        return;
+
+    pgd = pgd_offset(mm, 0);
+    for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+        if (!pgd_present(*pgd))
+            continue;
+
+        relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
+    }
+    pgd_free(mm->pgd);
+    mm->pgd = NULL;
+}
+
 void domain_relinquish_resources(struct domain *d)
 {
     /* Relinquish every page of memory. */
 
-    /* xenheap_list is not used in ia64. */
-    BUG_ON(!list_empty(&d->xenpage_list));
-
+    // release pages by traversing d->arch.mm.
+    relinquish_mm(d);
+
+    relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
 }
 
@@ -483,11 +565,58 @@ void new_thread(struct vcpu *v,
        }
 }
 
+// stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
+void
+share_xen_page_with_guest(struct page_info *page,
+                          struct domain *d, int readonly)
+{
+    if ( page_get_owner(page) == d )
+        return;
+
+#if 1
+    if (readonly) {
+        printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
+    }
+#endif
+
+    // alloc_xenheap_pages() doesn't initialize page owner.
+    //BUG_ON(page_get_owner(page) != NULL);
+#if 0
+    if (get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY) {
+        printk("%s:%d page 0x%p mfn 0x%lx gpfn 0x%lx\n", __func__, __LINE__,
+               page, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)));
+    }
+#endif
+    // grant_table_destroy() release these pages.
+    // but it doesn't clear m2p entry. So there might remain stale entry.
+    // We clear such a stale entry here.
+    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+
+    spin_lock(&d->page_alloc_lock);
+
+#ifndef __ia64__
+    /* The incremented type count pins as writable or read-only. */
+    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
+    page->u.inuse.type_info |= PGT_validated | 1;
+#endif
+
+    page_set_owner(page, d);
+    wmb(); /* install valid domain ptr before updating refcnt. */
+    ASSERT(page->count_info == 0);
+    page->count_info |= PGC_allocated | 1;
+
+    if ( unlikely(d->xenheap_pages++ == 0) )
+        get_knownalive_domain(d);
+    list_add_tail(&page->list, &d->xenpage_list);
+
+    spin_unlock(&d->page_alloc_lock);
+}
+
+//XXX !xxx_present() should be used instead of !xxx_none()?
 static pte_t*
 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
-    struct page_info *pt;
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -496,22 +625,16 @@ lookup_alloc_domain_pte(struct domain* d
     pgd = pgd_offset(mm, mpaddr);
     if (pgd_none(*pgd)) {
         pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
-        pt = maddr_to_page(pgd_val(*pgd));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     pud = pud_offset(pgd, mpaddr);
     if (pud_none(*pud)) {
         pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
-        pt = maddr_to_page(pud_val(*pud));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     pmd = pmd_offset(pud, mpaddr);
     if (pmd_none(*pmd)) {
         pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
-        pt = maddr_to_page(pmd_val(*pmd));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     return pte_offset_map(pmd, mpaddr);
@@ -521,7 +644,7 @@ static pte_t*
 static pte_t*
 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -549,7 +672,7 @@ static pte_t*
 static pte_t*
 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -581,6 +704,7 @@ __assign_new_domain_page(struct domain *
 {
     struct page_info *p = NULL;
     unsigned long maddr;
+    int ret;
 
     BUG_ON(!pte_none(*pte));
 
@@ -601,14 +725,13 @@ __assign_new_domain_page(struct domain *
 #endif
 
     p = alloc_domheap_page(d);
-    // zero out pages for security reasons
-    if (p)
-        clear_page(page_to_virt(p));
-
     if (unlikely(!p)) {
         printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
         return(p);
     }
+
+    // zero out pages for security reasons
+    clear_page(page_to_virt(p));
     maddr = page_to_maddr (p);
     if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
                  && maddr < __get_cpu_var(vhpt_pend))) {
@@ -618,13 +741,15 @@ __assign_new_domain_page(struct domain *
                maddr);
     }
 
+    ret = get_page(p, d);
+    BUG_ON(ret == 0);
     set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
                          __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
 
+    mb ();
     //XXX CONFIG_XEN_IA64_DOM0_VP
     //    TODO racy
-    if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
-        set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
+    set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
     return p;
 }
 
@@ -668,21 +793,38 @@ assign_new_domain0_page(struct domain *d
 }
 
 /* map a physical address to the specified metaphysical addr */
-void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long 
physaddr)
-{
-       pte_t *pte;
-
-       pte = lookup_alloc_domain_pte(d, mpaddr);
-       if (pte_none(*pte)) {
-               set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
-                       __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
-
-       //XXX CONFIG_XEN_IA64_DOM0_VP
-       //    TODO racy
-       if ((physaddr & GPFN_IO_MASK) == GPFN_MEM)
-               set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
-       }
-       else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+void
+__assign_domain_page(struct domain *d,
+                     unsigned long mpaddr, unsigned long physaddr)
+{
+    pte_t *pte;
+
+    pte = lookup_alloc_domain_pte(d, mpaddr);
+    if (pte_none(*pte)) {
+        set_pte(pte,
+                pfn_pte(physaddr >> PAGE_SHIFT,
+                        __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+        mb ();
+    } else
+        printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr);
+}
+
+/* get_page() and map a physical address to the specified metaphysical addr */
+void
+assign_domain_page(struct domain *d,
+                   unsigned long mpaddr, unsigned long physaddr)
+{
+    struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
+    int ret;
+
+    BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
+    ret = get_page(page, d);
+    BUG_ON(ret == 0);
+    __assign_domain_page(d, mpaddr, physaddr);
+
+    //XXX CONFIG_XEN_IA64_DOM0_VP
+    //    TODO racy
+    set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
 }
 
 #ifdef CONFIG_XEN_IA64_DOM0_VP
@@ -693,8 +835,58 @@ assign_domain_same_page(struct domain *d
     //XXX optimization
     unsigned long end = mpaddr + size;
     for (; mpaddr < end; mpaddr += PAGE_SIZE) {
-        assign_domain_page(d, mpaddr, mpaddr);
-    }
+        __assign_domain_page(d, mpaddr, mpaddr);
+    }
+}
+
+static int
+efi_mmio(unsigned long physaddr, unsigned long size)
+{
+    void *efi_map_start, *efi_map_end;
+    u64 efi_desc_size;
+    void* p;
+
+    efi_map_start = __va(ia64_boot_param->efi_memmap);
+    efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+    efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+    for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+        efi_memory_desc_t* md = (efi_memory_desc_t *)p;
+        unsigned long start = md->phys_addr;
+        unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+        
+        if (start <= physaddr && physaddr < end) {
+            if ((physaddr + size) > end) {
+                DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+                        __func__, __LINE__, physaddr, size);
+                return 0;
+            }
+
+            // for io space
+            if (md->type == EFI_MEMORY_MAPPED_IO ||
+                md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
+                return 1;
+            }
+
+            // for runtime
+            // see efi_enter_virtual_mode(void)
+            // in linux/arch/ia64/kernel/efi.c
+            if ((md->attribute & EFI_MEMORY_RUNTIME) &&
+                !(md->attribute & EFI_MEMORY_WB)) {
+                return 1;
+            }
+
+            DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+                    __func__, __LINE__, physaddr, size);
+            return 0;
+        }
+
+        if (physaddr < start) {
+            break;
+        }
+    }
+
+    return 1;
 }
 
 unsigned long
@@ -704,6 +896,11 @@ assign_domain_mmio_page(struct domain *d
     if (size == 0) {
         DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
                 __func__, d, mpaddr, size);
+    }
+    if (!efi_mmio(mpaddr, size)) {
+        DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
+                __func__, __LINE__, d, mpaddr, size);
+        return -EINVAL;
     }
     assign_domain_same_page(d, mpaddr, size);
     return mpaddr;
@@ -723,23 +920,55 @@ domain_page_flush(struct domain* d, unsi
 domain_page_flush(struct domain* d, unsigned long mpaddr,
                   unsigned long old_mfn, unsigned long new_mfn)
 {
-    struct vcpu* v;
-    //XXX SMP
-    for_each_vcpu(d, v) {
-        vcpu_purge_tr_entry(&v->arch.dtlb);
-        vcpu_purge_tr_entry(&v->arch.itlb);
-    }
-
-    // flush vhpt
-    vhpt_flush();
-    // flush tlb
-    flush_tlb_all();
-}
-
+    domain_flush_vtlb_all();
+}
+#endif
+
+//XXX heavily depends on the struct page_info layout.
+//
+// if (page_get_owner(page) == d &&
+//     test_and_clear_bit(_PGC_allocated, &page->count_info)) {
+//     put_page(page);
+// }
 static void
-zap_domain_page_one(struct domain *d, unsigned long mpaddr)
-{
-    struct mm_struct *mm = d->arch.mm;
+try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
+{
+    u32 _d, _nd;
+    u64 x, nx, y;
+
+    _d = pickle_domptr(d);
+    y = *((u64*)&page->count_info);
+    do {
+        x = y;
+        _nd = x >> 32;
+        nx = x - 1;
+        __clear_bit(_PGC_allocated, &nx);
+
+        if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
+            struct domain* nd = unpickle_domptr(_nd);
+            if (nd == NULL) {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            }
+            break;
+        }
+
+        BUG_ON((nx & PGC_count_mask) < 1);
+        y = cmpxchg((u64*)&page->count_info, x, nx);
+    } while (unlikely(y != x));
+}
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+static void
+zap_domain_page_one(struct domain *d, unsigned long mpaddr, int do_put_page)
+{
+    struct mm_struct *mm = &d->arch.mm;
     pte_t *pte;
     pte_t old_pte;
     unsigned long mfn;
@@ -755,6 +984,7 @@ zap_domain_page_one(struct domain *d, un
     old_pte = ptep_get_and_clear(mm, mpaddr, pte);
     mfn = pte_pfn(old_pte);
     page = mfn_to_page(mfn);
+    BUG_ON((page->count_info & PGC_count_mask) == 0);
 
     if (page_get_owner(page) == d) {
         BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
@@ -763,7 +993,10 @@ zap_domain_page_one(struct domain *d, un
 
     domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
 
-    put_page(page);
+    if (do_put_page) {
+        try_to_clear_PGC_allocate(d, page);
+        put_page(page);
+    }
 }
 #endif
 
@@ -867,66 +1100,6 @@ unsigned long lookup_domain_mpa(struct d
 #ifdef CONFIG_XEN_IA64_DOM0_VP
 //XXX SMP
 unsigned long
-dom0vp_populate_physmap(struct domain *d, unsigned long gpfn,
-                        unsigned int extent_order, unsigned int address_bits)
-{
-    unsigned long ret = 0;
-    int flags = 0;
-    unsigned long mpaddr = gpfn << PAGE_SHIFT;
-    unsigned long extent_size = 1UL << extent_order;
-    unsigned long offset;
-    struct page_info* page;
-    unsigned long physaddr;
-
-    if (extent_order > 0 && !multipage_allocation_permitted(d)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (gpfn + (1 << extent_order) < gpfn) {
-        ret = -EINVAL;
-        goto out;
-    }
-    if (gpfn > d->max_pages || gpfn + (1 << extent_order) > d->max_pages) {
-        ret = -EINVAL;
-        goto out;
-    }
-    if ((extent_size << PAGE_SHIFT) < extent_size) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    //XXX check address_bits and set flags = ALLOC_DOM_DMA if needed
-
-    // check the rage is not populated yet.
-    //XXX loop optimization
-    for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
-        if (____lookup_domain_mpa(d, mpaddr + offset) != INVALID_MFN) {
-            ret = -EBUSY;
-            goto out;
-        }
-    }
-
-    page = alloc_domheap_pages(d, extent_order, flags);
-    if (page == NULL) {
-        ret = -ENOMEM;
-        DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n",
-                extent_order, d->domain_id, flags);
-        goto out;
-    }
-
-    //XXX loop optimization
-    physaddr = page_to_maddr(page);
-    for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
-        assign_domain_page(d, mpaddr + offset, physaddr + offset);
-    }
-
-out:
-    return ret;
-}
-
-//XXX SMP
-unsigned long
 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
                    unsigned int extent_order)
 {
@@ -937,26 +1110,28 @@ dom0vp_zap_physmap(struct domain *d, uns
         goto out;
     }
 
-    zap_domain_page_one(d, gpfn << PAGE_SHIFT);
+    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
 
 out:
     return ret;
 }
 
+// caller must get_page(mfn_to_page(mfn)) before
+// caller must call set_gpfn_from_mfn().
 static void
 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
                            unsigned long mfn, unsigned int flags)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pte_t* pte;
     pte_t old_pte;
+    pte_t npte;
 
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
     // update pte
-    old_pte = ptep_get_and_clear(mm, mpaddr, pte);
-    set_pte(pte, pfn_pte(mfn,
-                         __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+    npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+    old_pte = ptep_xchg(mm, mpaddr, pte, npte);
     if (!pte_none(old_pte)) {
         unsigned long old_mfn;
         struct page_info* old_page;
@@ -973,8 +1148,10 @@ assign_domain_page_replace(struct domain
 
         domain_page_flush(d, mpaddr, old_mfn, mfn);
 
+        try_to_clear_PGC_allocate(d, old_page);
         put_page(old_page);
     } else {
+        BUG_ON(!mfn_valid(mfn));
         BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
                get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
     }
@@ -1002,17 +1179,195 @@ dom0vp_add_physmap(struct domain* d, uns
     }
 
     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* flags:XXX */);
+    //don't update p2m table because this page belongs to rd, not d.
 out1:
     put_domain(rd);
 out0:
     return error;
 }
+
+// grant table host mapping
+// mpaddr: host_addr: pseudo physical address
+// mfn: frame: machine page frame
+// flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
+int
+create_grant_host_mapping(unsigned long gpaddr,
+                         unsigned long mfn, unsigned int flags)
+{
+    struct domain* d = current->domain;
+    struct page_info* page;
+    int ret;
+
+    if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+        DPRINTK("%s: flags 0x%x\n", __func__, flags);
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_readonly) {
+#if 0
+        DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+                __func__, flags);
+#endif
+        flags &= ~GNTMAP_readonly;
+    }
+
+    page = mfn_to_page(mfn);
+    ret = get_page(page, page_get_owner(page));
+    BUG_ON(ret == 0);
+    assign_domain_page_replace(d, gpaddr, mfn, flags);
+
+    return GNTST_okay;
+}
+
+// grant table host unmapping
+int
+destroy_grant_host_mapping(unsigned long gpaddr,
+                          unsigned long mfn, unsigned int flags)
+{
+    struct domain* d = current->domain;
+    pte_t* pte;
+    pte_t old_pte;
+    unsigned long old_mfn = INVALID_MFN;
+    struct page_info* old_page;
+
+    if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+        DPRINTK("%s: flags 0x%x\n", __func__, flags);
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_readonly) {
+#if 0
+        DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+                __func__, flags);
+#endif
+        flags &= ~GNTMAP_readonly;
+    }
+
+    pte = lookup_noalloc_domain_pte(d, gpaddr);
+    if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
+        return GNTST_general_error;//XXX GNTST_bad_pseudo_phys_addr
+
+    // update pte
+    old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
+    if (pte_present(old_pte)) {
+        old_mfn = pte_pfn(old_pte);//XXX
+    }
+    domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
+
+    old_page = mfn_to_page(old_mfn);
+    BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) 
is not needed.
+    put_page(old_page);
+
+    return GNTST_okay;
+}
+
+//XXX needs refcount patch
+//XXX heavily depends on the struct page layout.
+//XXX SMP
+int
+steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
+{
+#if 0 /* if big endian */
+# error "implement big endian version of steal_page_for_grant_transfer()"
+#endif
+    u32 _d, _nd;
+    u64 x, nx, y;
+    unsigned long mpaddr = get_gpfn_from_mfn(page_to_mfn(page)) << PAGE_SHIFT;
+    struct page_info *new;
+
+    zap_domain_page_one(d, mpaddr, 0);
+    put_page(page);
+
+    spin_lock(&d->page_alloc_lock);
+
+    /*
+     * The tricky bit: atomically release ownership while there is just one
+     * benign reference to the page (PGC_allocated). If that reference
+     * disappears then the deallocation routine will safely spin.
+     */
+    _d  = pickle_domptr(d);
+    y = *((u64*)&page->count_info);
+    do {
+        x = y;
+        nx = x & 0xffffffff;
+        // page->count_info: untouched
+        // page->u.inuse._domain = 0;
+        _nd = x >> 32;
+
+        if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
+                     (1 | PGC_allocated)) ||
+            unlikely(_nd != _d)) {
+            struct domain* nd = unpickle_domptr(_nd);
+            if (nd == NULL) {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            } else {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p(%u) 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, nd->domain_id, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            }
+            spin_unlock(&d->page_alloc_lock);
+            return -1;
+        }
+
+        y = cmpxchg((u64*)&page->count_info, x, nx);
+    } while (unlikely(y != x));
+
+    /*
+     * Unlink from 'd'. At least one reference remains (now anonymous), so
+     * no one else is spinning to try to delete this page from 'd'.
+     */
+    d->tot_pages--;
+    list_del(&page->list);
+
+    spin_unlock(&d->page_alloc_lock);
+
+#if 1
+    //XXX Until net_rx_action() fix
+    // assign new page for this mpaddr
+    new = assign_new_domain_page(d, mpaddr);
+    BUG_ON(new == NULL);//XXX
+#endif
+
+    return 0;
+}
+
+void
+guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+                       unsigned long mfn)
+{
+    int ret;
+
+    ret = get_page(mfn_to_page(mfn), d);
+    BUG_ON(ret == 0);
+    assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* XXX */);
+    set_gpfn_from_mfn(mfn, gpfn);//XXX SMP
+
+    //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> 
PAGE_SHIFT));
+}
+
+void
+guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+                          unsigned long mfn)
+{
+    BUG_ON(mfn == 0);//XXX
+    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
+}
 #endif
 
 /* Flush cache of domain d.  */
 void domain_cache_flush (struct domain *d, int sync_only)
 {
-       struct mm_struct *mm = d->arch.mm;
+       struct mm_struct *mm = &d->arch.mm;
        pgd_t *pgd = mm->pgd;
        unsigned long maddr;
        int i,j,k, l;
@@ -1478,9 +1833,9 @@ void domain_pend_keyboard_interrupt(int 
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-       __ia64_save_fpu(v->arch._thread.fph);
-       if (VMX_DOMAIN(v))
-               vmx_save_state(v);
+//     __ia64_save_fpu(v->arch._thread.fph);
+//     if (VMX_DOMAIN(v))
+//             vmx_save_state(v);
        // FIXME SMP: Anything else needed here for SMP?
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hypercall.c     Tue May 30 14:30:34 2006 -0500
@@ -26,7 +26,6 @@
 #include <public/physdev.h>
 #include <xen/domain.h>
 
-extern unsigned long translate_domain_mpaddr(unsigned long);
 static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop);
 static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg);
 /* FIXME: where these declarations should be there ? */
@@ -71,13 +70,39 @@ hypercall_t ia64_hypercall_table[] =
        (hypercall_t)do_ni_hypercall,           /*  */                          
/* 30 */
        (hypercall_t)do_ni_hypercall,           /*  */
        (hypercall_t)do_event_channel_op,
-       (hypercall_t)do_physdev_op
+       (hypercall_t)do_physdev_op,
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 35 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 40 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 45 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+       (hypercall_t)do_dom0vp_op,                      /* dom0vp_op */
+#else
+       (hypercall_t)do_ni_hypercall,           /* arch_0 */
+#endif
+       (hypercall_t)do_ni_hypercall,           /* arch_1 */
+       (hypercall_t)do_ni_hypercall,           /* arch_2 */            /* 50 */
+       (hypercall_t)do_ni_hypercall,           /* arch_3 */
+       (hypercall_t)do_ni_hypercall,           /* arch_4 */
+       (hypercall_t)do_ni_hypercall,           /* arch_5 */
+       (hypercall_t)do_ni_hypercall,           /* arch_6 */
+       (hypercall_t)do_ni_hypercall            /* arch_7 */            /* 55 */
        };
 
 uint32_t nr_hypercalls =
        sizeof(ia64_hypercall_table) / sizeof(hypercall_t);
 
-static int
+static IA64FAULT
 xen_hypercall (struct pt_regs *regs)
 {
        uint32_t cmd = (uint32_t)regs->r2;
@@ -91,15 +116,9 @@ xen_hypercall (struct pt_regs *regs)
                        regs->r18,
                        regs->r19);
        else
-#ifdef CONFIG_XEN_IA64_DOM0_VP
-       if (cmd ==  __HYPERVISOR_ia64_dom0vp_op) 
-               regs->r8 = do_dom0vp_op(regs->r14, regs->r15, regs->r16,
-                                       regs->r17, regs->r18);
-       else
-#endif
                regs->r8 = -ENOSYS;
 
-       return 1;
+       return IA64_NO_FAULT;
 }
 
 
@@ -134,9 +153,6 @@ fw_hypercall_ipi (struct pt_regs *regs)
                c.regs.cr_iip = targ_regs->cr_iip;
                c.regs.r1 = targ_regs->r1;
                
-               /* Copy from vcpu 0.  */
-               c.vcpu.evtchn_vector =
-                       current->domain->vcpu[0]->vcpu_info->arch.evtchn_vector;
                if (arch_set_info_guest (targ, &c) != 0) {
                        printf ("arch_boot_vcpu: failure\n");
                        return;
@@ -162,14 +178,16 @@ fw_hypercall_ipi (struct pt_regs *regs)
        return;
 }
 
-static int
+static IA64FAULT
 fw_hypercall (struct pt_regs *regs)
 {
        struct vcpu *v = current;
        struct sal_ret_values x;
-       unsigned long *tv, *tc;
-
-       switch (regs->r2) {
+       efi_status_t efi_ret_value;
+       IA64FAULT fault; 
+       unsigned long index = regs->r2 & FW_HYPERCALL_NUM_MASK_HIGH;
+
+       switch (index) {
            case FW_HYPERCALL_PAL_CALL:
                //printf("*** PAL hypercall: index=%d\n",regs->r28);
                //FIXME: This should call a C routine
@@ -227,40 +245,10 @@ fw_hypercall (struct pt_regs *regs)
                regs->r8 = x.r8; regs->r9 = x.r9;
                regs->r10 = x.r10; regs->r11 = x.r11;
                break;
-           case FW_HYPERCALL_EFI_RESET_SYSTEM:
-               printf("efi.reset_system called ");
-               if (current->domain == dom0) {
-                       printf("(by dom0)\n ");
-                       (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
-               }
-               else
-                       domain_shutdown (current->domain, SHUTDOWN_reboot);
-               regs->r8 = EFI_UNSUPPORTED;
-               break;
-           case FW_HYPERCALL_EFI_GET_TIME:
-               tv = (unsigned long *) vcpu_get_gr(v,32);
-               tc = (unsigned long *) vcpu_get_gr(v,33);
-               //printf("efi_get_time(%p,%p) called...",tv,tc);
-               tv = (unsigned long *) __va(translate_domain_mpaddr((unsigned 
long) tv));
-               if (tc) tc = (unsigned long *) 
__va(translate_domain_mpaddr((unsigned long) tc));
-               regs->r8 = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t 
*) tc);
-               //printf("and returns %lx\n",regs->r8);
-               break;
-           case FW_HYPERCALL_EFI_SET_TIME:
-           case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
-           case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
-               // FIXME: need fixes in efi.h from 2.6.9
-           case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
-               // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
-               // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS 
-               // POINTER ARGUMENTS WILL BE VIRTUAL!!
-           case FW_HYPERCALL_EFI_GET_VARIABLE:
-               // FIXME: need fixes in efi.h from 2.6.9
-           case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
-           case FW_HYPERCALL_EFI_SET_VARIABLE:
-           case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
-               // FIXME: need fixes in efi.h from 2.6.9
-               regs->r8 = EFI_UNSUPPORTED;
+           case FW_HYPERCALL_EFI_CALL:
+               efi_ret_value = efi_emulator (regs, &fault);
+               if (fault != IA64_NO_FAULT) return fault;
+               regs->r8 = efi_ret_value;
                break;
            case FW_HYPERCALL_IPI:
                fw_hypercall_ipi (regs);
@@ -269,7 +257,7 @@ fw_hypercall (struct pt_regs *regs)
                printf("unknown ia64 fw hypercall %lx\n", regs->r2);
                regs->r8 = do_ni_hypercall();
        }
-       return 1;
+       return IA64_NO_FAULT;
 }
 
 /* opt_unsafe_hypercall: If true, unsafe debugging hypercalls are allowed.
@@ -277,7 +265,7 @@ static int opt_unsafe_hypercall = 0;
 static int opt_unsafe_hypercall = 0;
 boolean_param("unsafe_hypercall", opt_unsafe_hypercall);
 
-int
+IA64FAULT
 ia64_hypercall (struct pt_regs *regs)
 {
        struct vcpu *v = current;
@@ -307,7 +295,7 @@ ia64_hypercall (struct pt_regs *regs)
                        printf("unknown user xen/ia64 hypercall %lx\n", index);
                        regs->r8 = do_ni_hypercall();
            }
-           return 1;
+           return IA64_NO_FAULT;
        }
 
        /* Hypercalls are only allowed by kernel.
@@ -316,7 +304,7 @@ ia64_hypercall (struct pt_regs *regs)
            /* FIXME: Return a better error value ?
               Reflection ? Illegal operation ?  */
            regs->r8 = -1;
-           return 1;
+           return IA64_NO_FAULT;
        }
 
        if (index >= FW_HYPERCALL_FIRST_ARCH)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hyperprivop.S   Tue May 30 14:30:34 2006 -0500
@@ -30,7 +30,7 @@
 #undef FAST_ITC        //XXX CONFIG_XEN_IA64_DOM0_VP
                //    TODO fast_itc doesn't suport dom0 vp yet.
 #else
-//#define FAST_ITC     // working but default off for now
+//#define FAST_ITC     // to be reviewed
 #endif
 #define FAST_BREAK
 #ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -46,27 +46,8 @@
 #undef RFI_TO_INTERRUPT // not working yet
 #endif
 
-#define    XEN_HYPER_RFI           0x1
-#define    XEN_HYPER_RSM_DT        0x2
-#define    XEN_HYPER_SSM_DT        0x3
-#define    XEN_HYPER_COVER         0x4
-#define    XEN_HYPER_ITC_D         0x5
-#define    XEN_HYPER_ITC_I         0x6
-#define    XEN_HYPER_SSM_I         0x7
-#define    XEN_HYPER_GET_IVR       0x8
-#define    XEN_HYPER_GET_TPR       0x9
-#define    XEN_HYPER_SET_TPR       0xa
-#define    XEN_HYPER_EOI           0xb
-#define    XEN_HYPER_SET_ITM       0xc
-#define    XEN_HYPER_THASH         0xd
-#define    XEN_HYPER_PTC_GA        0xe
-#define    XEN_HYPER_ITR_D         0xf
-#define    XEN_HYPER_GET_RR        0x10
-#define    XEN_HYPER_SET_RR        0x11
-#define    XEN_HYPER_SET_KR        0x12
-
 #ifdef CONFIG_SMP
-#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
+//#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
 #undef FAST_PTC_GA
 #endif
 
@@ -106,7 +87,7 @@ GLOBAL_ENTRY(fast_hyperprivop)
 #endif
        // HYPERPRIVOP_SSM_I?
        // assumes domain interrupts pending, so just do it
-       cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
 (p7)   br.sptk.many hyper_ssm_i;;
 
        // FIXME. This algorithm gives up (goes to the slow path) if there
@@ -127,75 +108,75 @@ 1:        // when we get to here r20=~=interrup
 1:     // when we get to here r20=~=interrupts pending
 
        // HYPERPRIVOP_RFI?
-       cmp.eq p7,p6=XEN_HYPER_RFI,r17
+       cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
 (p7)   br.sptk.many hyper_rfi;;
 
        // HYPERPRIVOP_GET_IVR?
-       cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_IVR,r17
 (p7)   br.sptk.many hyper_get_ivr;;
 
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 
        // HYPERPRIVOP_COVER?
-       cmp.eq p7,p6=XEN_HYPER_COVER,r17
+       cmp.eq p7,p6=HYPERPRIVOP_COVER,r17
 (p7)   br.sptk.many hyper_cover;;
 
        // HYPERPRIVOP_SSM_DT?
-       cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17
 (p7)   br.sptk.many hyper_ssm_dt;;
 
        // HYPERPRIVOP_RSM_DT?
-       cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+       cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17
 (p7)   br.sptk.many hyper_rsm_dt;;
 
        // HYPERPRIVOP_GET_TPR?
-       cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_TPR,r17
 (p7)   br.sptk.many hyper_get_tpr;;
 
        // HYPERPRIVOP_SET_TPR?
-       cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_TPR,r17
 (p7)   br.sptk.many hyper_set_tpr;;
 
        // HYPERPRIVOP_EOI?
-       cmp.eq p7,p6=XEN_HYPER_EOI,r17
+       cmp.eq p7,p6=HYPERPRIVOP_EOI,r17
 (p7)   br.sptk.many hyper_eoi;;
 
        // HYPERPRIVOP_SET_ITM?
-       cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17
 (p7)   br.sptk.many hyper_set_itm;;
 
        // HYPERPRIVOP_SET_RR?
-       cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17
 (p7)   br.sptk.many hyper_set_rr;;
 
        // HYPERPRIVOP_GET_RR?
-       cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17
 (p7)   br.sptk.many hyper_get_rr;;
 
        // HYPERPRIVOP_PTC_GA?
-       cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
+       cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17
 (p7)   br.sptk.many hyper_ptc_ga;;
 
        // HYPERPRIVOP_ITC_D?
-       cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
+       cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17
 (p7)   br.sptk.many hyper_itc_d;;
 
        // HYPERPRIVOP_ITC_I?
-       cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
+       cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17
 (p7)   br.sptk.many hyper_itc_i;;
 
        // HYPERPRIVOP_THASH?
-       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+       cmp.eq p7,p6=HYPERPRIVOP_THASH,r17
 (p7)   br.sptk.many hyper_thash;;
 
        // HYPERPRIVOP_SET_KR?
-       cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17
 (p7)   br.sptk.many hyper_set_kr;;
 
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
-
+END(fast_hyperprivop)
 
 // give up for now if: ipsr.be==1, ipsr.pp==1
 // from reflect_interruption, don't need to:
@@ -250,7 +231,7 @@ ENTRY(hyper_ssm_i)
        cmp.ne p7,p0=r21,r0
 (p7)   br.sptk.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_I);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -348,6 +329,7 @@ ENTRY(hyper_ssm_i)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_ssm_i)
 
 // reflect domain clock interrupt
 //     r31 == pr
@@ -594,7 +576,7 @@ 1:
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
        // fall through
-
+END(fast_break_reflect)
 
 // reflect to domain ivt+r20
 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
@@ -723,6 +705,7 @@ ENTRY(fast_reflect)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(fast_reflect)
 
 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
 //     r16 == isr
@@ -762,6 +745,7 @@ GLOBAL_ENTRY(fast_access_reflect)
        and r22=~3,r22;;
        st8 [r23]=r22;;
        br.cond.sptk.many fast_reflect;;
+END(fast_access_reflect)
 
 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
 // is as it was at the time of original miss.  We want to preserve that
@@ -769,7 +753,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
 GLOBAL_ENTRY(fast_tlb_miss_reflect)
 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
        br.spnt.few page_fault ;;
-#endif
+#else
        mov r31=pr
        mov r30=cr.ipsr
        mov r29=cr.iip
@@ -957,6 +941,7 @@ 1:  // check the guest VHPT
        extr.u r24=r24,2,6;;
        // IFA already in PSCB
        br.cond.sptk.many fast_insert;;
+END(fast_tlb_miss_reflect)
 
 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
 ENTRY(recover_and_page_fault)
@@ -1007,6 +992,7 @@ 1: extr.u r25=r17,61,3;;
        mov r29=cr.iip
        mov r30=cr.ipsr
        br.sptk.many fast_reflect;;
+#endif
 END(fast_tlb_miss_reflect)
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1065,7 +1051,7 @@ 1:
 
 1:     // OK now, let's do an rfi.
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RFI);;
        ld8 r23=[r20];;
        adds r23=1,r23;;
        st8 [r20]=r23;;
@@ -1145,9 +1131,10 @@ 1:       mov pr=r31,-1
        ;;
        rfi
        ;;
-
+END(hyper_rfi)
+       
 #ifdef RFI_TO_INTERRUPT
-GLOBAL_ENTRY(rfi_check_extint)
+ENTRY(rfi_check_extint)
        //br.sptk.many dispatch_break_fault ;;
 
        // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
@@ -1214,11 +1201,12 @@ GLOBAL_ENTRY(rfi_check_extint)
        adds r29=15,r29;;
        cmp.ge p6,p0=r29,r26    // if tpr masks interrupt, just rfi
 (p6)   br.cond.spnt.few just_do_rfi;;
+END(rfi_check_extint)
 
 // this doesn't work yet (dies early after getting to user mode)
 // but happens relatively infrequently, so fix it later.
 // NOTE that these will be counted incorrectly for now (for privcnt output)
-GLOBAL_ENTRY(rfi_with_interrupt)
+ENTRY(rfi_with_interrupt)
 #if 1
        br.sptk.many dispatch_break_fault ;;
 #endif
@@ -1313,11 +1301,12 @@ GLOBAL_ENTRY(rfi_with_interrupt)
        st4 [r20]=r0 ;;
        mov pr=r31,-1 ;;
        rfi
+END(rfi_with_interrupt)
 #endif // RFI_TO_INTERRUPT
 
 ENTRY(hyper_cover)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_COVER);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1348,11 +1337,12 @@ ENTRY(hyper_cover)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_cover)
 
 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
 ENTRY(hyper_ssm_dt)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_DT);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1384,11 +1374,12 @@ 1:      extr.u r26=r24,41,2 ;;
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_ssm_dt)
 
 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
 ENTRY(hyper_rsm_dt)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RSM_DT);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1421,10 +1412,11 @@ 1:      extr.u r26=r24,41,2 ;;
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_rsm_dt)
 
 ENTRY(hyper_get_tpr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_TPR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1453,7 +1445,7 @@ END(hyper_get_tpr)
 // (or accidentally missing) delivering an interrupt
 ENTRY(hyper_set_tpr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_TPR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1481,7 +1473,7 @@ END(hyper_set_tpr)
 
 ENTRY(hyper_get_ivr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
+       movl r22=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_IVR);;
        ld8 r21=[r22];;
        adds r21=1,r21;;
        st8 [r22]=r21;;
@@ -1593,7 +1585,7 @@ ENTRY(hyper_eoi)
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_EOI);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1657,7 +1649,7 @@ ENTRY(hyper_set_itm)
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_ITM);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1698,7 +1690,7 @@ END(hyper_set_itm)
 
 ENTRY(hyper_get_rr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_RR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1730,7 +1722,7 @@ ENTRY(hyper_set_rr)
        cmp.leu p7,p0=7,r25     // punt on setting rr7
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_RR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1788,7 +1780,7 @@ ENTRY(hyper_set_kr)
        cmp.ne p7,p0=r0,r25     // if kr# > 7, go slow way
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_KR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1844,9 +1836,9 @@ END(hyper_set_kr)
 // On entry:
 //     r18 == XSI_PSR_IC
 //     r31 == pr
-GLOBAL_ENTRY(hyper_thash)
+ENTRY(hyper_thash)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_THASH);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1915,7 +1907,7 @@ ENTRY(hyper_ptc_ga)
 #endif
        // FIXME: validate not flushing Xen addresses
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_PTC_GA);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1992,18 +1984,19 @@ ENTRY(recover_and_dispatch_break_fault)
 #endif
        mov b0=r29 ;;
        br.sptk.many dispatch_break_fault;;
+END(recover_and_dispatch_break_fault)
 
 //  Registers at entry
-//     r17 = break immediate (XEN_HYPER_ITC_D or I)
+//     r17 = break immediate (HYPERPRIVOP_ITC_D or I)
 //     r18 == XSI_PSR_IC_OFS
 //     r31 == pr
-GLOBAL_ENTRY(hyper_itc)
-ENTRY(hyper_itc_i)
+ENTRY(hyper_itc)
+hyper_itc_i:   
        // fall through, hyper_itc_d handles both i and d
-ENTRY(hyper_itc_d)
+hyper_itc_d:   
 #ifndef FAST_ITC
        br.sptk.many dispatch_break_fault ;;
-#endif
+#else
        // ensure itir.ps >= xen's pagesize
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r23=[r23];;
@@ -2027,9 +2020,9 @@ ENTRY(hyper_itc_d)
        cmp.ne p7,p0=r27,r28
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
-(p6)   movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
-(p7)   movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
+       cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
+(p6)   movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_D);;
+(p7)   movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_I);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -2040,7 +2033,10 @@ ENTRY(hyper_itc_d)
        movl r30=recover_and_dispatch_break_fault ;;
        mov r16=r8;;
        // fall through
-
+#endif
+END(hyper_itc)
+
+#if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
 
 // fast_insert(PSCB(ifa),r24=ps,r16=pte)
 //     r16 == pte
@@ -2050,7 +2046,7 @@ ENTRY(hyper_itc_d)
 //     r29 == saved value of b0 in case of recovery
 //     r30 == recovery ip if failure occurs
 //     r31 == pr
-GLOBAL_ENTRY(fast_insert)
+ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
        mov r19=1;;
        shl r20=r19,r24;;
@@ -2175,4 +2171,4 @@ no_inc_iip:
        rfi
        ;;
 END(fast_insert)
-
+#endif
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/ivt.S   Tue May 30 14:30:34 2006 -0500
@@ -100,6 +100,15 @@
        mov r19=n;;                     /* prepare to save predicates */        
        \
        br.sptk.many dispatch_to_fault_handler
 
+#define FAULT_OR_REFLECT(n)                                                    
        \
+       mov r31=pr;                                                             
        \
+       mov r20=cr.ipsr;;                                                       
        \
+       mov r19=n;      /* prepare to save predicates */                        
        \
+       extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;                                    
        \
+       cmp.ne p6,p0=r0,r20;    /* cpl != 0?*/                                  
        \
+(p6)   br.dptk.many dispatch_reflection;                                       
        \
+       br.sptk.few dispatch_to_fault_handler
+
 #ifdef XEN
 #define REFLECT(n)                                                             
        \
        mov r31=pr;                                                             
        \
@@ -697,7 +706,7 @@ ENTRY(ikey_miss)
 ENTRY(ikey_miss)
        DBG_FAULT(6)
 #ifdef XEN
-       REFLECT(6)
+       FAULT_OR_REFLECT(6)
 #endif
        FAULT(6)
 END(ikey_miss)
@@ -746,7 +755,7 @@ ENTRY(dkey_miss)
 ENTRY(dkey_miss)
        DBG_FAULT(7)
 #ifdef XEN
-       REFLECT(7)
+       FAULT_OR_REFLECT(7)
 #endif
        FAULT(7)
 END(dkey_miss)
@@ -757,7 +766,7 @@ ENTRY(dirty_bit)
 ENTRY(dirty_bit)
        DBG_FAULT(8)
 #ifdef XEN
-       REFLECT(8)
+       FAULT_OR_REFLECT(8)
 #endif
        /*
         * What we do here is to simply turn on the dirty bit in the PTE.  We 
need to
@@ -1523,7 +1532,7 @@ ENTRY(page_not_present)
 ENTRY(page_not_present)
        DBG_FAULT(20)
 #ifdef XEN
-       REFLECT(20)
+       FAULT_OR_REFLECT(20)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1546,7 +1555,7 @@ ENTRY(key_permission)
 ENTRY(key_permission)
        DBG_FAULT(21)
 #ifdef XEN
-       REFLECT(21)
+       FAULT_OR_REFLECT(21)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1562,7 +1571,7 @@ ENTRY(iaccess_rights)
 ENTRY(iaccess_rights)
        DBG_FAULT(22)
 #ifdef XEN
-       REFLECT(22)
+       FAULT_OR_REFLECT(22)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1637,7 +1646,7 @@ ENTRY(disabled_fp_reg)
        mov pr=r20,-1
        ;;
 #endif
-       REFLECT(25)
+       FAULT_OR_REFLECT(25)
 //floating_panic:
 //     br.sptk.many floating_panic
        ;;
@@ -1656,7 +1665,7 @@ ENTRY(nat_consumption)
 ENTRY(nat_consumption)
        DBG_FAULT(26)
 #ifdef XEN
-       REFLECT(26)
+       FAULT_OR_REFLECT(26)
 #endif
        FAULT(26)
 END(nat_consumption)
@@ -1668,7 +1677,7 @@ ENTRY(speculation_vector)
        DBG_FAULT(27)
 #ifdef XEN
        // this probably need not reflect...
-       REFLECT(27)
+       FAULT_OR_REFLECT(27)
 #endif
        /*
         * A [f]chk.[as] instruction needs to take the branch to the recovery 
code but
@@ -1714,7 +1723,7 @@ ENTRY(debug_vector)
 ENTRY(debug_vector)
        DBG_FAULT(29)
 #ifdef XEN
-       REFLECT(29)
+       FAULT_OR_REFLECT(29)
 #endif
        FAULT(29)
 END(debug_vector)
@@ -1725,7 +1734,7 @@ ENTRY(unaligned_access)
 ENTRY(unaligned_access)
        DBG_FAULT(30)
 #ifdef XEN
-       REFLECT(30)
+       FAULT_OR_REFLECT(30)
 #endif
        mov r16=cr.ipsr
        mov r31=pr              // prepare to save predicates
@@ -1739,7 +1748,7 @@ ENTRY(unsupported_data_reference)
 ENTRY(unsupported_data_reference)
        DBG_FAULT(31)
 #ifdef XEN
-       REFLECT(31)
+       FAULT_OR_REFLECT(31)
 #endif
        FAULT(31)
 END(unsupported_data_reference)
@@ -1750,7 +1759,7 @@ ENTRY(floating_point_fault)
 ENTRY(floating_point_fault)
        DBG_FAULT(32)
 #ifdef XEN
-       REFLECT(32)
+       FAULT_OR_REFLECT(32)
 #endif
        FAULT(32)
 END(floating_point_fault)
@@ -1761,7 +1770,7 @@ ENTRY(floating_point_trap)
 ENTRY(floating_point_trap)
        DBG_FAULT(33)
 #ifdef XEN
-       REFLECT(33)
+       FAULT_OR_REFLECT(33)
 #endif
        FAULT(33)
 END(floating_point_trap)
@@ -1772,7 +1781,7 @@ ENTRY(lower_privilege_trap)
 ENTRY(lower_privilege_trap)
        DBG_FAULT(34)
 #ifdef XEN
-       REFLECT(34)
+       FAULT_OR_REFLECT(34)
 #endif
        FAULT(34)
 END(lower_privilege_trap)
@@ -1783,7 +1792,7 @@ ENTRY(taken_branch_trap)
 ENTRY(taken_branch_trap)
        DBG_FAULT(35)
 #ifdef XEN
-       REFLECT(35)
+       FAULT_OR_REFLECT(35)
 #endif
        FAULT(35)
 END(taken_branch_trap)
@@ -1794,7 +1803,7 @@ ENTRY(single_step_trap)
 ENTRY(single_step_trap)
        DBG_FAULT(36)
 #ifdef XEN
-       REFLECT(36)
+       FAULT_OR_REFLECT(36)
 #endif
        FAULT(36)
 END(single_step_trap)
@@ -1853,7 +1862,7 @@ ENTRY(ia32_exception)
 ENTRY(ia32_exception)
        DBG_FAULT(45)
 #ifdef XEN
-       REFLECT(45)
+       FAULT_OR_REFLECT(45)
 #endif
        FAULT(45)
 END(ia32_exception)
@@ -1864,7 +1873,7 @@ ENTRY(ia32_intercept)
 ENTRY(ia32_intercept)
        DBG_FAULT(46)
 #ifdef XEN
-       REFLECT(46)
+       FAULT_OR_REFLECT(46)
 #endif
 #ifdef CONFIG_IA32_SUPPORT
        mov r31=pr
@@ -1897,7 +1906,7 @@ ENTRY(ia32_interrupt)
 ENTRY(ia32_interrupt)
        DBG_FAULT(47)
 #ifdef XEN
-       REFLECT(47)
+       FAULT_OR_REFLECT(47)
 #endif
 #ifdef CONFIG_IA32_SUPPORT
        mov r31=pr
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/privop.c        Tue May 30 14:30:34 2006 -0500
@@ -793,33 +793,6 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
                printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
        return fault;
 }
-
-
-// FIXME: Move these to include/public/arch-ia64?
-#define HYPERPRIVOP_RFI                        0x1
-#define HYPERPRIVOP_RSM_DT             0x2
-#define HYPERPRIVOP_SSM_DT             0x3
-#define HYPERPRIVOP_COVER              0x4
-#define HYPERPRIVOP_ITC_D              0x5
-#define HYPERPRIVOP_ITC_I              0x6
-#define HYPERPRIVOP_SSM_I              0x7
-#define HYPERPRIVOP_GET_IVR            0x8
-#define HYPERPRIVOP_GET_TPR            0x9
-#define HYPERPRIVOP_SET_TPR            0xa
-#define HYPERPRIVOP_EOI                        0xb
-#define HYPERPRIVOP_SET_ITM            0xc
-#define HYPERPRIVOP_THASH              0xd
-#define HYPERPRIVOP_PTC_GA             0xe
-#define HYPERPRIVOP_ITR_D              0xf
-#define HYPERPRIVOP_GET_RR             0x10
-#define HYPERPRIVOP_SET_RR             0x11
-#define HYPERPRIVOP_SET_KR             0x12
-#define HYPERPRIVOP_FC                 0x13
-#define HYPERPRIVOP_GET_CPUID          0x14
-#define HYPERPRIVOP_GET_PMD            0x15
-#define HYPERPRIVOP_GET_EFLAG          0x16
-#define HYPERPRIVOP_SET_EFLAG          0x17
-#define HYPERPRIVOP_MAX                        0x17
 
 static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = {
        0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/process.c       Tue May 30 14:30:34 2006 -0500
@@ -15,7 +15,6 @@
 #include <asm/ptrace.h>
 #include <xen/delay.h>
 
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
 #include <asm/sal.h>   /* FOR struct ia64_sal_retval */
 
 #include <asm/system.h>
@@ -40,7 +39,7 @@ extern void panic_domain(struct pt_regs 
 extern void panic_domain(struct pt_regs *, const char *, ...);
 extern long platform_is_hp_ski(void);
 extern int ia64_hyperprivop(unsigned long, REGS *);
-extern int ia64_hypercall(struct pt_regs *regs);
+extern IA64FAULT ia64_hypercall(struct pt_regs *regs);
 extern void vmx_do_launch(struct vcpu *);
 extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
 
@@ -195,10 +194,10 @@ void check_bad_nested_interruption(unsig
        }
        vector &= ~0xf;
        if (vector != IA64_DATA_TLB_VECTOR &&
-               vector != IA64_ALT_DATA_TLB_VECTOR &&
-               vector != IA64_VHPT_TRANS_VECTOR) {
-panic_domain(regs,"psr.ic off, delivering 
fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
-       vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
+           vector != IA64_ALT_DATA_TLB_VECTOR &&
+           vector != IA64_VHPT_TRANS_VECTOR) {
+               panic_domain(regs,"psr.ic off, delivering 
fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n",
+                            
vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
        }
 }
 
@@ -265,7 +264,8 @@ void deliver_pending_interrupt(struct pt
 }
 unsigned long lazy_cover_count = 0;
 
-int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
+static int
+handle_lazy_cover(struct vcpu *v, struct pt_regs *regs)
 {
        if (!PSCB(v,interrupt_collection_enabled)) {
                PSCB(v,ifs) = regs->cr_ifs;
@@ -285,7 +285,7 @@ void ia64_do_page_fault (unsigned long a
        unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
        IA64FAULT fault;
 
-       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) 
return;
+       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return;
        if ((isr & IA64_ISR_SP)
            || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH))
        {
@@ -299,7 +299,7 @@ void ia64_do_page_fault (unsigned long a
        }
 
  again:
-       fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha);
+       fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
        if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
                u64 logps;
                pteval = translate_domain_pte(pteval, address, itir, &logps);
@@ -307,11 +307,7 @@ void ia64_do_page_fault (unsigned long a
                if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) {
                        /* dtlb has been purged in-between.  This dtlb was
                           matching.  Undo the work.  */
-#ifdef VHPT_GLOBAL
-                       vhpt_flush_address (address, 1);
-#endif
-                       ia64_ptcl(address, 1<<2);
-                       ia64_srlz_i();
+                       vcpu_flush_tlb_vhpt_range (address, 1);
                        goto again;
                }
                return;
@@ -357,7 +353,7 @@ ia64_fault (unsigned long vector, unsign
        struct pt_regs *regs = (struct pt_regs *) &stack;
        unsigned long code;
        char buf[128];
-       static const char * const reason[] = {
+       static const char *reason[] = {
                "IA-64 Illegal Operation fault",
                "IA-64 Privileged Operation fault",
                "IA-64 Privileged Register fault",
@@ -367,10 +363,10 @@ ia64_fault (unsigned long vector, unsign
                "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", 
"Unknown fault 12",
                "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
        };
-#if 0
-printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
- vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
-#endif
+
+       printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, 
ipsr=0x%016lx, isr=0x%016lx\n",
+              vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
+
 
        if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH)) {
                /*
@@ -383,15 +379,48 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
        }
 
        switch (vector) {
-             case 24: /* General Exception */
+           case 0:
+               printk("VHPT Translation.\n");
+               break;
+         
+           case 4:
+               printk("Alt DTLB.\n");
+               break;
+         
+           case 6:
+               printk("Instruction Key Miss.\n");
+               break;
+
+           case 7: 
+               printk("Data Key Miss.\n");
+               break;
+
+           case 8: 
+               printk("Dirty-bit.\n");
+               break;
+
+           case 20:
+               printk("Page Not Found.\n");
+               break;
+
+           case 21:
+               printk("Key Permission.\n");
+               break;
+
+           case 22:
+               printk("Instruction Access Rights.\n");
+               break;
+
+           case 24: /* General Exception */
                code = (isr >> 4) & 0xf;
                sprintf(buf, "General Exception: %s%s", reason[code],
-                       (code == 3) ? ((isr & (1UL << 37))
-                                      ? " (RSE access)" : " (data access)") : 
"");
+                       (code == 3) ? ((isr & (1UL << 37)) ? " (RSE access)" :
+                                      " (data access)") : "");
                if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
                        printk("%s[%d]: possible hazard @ ip=%016lx (pr = 
%016lx)\n",
-                              current->comm, current->pid, regs->cr_iip + 
ia64_psr(regs)->ri,
+                              current->comm, current->pid,
+                              regs->cr_iip + ia64_psr(regs)->ri,
                               regs->pr);
 # endif
                        printf("ia64_fault: returning on hazard\n");
@@ -399,162 +428,65 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
                }
                break;
 
-             case 25: /* Disabled FP-Register */
-               if (isr & 2) {
-                       //disabled_fph_fault(regs);
-                       //return;
-               }
-               sprintf(buf, "Disabled FPL fault---not supposed to happen!");
-               break;
-
-             case 26: /* NaT Consumption */
-               if (user_mode(regs)) {
-                       void *addr;
-
-                       if (((isr >> 4) & 0xf) == 2) {
-                               /* NaT page consumption */
-                               //sig = SIGSEGV;
-                               //code = SEGV_ACCERR;
-                               addr = (void *) ifa;
-                       } else {
-                               /* register NaT consumption */
-                               //sig = SIGILL;
-                               //code = ILL_ILLOPN;
-                               addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       }
-                       //siginfo.si_signo = sig;
-                       //siginfo.si_code = code;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_addr = addr;
-                       //siginfo.si_imm = vector;
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //force_sig_info(sig, &siginfo, current);
-                       //return;
-               } //else if (ia64_done_with_exception(regs))
-                       //return;
-               sprintf(buf, "NaT consumption");
-               break;
-
-             case 31: /* Unsupported Data Reference */
-               if (user_mode(regs)) {
-                       //siginfo.si_signo = SIGILL;
-                       //siginfo.si_code = ILL_ILLOPN;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       //siginfo.si_imm = vector;
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //force_sig_info(SIGILL, &siginfo, current);
-                       //return;
-               }
-               sprintf(buf, "Unsupported data reference");
-               break;
-
-             case 29: /* Debug */
-             case 35: /* Taken Branch Trap */
-             case 36: /* Single Step Trap */
-               //if (fsys_mode(current, regs)) {}
-               switch (vector) {
-                     case 29:
-                       //siginfo.si_code = TRAP_HWBKPT;
-#ifdef CONFIG_ITANIUM
-                       /*
-                        * Erratum 10 (IFA may contain incorrect address) now 
has
-                        * "NoFix" status.  There are no plans for fixing this.
-                        */
-                       if (ia64_psr(regs)->is == 0)
-                         ifa = regs->cr_iip;
-#endif
-                       break;
-                     case 35: ifa = 0; break;
-                     case 36: ifa = 0; break;
-                     //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
-                     //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
-               }
-               //siginfo.si_signo = SIGTRAP;
-               //siginfo.si_errno = 0;
-               //siginfo.si_addr  = (void *) ifa;
-               //siginfo.si_imm   = 0;
-               //siginfo.si_flags = __ISR_VALID;
-               //siginfo.si_isr   = isr;
-               //force_sig_info(SIGTRAP, &siginfo, current);
-               //return;
-
-             case 32: /* fp fault */
-             case 33: /* fp trap */
-               //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
-               //if ((result < 0) || (current->thread.flags & 
IA64_THREAD_FPEMU_SIGFPE)) {
-                       //siginfo.si_signo = SIGFPE;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_code = FPE_FLTINV;
-                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //siginfo.si_imm = 0;
-                       //force_sig_info(SIGFPE, &siginfo, current);
-               //}
-               //return;
-               sprintf(buf, "FP fault/trap");
-               break;
-
-             case 34:
-               if (isr & 0x2) {
-                       /* Lower-Privilege Transfer Trap */
-                       /*
-                        * Just clear PSR.lp and then return immediately: all 
the
-                        * interesting work (e.g., signal delivery is done in 
the kernel
-                        * exit path).
-                        */
-                       //ia64_psr(regs)->lp = 0;
-                       //return;
-                       sprintf(buf, "Lower-Privilege Transfer trap");
-               } else {
-                       /* Unimplemented Instr. Address Trap */
-                       if (user_mode(regs)) {
-                               //siginfo.si_signo = SIGILL;
-                               //siginfo.si_code = ILL_BADIADDR;
-                               //siginfo.si_errno = 0;
-                               //siginfo.si_flags = 0;
-                               //siginfo.si_isr = 0;
-                               //siginfo.si_imm = 0;
-                               //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                               //force_sig_info(SIGILL, &siginfo, current);
-                               //return;
-                       }
-                       sprintf(buf, "Unimplemented Instruction Address fault");
-               }
-               break;
-
-             case 45:
-               printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
-               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
-                      regs->cr_iip, ifa, isr);
-               //force_sig(SIGSEGV, current);
-               break;
-
-             case 46:
-               printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
-               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 
0x%lx\n",
-                      regs->cr_iip, ifa, isr, iim);
-               //force_sig(SIGSEGV, current);
-               return;
-
-             case 47:
-               sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
-               break;
-
-             default:
-               sprintf(buf, "Fault %lu", vector);
-               break;
-       }
-       //die_if_kernel(buf, regs, error);
-printk("ia64_fault: %s: reflecting\n",buf);
-PSCB(current,itir) = vcpu_get_itir_on_fault(current,ifa);
-PSCB(current,ifa) = ifa;
-reflect_interruption(isr,regs,IA64_GENEX_VECTOR);
-//while(1);
-       //force_sig(SIGILL, current);
+           case 25:
+               printk("Disabled FP-Register.\n");
+               break;
+
+           case 26:
+               printk("NaT consumption.\n");
+               break;
+
+           case 29:
+               printk("Debug.\n");
+               break;
+
+           case 30:
+               printk("Unaligned Reference.\n");
+               break;
+
+           case 31:
+               printk("Unsupported data reference.\n");
+               break;
+
+           case 32:
+               printk("Floating-Point Fault.\n");
+               break;
+
+           case 33:
+               printk("Floating-Point Trap.\n");
+               break;
+
+           case 34:
+               printk("Lower Privilege Transfer Trap.\n");
+               break;
+
+           case 35:
+               printk("Taken Branch Trap.\n");
+               break;
+
+           case 36:
+               printk("Single Step Trap.\n");
+               break;
+    
+           case 45:
+               printk("IA-32 Exception.\n");
+               break;
+
+           case 46:
+               printk("IA-32 Intercept.\n");
+               break;
+
+           case 47:
+               printk("IA-32 Interrupt.\n");
+               break;
+
+           default:
+               printk("Fault %lu\n", vector);
+               break;
+       }
+
+       show_registers(regs);
+       panic("Fault in Xen.\n");
 }
 
 unsigned long running_on_sim = 0;
@@ -679,6 +611,7 @@ ia64_handle_break (unsigned long ifa, st
 {
        struct domain *d = current->domain;
        struct vcpu *v = current;
+       IA64FAULT vector;
 
        if (first_break) {
                if (platform_is_hp_ski()) running_on_sim = 1;
@@ -699,9 +632,11 @@ ia64_handle_break (unsigned long ifa, st
                /* by default, do not continue */
                v->arch.hypercall_continuation = 0;
 
-               if (ia64_hypercall(regs) &&
-                   !PSCBX(v, hypercall_continuation))
-                       vcpu_increment_iip(current);
+               if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) {
+                       if (!PSCBX(v, hypercall_continuation))
+                               vcpu_increment_iip(current);
+               }
+               else reflect_interruption(isr, regs, vector);
        }
        else if (!PSCB(v,interrupt_collection_enabled)) {
                if (ia64_hyperprivop(iim,regs))
@@ -813,7 +748,7 @@ printf("*** Handled privop masquerading 
                while(vector);
                return;
        }
-       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, 
isr, regs)) return;
+       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, 
regs)) return;
        PSCB(current,ifa) = ifa;
        PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa);
        reflect_interruption(isr,regs,vector);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/regionreg.c     Tue May 30 14:30:34 2006 -0500
@@ -17,9 +17,7 @@
 #include <asm/vcpu.h>
 
 /* Defined in xemasm.S  */
-extern void ia64_new_rr7(unsigned long rid,void *shared_info, void 
*shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
-
-extern void *pal_vaddr;
+extern void ia64_new_rr7(unsigned long rid, void *shared_info, void 
*shared_arch_info, unsigned long shared_info_va, unsigned long p_vhpt);
 
 /* RID virtualization mechanism is really simple:  domains have less rid bits
    than the host and the host rid space is shared among the domains.  (Values
@@ -260,9 +258,9 @@ int set_one_rr(unsigned long rr, unsigne
                if (!PSCB(v,metaphysical_mode))
                        set_rr(rr,newrrv.rrval);
        } else if (rreg == 7) {
-               ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
-                            v->arch.privregs, __get_cpu_var(vhpt_paddr),
-                            (unsigned long) pal_vaddr);
+               ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
+                            v->arch.privregs, v->domain->arch.shared_info_va,
+                            __get_cpu_var(vhpt_paddr));
        } else {
                set_rr(rr,newrrv.rrval);
        }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vcpu.c  Tue May 30 14:30:34 2006 -0500
@@ -28,8 +28,6 @@ extern void setfpreg (unsigned long regn
 
 extern void panic_domain(struct pt_regs *, const char *, ...);
 extern unsigned long translate_domain_mpaddr(unsigned long);
-extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
-
 
 typedef        union {
        struct ia64_psr ia64_psr;
@@ -682,9 +680,9 @@ UINT64 vcpu_check_pending_interrupts(VCP
         */
 check_start:
        if (event_pending(vcpu) && 
-               !test_bit(vcpu->vcpu_info->arch.evtchn_vector,
+               !test_bit(vcpu->domain->shared_info->arch.evtchn_vector,
                        &PSCBX(vcpu, insvc[0])))
-               vcpu_pend_interrupt(vcpu, vcpu->vcpu_info->arch.evtchn_vector);
+               vcpu_pend_interrupt(vcpu, 
vcpu->domain->shared_info->arch.evtchn_vector);
 
        p = &PSCBX(vcpu,irr[3]);
        r = &PSCBX(vcpu,insvc[3]);
@@ -1290,8 +1288,7 @@ static inline int vcpu_match_tr_entry(TR
        return trp->pte.p && vcpu_match_tr_entry_no_p(trp, ifa, rid);
 }
 
-// in_tpa is not used when CONFIG_XEN_IA64_DOM0_VP
-IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, BOOLEAN 
in_tpa, UINT64 *pteval, UINT64 *itir, UINT64 *iha)
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 
*pteval, UINT64 *itir, UINT64 *iha)
 {
        unsigned long region = address >> 61;
        unsigned long pta, rid, rr;
@@ -1368,12 +1365,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN
        pte = trp->pte;
        if (/* is_data && */ pte.p
            && vcpu_match_tr_entry_no_p(trp,address,rid)) {
-#ifndef CONFIG_XEN_IA64_DOM0_VP
-               if (vcpu->domain==dom0 && !in_tpa)
-                       *pteval = pte.val;
-               else
-#endif
-               *pteval = vcpu->arch.dtlb_pte;
+               *pteval = pte.val;
                *itir = trp->itir;
                dtlb_translate_count++;
                return IA64_USE_TLB;
@@ -1422,7 +1414,7 @@ IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 va
        UINT64 pteval, itir, mask, iha;
        IA64FAULT fault;
 
-       fault = vcpu_translate(vcpu, vadr, TRUE, TRUE, &pteval, &itir, &iha);
+       fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
        if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB)
        {
                mask = itir_mask(itir);
@@ -1708,11 +1700,6 @@ IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT6
  VCPU translation register access routines
 **************************************************************************/
 
-void vcpu_purge_tr_entry(TR_ENTRY *trp)
-{
-       trp->pte.val = 0;
-}
-
 static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 
ifa)
 {
        UINT64 ps;
@@ -1800,12 +1787,10 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
        if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
                return;
        if (IorD & 0x1) {
-               vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
-               PSCBX(vcpu,itlb_pte) = mp_pte;
+               vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
        }
        if (IorD & 0x2) {
-               vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
-               PSCBX(vcpu,dtlb_pte) = mp_pte;
+               vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),mp_pte,ps<<2,vaddr);
        }
 }
 
@@ -1875,20 +1860,14 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vad
        return fault;
 }
 
-int ptce_count = 0;
 IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
 {
        // Note that this only needs to be called once, i.e. the
        // architected loop to purge the entire TLB, should use
        //  base = stride1 = stride2 = 0, count0 = count 1 = 1
 
-#ifdef VHPT_GLOBAL
-       vhpt_flush();   // FIXME: This is overdoing it
-#endif
-       local_flush_tlb_all();
-       // just invalidate the "whole" tlb
-       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       vcpu_flush_vtlb_all ();
+
        return IA64_NO_FAULT;
 }
 
@@ -1905,33 +1884,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 
        // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
 //printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
 
-#ifdef CONFIG_XEN_SMP
-       struct domain *d = vcpu->domain;
-       struct vcpu *v;
-
-       for_each_vcpu (d, v) {
-               if (v == vcpu)
-                       continue;
-
-               /* Purge TC entries.
-                  FIXME: clear only if match.  */
-               vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-               vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
-
-#ifdef VHPT_GLOBAL
-               /* Invalidate VHPT entries.  */
-               vhpt_flush_address_remote (v->processor, vadr, addr_range);
-#endif
-       }
-#endif
-
-#ifdef VHPT_GLOBAL
-       vhpt_flush_address(vadr,addr_range);
-#endif
-       ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
-       /* Purge tc.  */
-       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       domain_flush_vtlb_range (vcpu->domain, vadr, addr_range);
+
        return IA64_NO_FAULT;
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vhpt.c  Tue May 30 14:30:34 2006 -0500
@@ -12,32 +12,31 @@
 #include <asm/system.h>
 #include <asm/pgalloc.h>
 #include <asm/page.h>
-#include <asm/dma.h>
 #include <asm/vhpt.h>
+#include <asm/vcpu.h>
+
+/* Defined in tlb.c  */
+extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
 
 extern long running_on_sim;
 
 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
 DEFINE_PER_CPU (unsigned long, vhpt_pend);
 
-void vhpt_flush(void)
-{
-       struct vhpt_lf_entry *v =__va(__ia64_per_cpu_var(vhpt_paddr));
-       int i;
-#if 0
-static int firsttime = 2;
-
-if (firsttime) firsttime--;
-else {
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-}
-#endif
+static void vhpt_flush(void)
+{
+       struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+       int i;
+
+       for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
+               v->ti_tag = INVALID_TI_TAG;
+}
+
+static void vhpt_erase(void)
+{
+       struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+       int i;
+
        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
@@ -47,51 +46,6 @@ printf("vhpt_flush: ********************
        // initialize cache too???
 }
 
-#ifdef VHPT_GLOBAL
-void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
-{
-       struct vhpt_lf_entry *vlfe;
-
-       if ((vadr >> 61) == 7) {
-               // no vhpt for region 7 yet, see vcpu_itc_no_srlz
-               printf("vhpt_flush_address: region 7, spinning...\n");
-               while(1);
-       }
-#if 0
-       // this only seems to occur at shutdown, but it does occur
-       if ((!addr_range) || addr_range & (addr_range - 1)) {
-               printf("vhpt_flush_address: weird range, spinning...\n");
-               while(1);
-       }
-//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
-#endif
-       while ((long)addr_range > 0) {
-               vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
-               // FIXME: for now, just blow it away even if it belongs to
-               // another domain.  Later, use ttag to check for match
-//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
-//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
-//}
-               vlfe->ti_tag |= INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
-       }
-}
-
-void vhpt_flush_address_remote(int cpu,
-                              unsigned long vadr, unsigned long addr_range)
-{
-       while ((long)addr_range > 0) {
-               /* Get the VHPT entry.  */
-               unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
-               volatile struct vhpt_lf_entry *v;
-               v =__va(per_cpu(vhpt_paddr, cpu) + off);
-               v->ti_tag = INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
-       }
-}
-#endif
 
 static void vhpt_map(unsigned long pte)
 {
@@ -147,17 +101,11 @@ void vhpt_multiple_insert(unsigned long 
 
 void vhpt_init(void)
 {
-       unsigned long vhpt_total_size, vhpt_alignment;
        unsigned long paddr, pte;
        struct page_info *page;
 #if !VHPT_ENABLED
        return;
 #endif
-       // allocate a huge chunk of physical memory.... how???
-       vhpt_total_size = 1 << VHPT_SIZE_LOG2;  // 4MB, 16MB, 64MB, or 256MB
-       vhpt_alignment = 1 << VHPT_SIZE_LOG2;   // 4MB, 16MB, 64MB, or 256MB
-       printf("vhpt_init: vhpt size=0x%lx, align=0x%lx\n",
-               vhpt_total_size, vhpt_alignment);
        /* This allocation only holds true if vhpt table is unique for
         * all domains. Or else later new vhpt table should be allocated
         * from domain heap when each domain is created. Assume xen buddy
@@ -167,17 +115,135 @@ void vhpt_init(void)
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
+       if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
+               panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
-       __get_cpu_var(vhpt_pend) = paddr + vhpt_total_size - 1;
+       __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
                paddr, __get_cpu_var(vhpt_pend));
        pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
        vhpt_map(pte);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                VHPT_ENABLED);
-       vhpt_flush();
-}
-
+       vhpt_erase();
+}
+
+
+void vcpu_flush_vtlb_all (void)
+{
+       struct vcpu *v = current;
+
+       /* First VCPU tlb.  */
+       vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+       vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+       /* Then VHPT.  */
+       vhpt_flush ();
+
+       /* Then mTLB.  */
+       local_flush_tlb_all ();
+
+       /* We could clear bit in d->domain_dirty_cpumask only if domain d in
+          not running on this processor.  There is currently no easy way to
+          check this.  */
+}
+
+void domain_flush_vtlb_all (void)
+{
+       int cpu = smp_processor_id ();
+       struct vcpu *v;
+
+       for_each_vcpu (current->domain, v)
+               if (v->processor == cpu)
+                       vcpu_flush_vtlb_all ();
+               else
+                       smp_call_function_single
+                               (v->processor,
+                                (void(*)(void *))vcpu_flush_vtlb_all,
+                                NULL,1,1);
+}
+
+static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
+{
+       void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+
+       while ((long)addr_range > 0) {
+               /* Get the VHPT entry.  */
+               unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
+               volatile struct vhpt_lf_entry *v;
+               v = vhpt_base + off;
+               v->ti_tag = INVALID_TI_TAG;
+               addr_range -= PAGE_SIZE;
+               vadr += PAGE_SIZE;
+       }
+}
+
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
+{
+       cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+       ia64_ptcl(vadr, log_range << 2);
+       ia64_srlz_i();
+}
+
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
+{
+       struct vcpu *v;
+
+#if 0
+       // this only seems to occur at shutdown, but it does occur
+       if ((!addr_range) || addr_range & (addr_range - 1)) {
+               printf("vhpt_flush_address: weird range, spinning...\n");
+               while(1);
+       }
+#endif
+
+       for_each_vcpu (d, v) {
+               /* Purge TC entries.
+                  FIXME: clear only if match.  */
+               vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+               vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+               /* Invalidate VHPT entries.  */
+               cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+       }
+
+       /* ptc.ga  */
+       ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
+}
+
+static void flush_tlb_vhpt_all (struct domain *d)
+{
+       /* First VHPT.  */
+       vhpt_flush ();
+
+       /* Then mTLB.  */
+       local_flush_tlb_all ();
+}
+
+void domain_flush_destroy (struct domain *d)
+{
+       /* Very heavy...  */
+       on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+       cpus_clear (d->domain_dirty_cpumask);
+}
+
+void flush_tlb_mask(cpumask_t mask)
+{
+    int cpu;
+
+    cpu = smp_processor_id();
+    if (cpu_isset (cpu, mask)) {
+        cpu_clear(cpu, mask);
+        flush_tlb_vhpt_all (NULL);
+    }
+
+    if (cpus_empty(mask))
+        return;
+
+    for_each_cpu_mask (cpu, mask)
+        smp_call_function_single
+            (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
+}
 
 void zero_vhpt_stats(void)
 {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenasm.S
--- a/xen/arch/ia64/xen/xenasm.S        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenasm.S        Tue May 30 14:30:34 2006 -0500
@@ -11,242 +11,160 @@
 #include <asm/pgtable.h>
 #include <asm/vhpt.h>
 
-#if 0
-// FIXME: there's gotta be a better way...
-// ski and spaski are different... moved to xenmisc.c
-#define RunningOnHpSki(rx,ry,pn)                       \
-       addl rx = 2, r0;                                \
-       addl ry = 3, r0;                                \
-       ;;                                              \
-       mov rx = cpuid[rx];                             \
-       mov ry = cpuid[ry];                             \
-       ;;                                              \
-       cmp.eq pn,p0 = 0, rx;                           \
-       ;;                                              \
-       (pn) movl rx = 0x7000004 ;                      \
-       ;;                                              \
-       (pn) cmp.ge pn,p0 = ry, rx;                     \
-       ;;
-
-//int platform_is_hp_ski(void)
-GLOBAL_ENTRY(platform_is_hp_ski)
-       mov r8 = 0
-       RunningOnHpSki(r3,r9,p8)
-(p8)   mov r8 = 1
-       br.ret.sptk.many b0
-END(platform_is_hp_ski)
-#endif
-
 // Change rr7 to the passed value while ensuring
 // Xen is mapped into the new region.
-//   in0: new rr7 value
-//   in1: Xen virtual address of shared info (to be pinned)
 #define PSR_BITS_TO_CLEAR                                              \
        (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |         \
         IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |        \
-        IA64_PSR_DFL | IA64_PSR_DFH)
+        IA64_PSR_DFL | IA64_PSR_DFH | IA64_PSR_IC)
 // FIXME? Note that this turns off the DB bit (debug)
 #define PSR_BITS_TO_SET        IA64_PSR_BN
 
-//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void 
*shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
+//extern void ia64_new_rr7(unsigned long rid,           /* in0 */
+//                         void *shared_info,           /* in1 */
+//                         void *shared_arch_info,      /* in2 */
+//                         unsigned long shared_info_va, /* in3 */
+//                         unsigned long p_vhpt)        /* in4 */
+//Local usage:
+//  loc0=rp, loc1=ar.pfs, loc2=percpu_paddr, loc3=psr, loc4=ar.rse
+//  loc5=pal_vaddr, loc6=xen_paddr, loc7=shared_archinfo_paddr,
 GLOBAL_ENTRY(ia64_new_rr7)
        // not sure this unwind statement is correct...
        .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
-       alloc loc1 = ar.pfs, 5, 9, 0, 0
+       alloc loc1 = ar.pfs, 5, 8, 0, 0
+       movl loc2=PERCPU_ADDR
 1:     {
-         mov r28  = in0                // copy procedure index
+         mov loc3 = psr                // save psr     
+         mov loc0 = rp                 // save rp
          mov r8   = ip                 // save ip to compute branch
-         mov loc0 = rp                 // save rp
        };;
        .body
-       movl loc2=PERCPU_ADDR
-       ;;
        tpa loc2=loc2                   // grab this BEFORE changing rr7
-       ;;
-       dep loc8=0,in4,60,4
-       ;;
-#if VHPT_ENABLED
-       mov loc6=in3
-       ;;
-       //tpa loc6=loc6                 // grab this BEFORE changing rr7
-       ;;
-#endif
-       mov loc5=in1
-       ;;
-       tpa loc5=loc5                   // grab this BEFORE changing rr7
-       ;;
-       mov loc7=in2                    // arch_vcpu_info_t
-       ;;
-       tpa loc7=loc7                   // grab this BEFORE changing rr7
-       ;;
-       mov loc3 = psr                  // save psr
-       adds r8  = 1f-1b,r8             // calculate return address for call
-       ;;
+       tpa in1=in1                     // grab shared_info BEFORE changing rr7
+       adds r8 = 1f-1b,r8              // calculate return address for call
+       ;;
+       tpa loc7=in2                    // grab arch_vcpu_info BEFORE chg rr7
+       movl r17=PSR_BITS_TO_SET
+       mov loc4=ar.rsc                 // save RSE configuration
+       movl r16=PSR_BITS_TO_CLEAR
+       ;; 
        tpa r8=r8                       // convert rp to physical
-       ;;
-       mov loc4=ar.rsc                 // save RSE configuration
-       ;;
        mov ar.rsc=0                    // put RSE in enforced lazy, LE mode
-       movl r16=PSR_BITS_TO_CLEAR
-       movl r17=PSR_BITS_TO_SET
-       ;;
        or loc3=loc3,r17                // add in psr the bits to set
        ;;
        andcm r16=loc3,r16              // removes bits to clear from psr
+       dep loc6=0,r8,0,KERNEL_TR_PAGE_SHIFT // Xen code paddr
        br.call.sptk.many rp=ia64_switch_mode_phys
 1:
        // now in physical mode with psr.i/ic off so do rr7 switch
-       dep     r16=-1,r0,61,3
-       ;;
+       movl r16=pal_vaddr              // Note: belong to region 7!
+       ;; 
        mov     rr[r16]=in0
+       ;; 
        srlz.d
-       ;;
+       dep     r16=0,r16,60,4          // Get physical address.
+       ;;
+       ld8 loc5=[r16]                  // read pal_vaddr
+       movl    r26=PAGE_KERNEL
+       ;; 
 
        // re-pin mappings for kernel text and data
-       mov r18=KERNEL_TR_PAGE_SHIFT<<2
+       mov r24=KERNEL_TR_PAGE_SHIFT<<2
        movl r17=KERNEL_START
        ;;
-       rsm psr.i | psr.ic
-       ;;
-       srlz.i
-       ;;
-       ptr.i   r17,r18
-       ptr.d   r17,r18
-       ;;
-       mov cr.itir=r18
+       ptr.i   r17,r24
+       ptr.d   r17,r24
+       mov r16=IA64_TR_KERNEL
+       mov cr.itir=r24
        mov cr.ifa=r17
-       mov r16=IA64_TR_KERNEL
-       //mov r3=ip
-       movl r18=PAGE_KERNEL
-       ;;
-       dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
-       ;;
-       or r18=r2,r18
-       ;;
-       srlz.i
+       or r18=loc6,r26
        ;;
        itr.i itr[r16]=r18
-       ;;
+       ;; 
        itr.d dtr[r16]=r18
-       ;;
-
-       // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
+
+       // re-pin mappings for stack (current)
 
        // unless overlaps with KERNEL_TR
        dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
        ;;
        cmp.eq p7,p0=r17,r18
 (p7)   br.cond.sptk    .stack_overlaps
-       ;;
-       movl r25=PAGE_KERNEL
+       mov r25=IA64_GRANULE_SHIFT<<2
        dep r21=0,r13,60,4              // physical address of "current"
        ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r25=IA64_GRANULE_SHIFT<<2
-       ;;
        ptr.d   r13,r25
-       ;;
+       or r23=r21,r26                  // construct PA | page properties
        mov cr.itir=r25
        mov cr.ifa=r13                  // VA of next task...
-       ;;
-       mov r25=IA64_TR_CURRENT_STACK
+       mov r21=IA64_TR_CURRENT_STACK
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+
+       //  Per-cpu     
+.stack_overlaps:
+       mov r24=PERCPU_PAGE_SHIFT<<2
+       movl r22=PERCPU_ADDR
+       ;;
+       ptr.d   r22,r24
+       or r23=loc2,r26                 // construct PA | page properties
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       mov r25=IA64_TR_PERCPU_DATA
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-.stack_overlaps:
-
-       movl r22=PERCPU_ADDR
-       ;;
-       movl r25=PAGE_KERNEL
-       ;;
-       mov r21=loc2                    // saved percpu physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PERCPU_PAGE_SHIFT<<2
+
+       // VHPT
+#if VHPT_ENABLED
+       mov r24=VHPT_SIZE_LOG2<<2
+       movl r22=VHPT_ADDR
+       mov r21=IA64_TR_VHPT
        ;;
        ptr.d   r22,r24
-       ;;
+       or r23=in4,r26                  // construct PA | page properties
        mov cr.itir=r24
        mov cr.ifa=r22
        ;;
-       mov r25=IA64_TR_PERCPU_DATA
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-
-#if VHPT_ENABLED
-       movl r22=VHPT_ADDR
-       ;;
-       movl r25=PAGE_KERNEL
-       ;;
-       mov r21=loc6                    // saved vhpt physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=VHPT_SIZE_LOG2<<2
+       itr.d dtr[r21]=r23              // wire in new mapping...
+#endif
+
+       //  Shared info
+       mov r24=PAGE_SHIFT<<2
+       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+       ;;
+       ptr.d   in3,r24
+       or r23=in1,r25                  // construct PA | page properties
+       mov cr.itir=r24
+       mov cr.ifa=in3
+       mov r21=IA64_TR_SHARED_INFO
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+       
+       // Map for arch_vcpu_info_t
+       movl r22=XSI_OFS
+       mov r24=PAGE_SHIFT<<2
+       ;; 
+       add r22=r22,in3
        ;;
        ptr.d   r22,r24
-       ;;
+       or r23=loc7,r25                 // construct PA | page properties
        mov cr.itir=r24
        mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_VHPT
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-#endif
-
-       movl r22=SHAREDINFO_ADDR
-       ;;
-       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
-       ;;
-       mov r21=loc5                    // saved sharedinfo physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PAGE_SHIFT<<2
-       ;;
-       ptr.d   r22,r24
-       ;;
-       mov cr.itir=r24
-       mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_SHARED_INFO
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-       // Map for arch_vcpu_info_t
-       movl r22=SHARED_ARCHINFO_ADDR
-       ;;
-       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
-       ;;
-       mov r21=loc7                    // saved sharedinfo physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PAGE_SHIFT<<2
-       ;;
-       ptr.d   r22,r24
-       ;;
-       mov cr.itir=r24
-       mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_ARCH_INFO
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-
-       //Purge/insert PAL TR
+       mov r21=IA64_TR_ARCH_INFO
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+
+       // Purge/insert PAL TR
        mov r24=IA64_TR_PALCODE
-       movl r25=PAGE_KERNEL
-       ;;
-       or loc8=r25,loc8
        mov r23=IA64_GRANULE_SHIFT<<2
-       ;;
-       ptr.i   in4,r23
-       ;;
+       dep r25=0,loc5,60,4             // convert pal vaddr to paddr
+       ;;
+       ptr.i   loc5,r23
+       or r25=r25,r26          // construct PA | page properties
        mov cr.itir=r23
-       mov cr.ifa=in4
-       ;;
-       itr.i itr[r24]=loc8
-       ;;
+       mov cr.ifa=loc5
+       ;;
+       itr.i itr[r24]=r25
 
        // done, switch back to virtual and return
        mov r16=loc3                    // r16= original psr
@@ -261,6 +179,7 @@ 1:
        br.ret.sptk.many rp
 END(ia64_new_rr7)
 
+#if 0 /* Not used */
 #include "minstate.h"
 
 GLOBAL_ENTRY(ia64_prepare_handle_privop)
@@ -301,6 +220,7 @@ GLOBAL_ENTRY(ia64_prepare_handle_reflect
        DO_LOAD_SWITCH_STACK
        br.cond.sptk.many rp                    // goes to ia64_leave_kernel
 END(ia64_prepare_handle_reflection)
+#endif
 
 GLOBAL_ENTRY(__get_domain_bundle)
        EX(.failure_in_get_bundle,ld8 r8=[r32],8)
@@ -331,80 +251,9 @@ GLOBAL_ENTRY(dorfirfi)
         mov cr.ipsr=r17
         mov cr.ifs=r18
        ;;
-        // fall through
+        rfi
+       ;;
 END(dorfirfi)
-
-GLOBAL_ENTRY(dorfi)
-        rfi
-       ;;
-END(dorfirfi)
-
-//
-// Long's Peak UART Offsets
-//
-#define COM_TOP 0xff5e0000
-#define COM_BOT 0xff5e2000
-
-// UART offsets        
-#define UART_TX                0       /* Out: Transmit buffer (DLAB=0) */
-#define UART_INT_ENB   1       /* interrupt enable (DLAB=0) */ 
-#define UART_INT_ID    2       /* Interrupt ID register */
-#define UART_LINE_CTL  3       /* Line control register */
-#define UART_MODEM_CTL 4       /* Modem Control Register */
-#define UART_LSR       5       /* In:  Line Status Register */
-#define UART_MSR       6       /* Modem status register */     
-#define UART_DLATCH_LOW UART_TX
-#define UART_DLATCH_HIGH UART_INT_ENB
-#define COM1   0x3f8
-#define COM2   0x2F8
-#define COM3   0x3E8
-
-/* interrupt enable bits (offset 1) */
-#define DATA_AVAIL_INT 1
-#define XMIT_HOLD_EMPTY_INT 2
-#define LINE_STAT_INT 4
-#define MODEM_STAT_INT 8
-
-/* line status bits (offset 5) */
-#define REC_DATA_READY 1
-#define OVERRUN 2
-#define PARITY_ERROR 4
-#define FRAMING_ERROR 8
-#define BREAK_INTERRUPT 0x10
-#define XMIT_HOLD_EMPTY 0x20
-#define XMIT_SHIFT_EMPTY 0x40
-
-// Write a single character
-// input: r32 = character to be written
-// output: none
-GLOBAL_ENTRY(longs_peak_putc)  
-       rsm psr.dt
-        movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
-       ;;
-       srlz.i
-       ;;
-
-.Chk_THRE_p:
-        ld1.acq r18=[r16]
-        ;;
-       
-       and r18 = XMIT_HOLD_EMPTY, r18
-       ;;
-       cmp4.eq p6,p0=0,r18
-       ;;
-       
-(p6)    br .Chk_THRE_p
-       ;;
-        movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
-       ;;
-       st1.rel [r16]=r32
-       ;;
-       ssm psr.dt
-       ;;
-       srlz.i
-       ;;
-       br.ret.sptk.many b0
-END(longs_peak_putc)   
 
 /* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
 GLOBAL_ENTRY(pal_emulator_static)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenmisc.c       Tue May 30 14:30:34 2006 -0500
@@ -267,6 +267,9 @@ void context_switch(struct vcpu *prev, s
            vmx_load_state(next);
     /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
     prev = ia64_switch_to(next);
+
+    //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);
+
     if (!VMX_DOMAIN(current)){
            vcpu_set_next_timer(current);
     }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xensetup.c      Tue May 30 14:30:34 2006 -0500
@@ -415,8 +415,7 @@ printk("About to call domain_create()\n"
     printk("About to call construct_dom0()\n");
     dom0_memory_start = (unsigned long) __va(initial_images_start);
     dom0_memory_size = ia64_boot_param->domain_size;
-    dom0_initrd_start = (unsigned long) __va(initial_images_start +
-                            PAGE_ALIGN(ia64_boot_param->domain_size));
+    dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
     dom0_initrd_size = ia64_boot_param->initrd_size;
  
     if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_size,
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/dom0_ops.c   Tue May 30 14:30:34 2006 -0500
@@ -404,27 +404,6 @@ long arch_do_dom0_op(struct dom0_op *op,
     }
     break;
 
-    case DOM0_PHYSICAL_MEMORY_MAP:
-    {
-        struct dom0_memory_map_entry entry;
-        int i;
-
-        for ( i = 0; i < e820.nr_map; i++ )
-        {
-            if ( i >= op->u.physical_memory_map.max_map_entries )
-                break;
-            entry.start  = e820.map[i].addr;
-            entry.end    = e820.map[i].addr + e820.map[i].size;
-            entry.is_ram = (e820.map[i].type == E820_RAM);
-            (void)copy_to_guest_offset(
-                op->u.physical_memory_map.memory_map, i, &entry, 1);
-        }
-
-        op->u.physical_memory_map.nr_map_entries = i;
-        (void)copy_to_guest(u_dom0_op, op, 1);
-    }
-    break;
-
     case DOM0_HYPERCALL_INIT:
     {
         struct domain *d; 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain.c     Tue May 30 14:30:34 2006 -0500
@@ -146,6 +146,8 @@ struct vcpu *alloc_vcpu_struct(struct do
     v->arch.guest_vl4table = __linear_l4_table;
 #endif
 
+    pae_l3_cache_init(&v->arch.pae_l3_cache);
+
     return v;
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain_build.c       Tue May 30 14:30:34 2006 -0500
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
-    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    /*
+     * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+     * mapping covers the allocation.
+     */
     if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/hvm.c    Tue May 30 14:30:34 2006 -0500
@@ -185,11 +185,16 @@ void hvm_setup_platform(struct domain* d
 void hvm_setup_platform(struct domain* d)
 {
     struct hvm_domain *platform;
-
-    if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+    struct vcpu *v=current;
+
+    if ( !hvm_guest(v) || (v->vcpu_id != 0) )
         return;
 
-    shadow_direct_map_init(d);
+    if ( shadow_direct_map_init(d) == 0 )
+    {
+        printk("Can not allocate shadow direct map for HVM domain.\n");
+        domain_crash_synchronous();
+    }
 
     hvm_map_io_shared_page(d);
     hvm_get_info(d);
@@ -204,7 +209,8 @@ void hvm_setup_platform(struct domain* d
         hvm_vioapic_init(d);
     }
 
-    pit_init(&platform->vpit, current);
+    init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, 
v->processor);
+    pit_init(v, cpu_khz);
 }
 
 void pic_irq_request(void *data, int level)
@@ -234,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
         } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
         do_pic_irqs(pic, irqs);
     }
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    u64    host_tsc;
+    
+    rdtscll(host_tsc);
+    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 int cpu_get_interrupt(struct vcpu *v, int *type)
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/i8254.c  Tue May 30 14:30:34 2006 -0500
@@ -22,11 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
  * Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
  * move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
  */
 
 #include <xen/config.h>
@@ -42,184 +41,117 @@
 #include <asm/hvm/vpit.h>
 #include <asm/current.h>
 
-/*#define DEBUG_PIT*/
+/* Enable DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
 
 #define RW_STATE_LSB 1
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
 
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP 
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
-    /* TODO: add pause/unpause support */
-    return NOW();
+#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif            
+        } l;
+    } u, res;
+    uint64_t rl, rh;
+
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+/*
+ * get processor time.
+ * unit: TSC
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+    uint64_t  gtsc;
+    gtsc = hvm_get_guest_time(v);
+    return gtsc;
 }
 
 static int pit_get_count(PITChannelState *s)
 {
-    u64 d;
-    u64 counter;
-
-    d = hvm_get_clock() - s->count_load_time;
+    uint64_t d;
+    int  counter;
+
+    d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     case 0:
     case 1:
     case 4:
     case 5:
-        counter = (s->period - d) & 0xffff;
+        counter = (s->count - d) & 0xffff;
         break;
     case 3:
         /* XXX: may be incorrect for odd counts */
-        counter = s->period - ((2 * d) % s->period);
+        counter = s->count - ((2 * d) % s->count);
         break;
     default:
-        /* mod 2 counter handle */
-        d = hvm_get_clock() - s->hvm_time->count_point;
-        d += s->hvm_time->count_advance;
-        counter = s->period - (d % s->period);
-        break;
-    }
-    /* change from ns to pit counter */
-    counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+        counter = s->count - (d % s->count);
+        break;
+    }
     return counter;
 }
 
 /* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
-    u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+    uint64_t d;
     int out;
 
-    d = current_time - s->count_load_time;
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     default:
     case 0:
-        out = (d >= s->period);
+        out = (d >= s->count);
         break;
     case 1:
-        out = (d < s->period);
+        out = (d < s->count);
         break;
     case 2:
-        /* mod2 out is no meaning, since intr are generated in background */
-        if ((d % s->period) == 0 && d != 0)
+        if ((d % s->count) == 0 && d != 0)
             out = 1;
         else
             out = 0;
         break;
     case 3:
-        out = (d % s->period) < ((s->period + 1) >> 1);
+        out = (d % s->count) < ((s->count + 1) >> 1);
         break;
     case 4:
     case 5:
-        out = (d == s->period);
+        out = (d == s->count);
         break;
     }
     return out;
 }
 
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
 {
     PITChannelState *s = &pit->channels[channel];
     return pit_get_out1(s, current_time);
 }
 
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
-    struct hvm_time_info *hvm_time = s->hvm_time;
-    struct domain *d = (void *) s - 
-        offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
-    /* ticks from current time(expected time) to NOW */ 
-    int missed_ticks;
-    /* current_time is expected time for next intr, check if it's true
-     * (actimer has a TIMER_SLOP in advance)
-     */
-    s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
-    if (missed_time >= 0) {
-        missed_ticks = missed_time/(s_time_t)s->period + 1;
-        if (test_bit(_DOMF_debugging, &d->domain_flags)) {
-            hvm_time->pending_intr_nr++;
-        } else {
-            hvm_time->pending_intr_nr += missed_ticks;
-        }
-        s->next_transition_time = current_time + (missed_ticks ) * s->period;
-    }
-
-    return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- *  -2: init state
- *  -1: the mode has expired
- *   0: current VCPU is not running
- *  >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s, 
-                                            s64 current_time)
-{
-    s64 d, next_time, base;
-    int period2;
-    struct hvm_time_info *hvm_time = s->hvm_time;
-
-    d = current_time - s->count_load_time;
-    switch(s->mode) {
-    default:
-    case 0:
-    case 1:
-        if (d < s->period)
-            next_time = s->period;
-        else
-            return -1;
-        break;
-    case 2:
-        next_time = missed_ticks(s, current_time);
-        if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
-            return 0;
-        break;
-    case 3:
-        base = (d / s->period) * s->period;
-        period2 = ((s->period + 1) >> 1);
-        if ((d - base) < period2) 
-            next_time = base + period2;
-        else
-            next_time = base + s->period;
-        break;
-    case 4:
-    case 5:
-        if (d < s->period)
-            next_time = s->period;
-        else if (d == s->period)
-            next_time = s->period + 1;
-        else
-            return -1;
-        break;
-    case 0xff:
-        return -2;      /* for init state */ 
-        break;
-    }
-    /* XXX: better solution: use a clock at PIT_FREQ Hz */
-    if (next_time <= current_time){
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:next_time <= current_time. next=0x%llx, 
current=0x%llx!\n",next_time, current_time);
-#endif
-        next_time = current_time + 1;
-    }
-    return next_time;
-}
-
 /* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
 {
     PITChannelState *s = &pit->channels[channel];
 
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
     case 5:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         break;
     case 2:
     case 3:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         /* XXX: disable/enable counting */
         break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
     s->gate = val;
 }
 
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
 {
     PITChannelState *s = &pit->channels[channel];
     return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
 
 static inline void pit_load_count(PITChannelState *s, int val)
 {
+    u32   period;
     if (val == 0)
         val = 0x10000;
-
-    s->count_load_time = hvm_get_clock();
+    s->count_load_time = hvm_get_clock(s->vcpu);
     s->count = val;
-    s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
 
 #ifdef DEBUG_PIT
-    printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, 
load_time=%lld\n",
+    printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, 
load_time=%lld\n",
+            s,
             val,
-            s->period / 1000,
+            period,
             s->mode,
-            s->count_load_time);
+            (long long)s->count_load_time);
 #endif
 
-    if (s->mode == HVM_PIT_ACCEL_MODE) {
-        if (!s->hvm_time) {
-            printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
-            return;
-        }
-        s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
-        s->hvm_time->first_injected = 0;
-
-        if (s->period < 900000) { /* < 0.9 ms */
-            printk("HVM_PIT: guest programmed too small an count: %x\n",
-                    s->count);
-            s->period = 1000000;
-        }
-    }
-        
-    pit_irq_timer_update(s, s->count_load_time);
+    switch (s->mode) {
+        case 2:
+            /* create periodic time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+            break;
+        case 1:
+            /* create one shot time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+            printk("HVM_PIT: create one shot time.\n");
+#endif
+            break;
+        default:
+            break;
+    }
 }
 
 /* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
     }
 }
 
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
-    hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PITState *pit = opaque;
     int channel, access;
     PITChannelState *s;
     val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
                     if (!(val & 0x10) && !s->status_latched) {
                         /* status latch */
                         /* XXX: add BCD and null count */
-                        s->status =  (pit_get_out1(s, hvm_get_clock()) << 7) |
+                        s->status =  (pit_get_out1(s, hvm_get_clock(s->vcpu)) 
<< 7) |
                             (s->rw_mode << 4) |
                             (s->mode << 1) |
                             s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
     }
 }
 
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
-    hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+    PITState *pit = opaque;
     int ret, count;
     PITChannelState *s;
     
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
     return ret;
 }
 
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
-    s64 expire_time;
-    int irq_level;
-    struct vcpu *v = current;
-    struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
-    if (!s->hvm_time || s->mode == 0xff)
-        return;
-
-    expire_time = pit_get_next_transition_time(s, current_time);
-    /* not generate intr by direct pic_set_irq in mod 2
-     * XXX:mod 3 should be same as mod 2
-     */
-    if (s->mode != HVM_PIT_ACCEL_MODE) {
-        irq_level = pit_get_out1(s, current_time);
-        pic_set_irq(pic, s->irq, irq_level);
-        s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
-                irq_level, 
-                (expire_time - current_time));
-#endif
-    }
-
-    if (expire_time > 0)
-        set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
-    PITChannelState *s = data;
-
-    pit_irq_timer_update(s, s->next_transition_time);
-}
-
 static void pit_reset(void *opaque)
 {
-    hvm_virpit *pit = opaque;
+    PITState *pit = opaque;
     PITChannelState *s;
     int i;
 
     for(i = 0;i < 3; i++) {
         s = &pit->channels[i];
+        if ( s -> pt ) {
+            destroy_periodic_time (s->pt);
+            s->pt = NULL;
+        }
         s->mode = 0xff; /* the init mode */
         s->gate = (i != 2);
         pit_load_count(s, 0);
     }
 }
 
-/* hvm_io_assist light-weight version, specific to PIT DM */ 
-static void resume_pit_io(ioreq_t *p)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax = regs->eax;
-    p->state = STATE_INVALID;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        BUG();
-    }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+    PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+
+    s = &pit->channels[0];
+    /* the timer 0 is connected to an IRQ */
+    s->vcpu = v;
+    s++; s->vcpu = v;
+    s++; s->vcpu = v;
+
+    register_portio_handler(PIT_BASE, 4, handle_pit_io);
+    /* register the speaker port */
+    register_portio_handler(0x61, 1, handle_speaker_io);
+    ticks_per_sec(v) = cpu_khz * (int64_t)1000; 
+#ifdef DEBUG_PIT
+    printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+    pit_reset(pit);
+    return;
 }
 
 /* the intercept action for PIT DM retval:0--not handled; 1--handled */  
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
     if (p->dir == 0) {/* write */
         pit_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) { /* read */
-        p->u.data = pit_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
-    }
-
-    /* always return 1, since PIT sit in HV now */
+        if ( (p->addr & 3) != 3 ) {
+            p->u.data = pit_ioport_read(vpit, p->addr);
+        } else {
+            printk("HVM_PIT: read A1:A0=3!\n");
+        }
+    }
     return 1;
 }
 
 static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
-    hvm_virpit *pit = opaque;
-    val &= 0xff;
+    PITState *pit = opaque;
     pit->speaker_data_on = (val >> 1) & 1;
     pit_set_gate(pit, 2, val & 1);
 }
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
 static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
 {
     int out;
-    hvm_virpit *pit = opaque;
-    out = pit_get_out(pit, 2, hvm_get_clock());
+    PITState *pit = opaque;
+    out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
     pit->dummy_refresh_clock ^= 1;
 
     return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
       (pit->dummy_refresh_clock << 4);
 }
 
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
         speaker_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) {/* read */
         p->u.data = speaker_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
     }
 
     return 1;
 }
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
-    s64 next_time;
-    PITChannelState *s = &(vpit->channels[0]);
-    if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
-        next_time = pit_get_next_transition_time(s, s->next_transition_time); 
-        if (next_time >= 0)
-            set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-    }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
-    PITChannelState *s;
-    struct hvm_time_info *hvm_time;
-
-    s = &pit->channels[0];
-    /* the timer 0 is connected to an IRQ */
-    s->irq = 0;
-    /* channel 0 need access the related time info for intr injection */
-    hvm_time = s->hvm_time = &pit->time_info;
-    hvm_time->vcpu = v;
-
-    init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
-    register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
-    /* register the speaker port */
-    register_portio_handler(0x61, 1, handle_speaker_io);
-
-    pit_reset(pit);
-
-    return;
-
-}
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/intercept.c      Tue May 30 14:30:34 2006 -0500
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
     evtchn_set_pending(v, iopacket_port(v));
 }
 
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+    int missed_ticks;
+
+    missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
+    if ( missed_ticks++ >= 0 ) {
+        if ( missed_ticks > 1000 ) {
+            /* TODO: Adjust guest time togther */
+            pt->pending_intr_nr ++;
+        }
+        else {
+            pt->pending_intr_nr += missed_ticks;
+        }
+        pt->scheduled += missed_ticks * pt->period;
+    }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    /* pick up missed timer tick */
+    missed_ticks(pt);
+    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/* pick up missed timer ticks at deactive time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+    if ( !active_timer(&(pt->timer)) ) {
+        missed_ticks(pt);
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+        struct vcpu *v, 
+        u32 period, 
+        char irq,
+        char one_shot)
+{
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+    if ( pt->enabled ) {
+        if ( v->vcpu_id != 0 ) {
+            printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+        }
+        stop_timer (&pt->timer);
+        pt->enabled = 0;
+    }
+    pt->pending_intr_nr = 0;
+    pt->first_injected = 0;
+    if (period < 900000) { /* < 0.9 ms */
+        printk("HVM_PlatformTime: program too small period %u\n",period);
+        period = 900000;   /* force to 0.9ms */
+    }
+    pt->period = period;
+    pt->irq = irq;
+    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->one_shot = one_shot;
+    if ( one_shot ) {
+        printk("HVM_PL: No support for one shot platform time yet\n");
+    }
+    pt->scheduled = NOW() + period;
+    set_timer (&pt->timer,pt->scheduled);
+    pt->enabled = 1;
+    return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+    if ( pt->enabled ) {
+        stop_timer(&pt->timer);
+        pt->enabled = 0;
+    }
+}
 
 /*
  * Local variables:
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/intr.c       Tue May 30 14:30:34 2006 -0500
@@ -44,45 +44,33 @@
  */
 #define BSP_CPU(v)    (!(v->vcpu_id))
 
-u64 svm_get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void svm_set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct  periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = svm_get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            svm_set_guest_time(v, pt->last_plt_gtime);
         }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        svm_set_guest_time(v, time_info->last_pit_gtime);
     }
 
     switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
-    struct hvm_virpit *vpit = &plat->vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     int intr_type = VLAPIC_DELIV_MODE_EXT;
     int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
       if ( cpu_has_pending_irq(v) ) {
            intr_vector = cpu_get_interrupt(v, &intr_type);
       }
-      else  if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-          pic_set_irq(pic, 0, 0);
-          pic_set_irq(pic, 0, 1);
+      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
           intr_vector = cpu_get_interrupt(v, &intr_type);
       }
     }
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
             /* Re-injecting a PIT interruptt? */
             if (re_injecting && 
                 is_pit_irq(v, intr_vector, intr_type)) {
-                    ++time_info->pending_intr_nr;
+                    ++pt->pending_intr_nr;
             }
             /* let's inject this interrupt */
             TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue May 30 14:30:34 2006 -0500
@@ -51,13 +51,6 @@
 
 #define SVM_EXTRA_DEBUG
 
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
 /* Useful define */
 #define MAX_INST_SIZE  15
 
@@ -458,6 +451,9 @@ int start_svm(void)
     
     if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
         return 0;
+    svm_globals[cpu].hsa = alloc_host_save_area();
+    if (! svm_globals[cpu].hsa)
+        return 0;
     
     rdmsr(MSR_EFER, eax, edx);
     eax |= EFER_SVME;
@@ -466,7 +462,6 @@ int start_svm(void)
     printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
 
     /* Initialize the HSA for this core */
-    svm_globals[cpu].hsa = alloc_host_save_area();
     phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa ); 
     phys_hsa_lo = (u32) phys_hsa;
     phys_hsa_hi = (u32) (phys_hsa >> 32);    
@@ -670,12 +665,11 @@ static void arch_svm_do_launch(struct vc
 
 static void svm_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -752,7 +746,7 @@ static void svm_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -782,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
 
 void svm_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer( &pt->timer, v->processor );
+        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC( v ))
         migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
 }
@@ -814,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
 
         handle_mmio(va, va);
-        TRACE_VMEXIT(2,2);
         return 1;
     }
 
@@ -840,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
         }
 
-        TRACE_VMEXIT (2,2);
         handle_mmio(va, gpa);
 
         return 1;
@@ -852,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
         /* Let's make sure that the Guest TLB is flushed */
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
-
-    TRACE_VMEXIT (2,result);
 
     return result;
 }
@@ -1899,14 +1891,8 @@ static inline void svm_do_msr_access(str
         regs->edx = 0;
         switch (regs->ecx) {
         case MSR_IA32_TIME_STAMP_COUNTER:
-        {
-            struct hvm_time_info *time_info;
-
-            rdtscll(msr_content);
-            time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-            msr_content += time_info->cache_tsc_offset;
+            msr_content = hvm_get_guest_time(v);
             break;
-        }
         case MSR_IA32_SYSENTER_CS:
             msr_content = vmcb->sysenter_cs;
             break;
@@ -1973,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     s_time_t  next_pit = -1, next_wakeup;
 
     __update_guest_eip(vmcb, 1);
@@ -1983,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
        return; 
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v, vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue May 30 14:30:34 2006 -0500
@@ -139,17 +139,20 @@ static int construct_vmcb_controls(struc
 
     /* The following is for I/O and MSR permision map */
     iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));
-
-    ASSERT(iopm);
-    memset(iopm, 0xff, IOPM_SIZE);
-    clear_bit(PC_DEBUG_PORT, iopm);
+    if (iopm)
+    {
+        memset(iopm, 0xff, IOPM_SIZE);
+        clear_bit(PC_DEBUG_PORT, iopm);
+    }
     msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
-
-    ASSERT(msrpm);
-    memset(msrpm, 0xff, MSRPM_SIZE);
+    if (msrpm)
+        memset(msrpm, 0xff, MSRPM_SIZE);
 
     arch_svm->iopm = iopm;
     arch_svm->msrpm = msrpm;
+
+    if (! iopm || ! msrpm)
+        return 1;
 
     vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm);
     vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm);
@@ -439,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
 void svm_do_resume(struct vcpu *v) 
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
 
     svm_stts(v);
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            time_info->count_point = NOW();
-            v->domain->arch.hvm_domain.guest_time = 0;
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
         }
-        pickup_deactive_ticks(vpit);
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/io.c Tue May 30 14:30:34 2006 -0500
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64  offset)
 #endif
 }
 
-u64 get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    __set_tsc_offset(time_info->cache_tsc_offset);
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
-        }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        set_guest_time(v, time_info->last_pit_gtime);
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            set_guest_time(v, pt->last_plt_gtime);
+        }
     }
 
     switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long eflags;
     struct vcpu *v = current;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain;
-    struct hvm_time_info *time_info = &plat->vpit.time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
     if ( v->vcpu_id == 0 )
         hvm_pic_assist(v);
 
-    if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-        pic_set_irq(pic, 0, 0);
-        pic_set_irq(pic, 0, 1);
+    if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+        pic_set_irq(pic, pt->irq, 0);
+        pic_set_irq(pic, pt->irq, 1);
     }
 
     has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
 void vmx_do_resume(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
 
     vmx_stts();
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            time_info->count_point = NOW();
-            set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            v->domain->arch.hvm_domain.guest_time = 0;
-        }
-        pickup_deactive_ticks(vpit);
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
+        }
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue May 30 14:30:34 2006 -0500
@@ -47,7 +47,7 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
 
 static void vmx_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
 
 void vmx_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer(&pt->timer, v->processor);
+        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC(v))
         migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
 }
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
                 (unsigned long)regs->edx);
     switch (regs->ecx) {
     case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct hvm_time_info *time_info;
-
-        rdtscll(msr_content);
-        time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
-        msr_content += time_info->cache_tsc_offset;
-        break;
-    }
+        msr_content = hvm_get_guest_time(v);
+        break;
     case MSR_IA32_SYSENTER_CS:
         __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
         break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
 void vmx_vmexit_do_hlt(void)
 {
     struct vcpu *v=current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     s_time_t   next_pit=-1,next_wakeup;
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v,vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/mm.c Tue May 30 14:30:34 2006 -0500
@@ -260,9 +260,82 @@ void share_xen_page_with_privileged_gues
     share_xen_page_with_guest(page, dom_xen, readonly);
 }
 
+#if defined(CONFIG_X86_PAE)
+
+#ifdef NDEBUG
+/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
+#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#else
+/* In debug builds we aggressively shadow PDPTs to exercise code paths. */
+#define l3tab_needs_shadow(mfn) ((mfn << PAGE_SHIFT) != __pa(idle_pg_table))
+#endif
+
+static l1_pgentry_t *fix_pae_highmem_pl1e;
+
+/* Cache the address of PAE high-memory fixmap page tables. */
+static int __init cache_pae_fixmap_address(void)
+{
+    unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
+    l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
+    fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
+    return 0;
+}
+__initcall(cache_pae_fixmap_address);
+
+static void __write_ptbase(unsigned long mfn)
+{
+    l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+    struct pae_l3_cache *cache = &current->arch.pae_l3_cache;
+    unsigned int cpu = smp_processor_id();
+
+    /* Fast path 1: does this mfn need a shadow at all? */
+    if ( !l3tab_needs_shadow(mfn) )
+    {
+        write_cr3(mfn << PAGE_SHIFT);
+        return;
+    }
+
+    /* Caching logic is not interrupt safe. */
+    ASSERT(!in_irq());
+
+    /* Fast path 2: is this mfn already cached? */
+    if ( cache->high_mfn == mfn )
+    {
+        write_cr3(__pa(cache->table[cache->inuse_idx]));
+        return;
+    }
+
+    /* Protects against pae_flush_pgd(). */
+    spin_lock(&cache->lock);
+
+    cache->inuse_idx ^= 1;
+    cache->high_mfn   = mfn;
+
+    /* Map the guest L3 table and copy to the chosen low-memory cache. */
+    *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+    highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+    lowmem_l3tab  = cache->table[cache->inuse_idx];
+    memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
+    *(fix_pae_highmem_pl1e - cpu) = l1e_empty();
+
+    /* Install the low-memory L3 table in CR3. */
+    write_cr3(__pa(lowmem_l3tab));
+
+    spin_unlock(&cache->lock);
+}
+
+#else /* !CONFIG_X86_PAE */
+
+static void __write_ptbase(unsigned long mfn)
+{
+    write_cr3(mfn << PAGE_SHIFT);
+}
+
+#endif /* !CONFIG_X86_PAE */
+
 void write_ptbase(struct vcpu *v)
 {
-    write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
 }
 
 void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +474,7 @@ static int get_page_and_type_from_pagenr
     return 1;
 }
 
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -456,6 +530,7 @@ get_linear_pagetable(
 
     return 1;
 }
+#endif /* !CONFIG_X86_PAE */
 
 int
 get_page_from_l1e(
@@ -564,10 +639,6 @@ get_page_from_l3e(
     rc = get_page_and_type_from_pagenr(
         l3e_get_pfn(l3e),
         PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
-    if ( unlikely(!rc) )
-        rc = get_linear_pagetable(l3e, pfn, d);
-#endif
     return rc;
 }
 #endif /* 3 level */
@@ -773,6 +844,41 @@ static int create_pae_xen_mappings(l3_pg
     return 1;
 }
 
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+    unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+    struct domain *d = page_get_owner(mfn_to_page(mfn));
+    struct vcpu   *v;
+    intpte_t       _ol3e, _nl3e, _pl3e;
+    l3_pgentry_t  *l3tab_ptr;
+    struct pae_l3_cache *cache;
+
+    /* If below 4GB then the pgdir is not shadowed in low memory. */
+    if ( !l3tab_needs_shadow(mfn) )
+        return;
+
+    for_each_vcpu ( d, v )
+    {
+        cache = &v->arch.pae_l3_cache;
+
+        spin_lock(&cache->lock);
+
+        if ( cache->high_mfn == mfn )
+        {
+            l3tab_ptr = &cache->table[cache->inuse_idx][idx];
+            _ol3e = l3e_get_intpte(*l3tab_ptr);
+            _nl3e = l3e_get_intpte(nl3e);
+            _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+            BUG_ON(_pl3e != _ol3e);
+        }
+
+        spin_unlock(&cache->lock);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
 {
@@ -787,6 +893,7 @@ static inline int l1_backptr(
 
 #elif CONFIG_X86_64
 # define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
 
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +993,6 @@ static int alloc_l3_table(struct page_in
 
     ASSERT(!shadow_mode_refcounts(d));
 
-#ifdef CONFIG_X86_PAE
-    if ( pfn >= 0x100000 )
-    {
-        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
-        return 0;
-    }
-#endif
-
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
@@ -1240,6 +1339,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     okay = create_pae_xen_mappings(pl3e);
     BUG_ON(!okay);
+
+    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
     put_page_from_l3e(ol3e, pfn);
     return 1;
@@ -2811,6 +2912,8 @@ long do_update_descriptor(u64 pa, u64 de
     return ret;
 }
 
+typedef struct e820entry e820entry_t;
+DEFINE_XEN_GUEST_HANDLE(e820entry_t);
 
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
@@ -2869,6 +2972,39 @@ long arch_memory_op(int op, XEN_GUEST_HA
         break;
     }
 
+    case XENMEM_memory_map:
+    {
+        return -ENOSYS;
+    }
+
+    case XENMEM_machine_memory_map:
+    {
+        struct xen_memory_map memmap;
+        XEN_GUEST_HANDLE(e820entry_t) buffer;
+        int count;
+
+        if ( !IS_PRIV(current->domain) )
+            return -EINVAL;
+
+        if ( copy_from_guest(&memmap, arg, 1) )
+            return -EFAULT;
+        if ( memmap.nr_entries < e820.nr_map + 1 )
+            return -EINVAL;
+
+        buffer = guest_handle_cast(memmap.buffer, e820entry_t);
+
+        count = min((unsigned int)e820.nr_map, memmap.nr_entries);
+        if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+            return -EFAULT;
+
+        memmap.nr_entries = count;
+
+        if ( copy_to_guest(arg, &memmap, 1) )
+            return -EFAULT;
+
+        return 0;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
@@ -3074,7 +3210,7 @@ void ptwr_flush(struct domain *d, const 
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
         /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        write_cr3(pagetable_get_paddr(
+        __write_ptbase(pagetable_get_pfn(
             d->arch.ptwr[which].vcpu->arch.guest_table));
     else
         TOGGLE_MODE();
@@ -3185,15 +3321,16 @@ static int ptwr_emulated_update(
     /* Turn a sub-word access into a full-word access. */
     if ( bytes != sizeof(paddr_t) )
     {
-        int           rc;
-        paddr_t    full;
-        unsigned int  offset = addr & (sizeof(paddr_t)-1);
+        paddr_t      full;
+        unsigned int offset = addr & (sizeof(paddr_t)-1);
 
         /* Align address; read full word. */
         addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                        sizeof(paddr_t))) )
-            return rc; 
+        if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+        {
+            propagate_page_fault(addr, 4); /* user mode, read fault */
+            return X86EMUL_PROPAGATE_FAULT;
+        }
         /* Mask out bits provided by caller. */
         full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
         /* Shift the caller value and OR in the missing bits. */
@@ -3271,7 +3408,8 @@ static int ptwr_emulated_write(
 static int ptwr_emulated_write(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, 0, val, bytes, 0);
 }
@@ -3280,7 +3418,8 @@ static int ptwr_emulated_cmpxchg(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, old, new, bytes, 1);
 }
@@ -3290,7 +3429,8 @@ static int ptwr_emulated_cmpxchg8b(
     unsigned long old,
     unsigned long old_hi,
     unsigned long new,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( CONFIG_PAGING_LEVELS == 2 )
         return X86EMUL_UNHANDLEABLE;
@@ -3299,7 +3439,7 @@ static int ptwr_emulated_cmpxchg8b(
             addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
 }
 
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
     .read_std           = x86_emulate_read_std,
     .write_std          = x86_emulate_write_std,
     .read_emulated      = x86_emulate_read_std,
@@ -3318,6 +3458,7 @@ int ptwr_do_page_fault(struct domain *d,
     l2_pgentry_t    *pl2e, l2e;
     int              which, flags;
     unsigned long    l2_idx;
+    struct x86_emulate_ctxt emul_ctxt;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         return 0;
@@ -3472,8 +3613,10 @@ int ptwr_do_page_fault(struct domain *d,
     return EXCRET_fault_fixed;
 
  emulate:
-    if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+    emul_ctxt.regs = guest_cpu_user_regs();
+    emul_ctxt.cr2  = addr;
+    emul_ctxt.mode = X86EMUL_MODE_HOST;
+    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
@@ -3596,11 +3739,10 @@ int map_pages_to_xen(
 }
 
 void __set_fixmap(
-    enum fixed_addresses idx, unsigned long p, unsigned long flags)
-{
-    if ( unlikely(idx >= __end_of_fixed_addresses) )
-        BUG();
-    map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
+    enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
+{
+    BUG_ON(idx >= __end_of_fixed_addresses);
+    map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
 }
 
 #ifdef MEMORY_GUARD
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow.c     Tue May 30 14:30:34 2006 -0500
@@ -430,7 +430,8 @@ no_shadow_page:
            perfc_value(shadow_l2_pages),
            perfc_value(hl2_table_pages),
            perfc_value(snapshot_pages));
-    BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+    /* XXX FIXME: try a shadow flush to free up some memory. */
+    domain_crash_synchronous();
 
     return 0;
 }
@@ -3064,7 +3065,8 @@ static inline unsigned long init_bl2(
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
         printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
-        BUG(); /* XXX Deal gracefully with failure. */
+        /* XXX Deal gracefully with failure. */
+        domain_crash_synchronous();
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow32.c   Tue May 30 14:30:34 2006 -0500
@@ -246,7 +246,8 @@ alloc_shadow_page(struct domain *d,
                perfc_value(shadow_l2_pages),
                perfc_value(hl2_table_pages),
                perfc_value(snapshot_pages));
-        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+        /* XXX FIXME: try a shadow flush to free up some memory. */
+        domain_crash_synchronous();
     }
 
     smfn = page_to_mfn(page);
@@ -983,6 +984,11 @@ alloc_p2m_table(struct domain *d)
     else
     {
         page = alloc_domheap_page(NULL);
+        if (!page)
+        {
+            printk("Alloc p2m table fail\n");
+            domain_crash(d);
+        }
 
         l1tab = map_domain_page(page_to_mfn(page));
         memset(l1tab, 0, PAGE_SIZE);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow_public.c      Tue May 30 14:30:34 2006 -0500
@@ -324,6 +324,11 @@ static void alloc_monitor_pagetable(stru
 
     mmfn_info = alloc_domheap_page(NULL);
     ASSERT( mmfn_info );
+    if (!mmfn_info)
+    {
+        printk("Fail to allocate monitor pagetable\n");
+        domain_crash(v->domain);
+    }
 
     mmfn = page_to_mfn(mmfn_info);
     mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/traps.c      Tue May 30 14:30:34 2006 -0500
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
         }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/x86_emulate.c        Tue May 30 14:30:34 2006 -0500
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (              
 #endif /* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
-   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
-       goto done; \
-   (_eip) += (_size); \
-   (_type)_x; \
+#define insn_fetch(_type, _size, _eip)                                  \
+({ unsigned long _x;                                                    \
+   rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt);       \
+   if ( rc != 0 )                                                       \
+       goto done;                                                       \
+   (_eip) += (_size);                                                   \
+   (_type)_x;                                                           \
 })
 
 /* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
     return p;
 }
 
-int 
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode)
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops)
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
     unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
+    unsigned long cr2 = ctxt->cr2;
+    int mode = ctxt->mode;
 
     /* Shadow copy of register state. Committed on successful emulation. */
-    struct cpu_user_regs _regs = *regs;
+    struct cpu_user_regs _regs = *ctxt->regs;
 
     switch ( mode )
     {
@@ -628,7 +629,7 @@ x86_emulate_memop(
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
              ((rc = ops->read_emulated((unsigned long)dst.ptr,
-                                       &dst.val, dst.bytes)) != 0) )
+                                       &dst.val, dst.bytes, ctxt)) != 0) )
              goto done;
         break;
     }
@@ -670,7 +671,7 @@ x86_emulate_memop(
         src.type  = OP_MEM;
         src.ptr   = (unsigned long *)cr2;
         if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
-                                      &src.val, src.bytes)) != 0 )
+                                      &src.val, src.bytes, ctxt)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
         if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
         if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
-                                 &dst.val, dst.bytes)) != 0 )
+                                 &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(_regs.esp, dst.bytes);
         break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
-                                         &dst.val, 8)) != 0 )
+                                         &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -dst.bytes);
+            register_address_increment(_regs.esp, -(int)dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
-                                      dst.val, dst.bytes)) != 0 )
+                                      dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
         case OP_MEM:
             if ( lock_prefix )
                 rc = ops->cmpxchg_emulated(
-                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.orig_val,
+                    dst.val, dst.bytes, ctxt);
             else
                 rc = ops->write_emulated(
-                    (unsigned long)dst.ptr, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
             if ( rc != 0 )
                 goto done;
         default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
     }
 
     /* Commit shadow register state. */
-    *regs = _regs;
+    *ctxt->regs = _regs;
 
  done:
     return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
     {
         if ( _regs.ecx == 0 )
         {
-            regs->eip = _regs.eip;
+            ctxt->regs->eip = _regs.eip;
             goto done;
         }
         _regs.ecx--;
-        _regs.eip = regs->eip;
+        _regs.eip = ctxt->regs->eip;
     }
     switch ( b )
     {
@@ -928,20 +930,21 @@ x86_emulate_memop(
             dst.ptr = (unsigned long *)cr2;
             if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
                                                       _regs.esi),
-                                     &dst.val, dst.bytes)) != 0 )
+                                     &dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
             dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
-            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+            if ( (rc = ops->read_emulated(cr2, &dst.val,
+                                          dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)&_regs.eax;
-        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
 #if defined(__i386__)
     {
         unsigned long old_lo, old_hi;
-        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
-             ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
             goto done;
         if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
         {
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
         }
         else
         {
-            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
-                                               _regs.ebx, _regs.ecx)) != 0 )
+            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+                                               _regs.ecx, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
 #elif defined(__x86_64__)
     {
         unsigned long old, new;
-        if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
             goto done;
         if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
              ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
         else
         {
             new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
diff -r e74246451527 -r f54d38cea8ac xen/common/Makefile
--- a/xen/common/Makefile       Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/Makefile       Tue May 30 14:30:34 2006 -0500
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
 obj-y += page_alloc.o
 obj-y += rangeset.o
 obj-y += sched_bvt.o
+obj-y += sched_credit.o
 obj-y += sched_sedf.o
 obj-y += schedule.o
 obj-y += softirq.o
diff -r e74246451527 -r f54d38cea8ac xen/common/acm_ops.c
--- a/xen/common/acm_ops.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/acm_ops.c      Tue May 30 14:30:34 2006 -0500
@@ -32,100 +32,94 @@
 
 #ifndef ACM_SECURITY
 
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
 {
     return -ENOSYS;
 }
 
+
 #else
 
-enum acm_operation {
-    POLICY,                     /* access to policy interface (early drop) */
-    GETPOLICY,                  /* dump policy cache */
-    SETPOLICY,                  /* set policy cache (controls security) */
-    DUMPSTATS,                  /* dump policy statistics */
-    GETSSID,                    /* retrieve ssidref for domain id (decide 
inside authorized domains) */
-    GETDECISION                 /* retrieve ACM decision from authorized 
domains */
-};
-
-int acm_authorize_acm_ops(struct domain *d, enum acm_operation pops)
+
+int acm_authorize_acm_ops(struct domain *d)
 {
     /* currently, policy management functions are restricted to privileged 
domains */
     if (!IS_PRIV(d))
         return -EPERM;
-
     return 0;
 }
 
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
-{
-    long ret = 0;
-    struct acm_op curop, *op = &curop;
-
-    if (acm_authorize_acm_ops(current->domain, POLICY))
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
+{
+    long rc = -EFAULT;
+
+    if (acm_authorize_acm_ops(current->domain))
         return -EPERM;
 
-    if (copy_from_guest(op, u_acm_op, 1))
-        return -EFAULT;
-
-    if (op->interface_version != ACM_INTERFACE_VERSION)
-        return -EACCES;
-
-    switch (op->cmd)
+    switch ( cmd )
     {
-    case ACM_SETPOLICY:
-    {
-        ret = acm_authorize_acm_ops(current->domain, SETPOLICY);
-        if (!ret)
-            ret = acm_set_policy(op->u.setpolicy.pushcache,
-                                 op->u.setpolicy.pushcache_size, 1);
-    }
-    break;
-
-    case ACM_GETPOLICY:
-    {
-        ret = acm_authorize_acm_ops(current->domain, GETPOLICY);
-        if (!ret)
-            ret = acm_get_policy(op->u.getpolicy.pullcache,
-                                 op->u.getpolicy.pullcache_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_DUMPSTATS:
-    {
-        ret = acm_authorize_acm_ops(current->domain, DUMPSTATS);
-        if (!ret)
-            ret = acm_dump_statistics(op->u.dumpstats.pullcache,
-                                      op->u.dumpstats.pullcache_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_GETSSID:
-    {
+
+    case ACMOP_setpolicy: {
+        struct acm_setpolicy setpolicy;
+        if (copy_from_guest(&setpolicy, arg, 1) != 0)
+            return -EFAULT;
+        if (setpolicy.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_set_policy(setpolicy.pushcache,
+                            setpolicy.pushcache_size, 1);
+        break;
+    }
+
+    case ACMOP_getpolicy: {
+        struct acm_getpolicy getpolicy;
+        if (copy_from_guest(&getpolicy, arg, 1) != 0)
+            return -EFAULT;
+        if (getpolicy.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_get_policy(getpolicy.pullcache,
+                            getpolicy.pullcache_size);
+        break;
+    }
+
+    case ACMOP_dumpstats: {
+        struct acm_dumpstats dumpstats;
+        if (copy_from_guest(&dumpstats, arg, 1) != 0)
+            return -EFAULT;
+        if (dumpstats.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_dump_statistics(dumpstats.pullcache,
+                                 dumpstats.pullcache_size);
+        break;
+    }
+
+    case ACMOP_getssid: {
+        struct acm_getssid getssid;
         ssidref_t ssidref;
 
-        ret = acm_authorize_acm_ops(current->domain, GETSSID);
-        if (ret)
-            break;
-
-        if (op->u.getssid.get_ssid_by == SSIDREF)
-            ssidref = op->u.getssid.id.ssidref;
-        else if (op->u.getssid.get_ssid_by == DOMAINID)
-        {
-            struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
-                break;
-            }
-            if (subj->ssid == NULL)
-            {
-                put_domain(subj);
-                ret = -ESRCH;
+        if (copy_from_guest(&getssid, arg, 1) != 0)
+            return -EFAULT;
+        if (getssid.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        if (getssid.get_ssid_by == SSIDREF)
+            ssidref = getssid.id.ssidref;
+        else if (getssid.get_ssid_by == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getssid.id.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
+                break;
+            }
+            if (subj->ssid == NULL)
+            {
+                put_domain(subj);
+                rc = -ESRCH;
                 break;
             }
             ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -133,39 +127,36 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        ret = acm_get_ssid(ssidref,
-                           op->u.getssid.ssidbuf,
-                           op->u.getssid.ssidbuf_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_GETDECISION:
-    {
+            rc = -ESRCH;
+            break;
+        }
+        rc = acm_get_ssid(ssidref, getssid.ssidbuf, getssid.ssidbuf_size);
+        break;
+    }
+
+    case ACMOP_getdecision: {
+        struct acm_getdecision getdecision;
         ssidref_t ssidref1, ssidref2;
 
-        ret = acm_authorize_acm_ops(current->domain, GETDECISION);
-        if (ret)
-            break;
-
-        if (op->u.getdecision.get_decision_by1 == SSIDREF)
-            ssidref1 = op->u.getdecision.id1.ssidref;
-        else if (op->u.getdecision.get_decision_by1 == DOMAINID)
-        {
-            struct domain *subj = 
find_domain_by_id(op->u.getdecision.id1.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
-                break;
-            }
-            if (subj->ssid == NULL)
-            {
-                put_domain(subj);
-                ret = -ESRCH;
+        if (copy_from_guest(&getdecision, arg, 1) != 0)
+            return -EFAULT;
+        if (getdecision.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        if (getdecision.get_decision_by1 == SSIDREF)
+            ssidref1 = getdecision.id1.ssidref;
+        else if (getdecision.get_decision_by1 == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getdecision.id1.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
+                break;
+            }
+            if (subj->ssid == NULL)
+            {
+                put_domain(subj);
+                rc = -ESRCH;
                 break;
             }
             ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -173,23 +164,23 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        if (op->u.getdecision.get_decision_by2 == SSIDREF)
-            ssidref2 = op->u.getdecision.id2.ssidref;
-        else if (op->u.getdecision.get_decision_by2 == DOMAINID)
-        {
-            struct domain *subj = 
find_domain_by_id(op->u.getdecision.id2.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
+            rc = -ESRCH;
+            break;
+        }
+        if (getdecision.get_decision_by2 == SSIDREF)
+            ssidref2 = getdecision.id2.ssidref;
+        else if (getdecision.get_decision_by2 == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getdecision.id2.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
                 break;;
             }
             if (subj->ssid == NULL)
             {
                 put_domain(subj);
-                ret = -ESRCH;
+                rc = -ESRCH;
                 break;
             }
             ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -197,34 +188,35 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        ret = acm_get_decision(ssidref1, ssidref2, op->u.getdecision.hook);
-
-        if (ret == ACM_ACCESS_PERMITTED)
-        {
-            op->u.getdecision.acm_decision = ACM_ACCESS_PERMITTED;
-            ret = 0;
-        }
-        else if  (ret == ACM_ACCESS_DENIED)
-        {
-            op->u.getdecision.acm_decision = ACM_ACCESS_DENIED;
-            ret = 0;
-        }
-        else
-            ret = -ESRCH;
-
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
+            rc = -ESRCH;
+            break;
+        }
+        rc = acm_get_decision(ssidref1, ssidref2, getdecision.hook);
+
+        if (rc == ACM_ACCESS_PERMITTED)
+        {
+            getdecision.acm_decision = ACM_ACCESS_PERMITTED;
+            rc = 0;
+        }
+        else if  (rc == ACM_ACCESS_DENIED)
+        {
+            getdecision.acm_decision = ACM_ACCESS_DENIED;
+            rc = 0;
+        }
+        else
+            rc = -ESRCH;
+
+        if ( (rc == 0) && (copy_to_guest(arg, &getdecision, 1) != 0) )
+            rc = -EFAULT;
+        break;
+    }
 
     default:
-        ret = -ESRCH;
-    }
-
-    return ret;
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
 }
 
 #endif
diff -r e74246451527 -r f54d38cea8ac xen/common/elf.c
--- a/xen/common/elf.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/elf.c  Tue May 30 14:30:34 2006 -0500
@@ -23,10 +23,10 @@ int parseelfimage(struct domain_setup_in
     Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
     char *shstrtab, *guestinfo=NULL, *p;
     char *elfbase = (char *)dsi->image_addr;
-    int h;
+    int h, virt_base_defined, elf_pa_off_defined;
 
     if ( !elf_sanity_check(ehdr) )
         return -EINVAL;
@@ -84,29 +84,40 @@ int parseelfimage(struct domain_setup_in
     if ( guestinfo == NULL )
         guestinfo = "";
 
-    virt_base = 0;
-    if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
-        virt_base = simple_strtoul(p+10, &p, 0);
-    dsi->elf_paddr_offset = virt_base;
-    if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
-        dsi->elf_paddr_offset = simple_strtoul(p+17, &p, 0);
+    /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+    p = strstr(guestinfo, "VIRT_BASE=");
+    virt_base_defined = (p != NULL);
+    virt_base = virt_base_defined ? simple_strtoul(p+10, &p, 0) : 0;
+
+    /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+    p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+    elf_pa_off_defined = (p != NULL);
+    elf_pa_off = elf_pa_off_defined ? simple_strtoul(p+17, &p, 0) : virt_base;
+
+    if ( elf_pa_off_defined && !virt_base_defined )
+        goto bad_image;
 
     for ( h = 0; h < ehdr->e_phnum; h++ )
     {
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+        vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+        if ( (vaddr + phdr->p_memsz) < vaddr )
+            goto bad_image;
         if ( vaddr < kernstart )
             kernstart = vaddr;
         if ( (vaddr + phdr->p_memsz) > kernend )
             kernend = vaddr + phdr->p_memsz;
     }
 
-    if ( virt_base )
-        dsi->v_start = virt_base;
-    else
-        dsi->v_start = kernstart;
+    /*
+     * Legacy compatibility and images with no __xen_guest section: assume
+     * header addresses are virtual addresses, and that guest memory should be
+     * mapped starting at kernel load address.
+     */
+    dsi->v_start          = virt_base_defined  ? virt_base  : kernstart;
+    dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
 
     dsi->v_kernentry = ehdr->e_entry;
     if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -114,11 +125,9 @@ int parseelfimage(struct domain_setup_in
 
     if ( (kernstart > kernend) || 
          (dsi->v_kernentry < kernstart) ||
-         (dsi->v_kernentry > kernend) )
-    {
-        printk("Malformed ELF image.\n");
-        return -EINVAL;
-    }
+         (dsi->v_kernentry > kernend) ||
+         (dsi->v_start > kernstart) )
+        goto bad_image;
 
     if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
             dsi->load_symtab = 1;
@@ -130,6 +139,10 @@ int parseelfimage(struct domain_setup_in
     loadelfsymtab(dsi, 0);
 
     return 0;
+
+ bad_image:
+    printk("Malformed ELF image.\n");
+    return -EINVAL;
 }
 
 int loadelfimage(struct domain_setup_info *dsi)
diff -r e74246451527 -r f54d38cea8ac xen/common/grant_table.c
--- a/xen/common/grant_table.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/grant_table.c  Tue May 30 14:30:34 2006 -0500
@@ -505,15 +505,12 @@ gnttab_setup_table(
         goto out;
     }
 
-    if ( op.nr_frames <= NR_GRANT_FRAMES )
-    {
-        ASSERT(d->grant_table != NULL);
-        op.status = GNTST_okay;
-        for ( i = 0; i < op.nr_frames; i++ )
-        {
-            gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
-            (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
-        }
+    ASSERT(d->grant_table != NULL);
+    op.status = GNTST_okay;
+    for ( i = 0; i < op.nr_frames; i++ )
+    {
+        gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+        (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
     }
 
     put_domain(d);
diff -r e74246451527 -r f54d38cea8ac xen/common/kernel.c
--- a/xen/common/kernel.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/kernel.c       Tue May 30 14:30:34 2006 -0500
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
-                    (1U << XENFEAT_auto_translated_physmap) |
-                    (1U << XENFEAT_pae_pgdir_above_4gb);
+                    (1U << XENFEAT_auto_translated_physmap);
             if ( supervisor_mode_kernel )
                 fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
             break;
diff -r e74246451527 -r f54d38cea8ac xen/common/schedule.c
--- a/xen/common/schedule.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/schedule.c     Tue May 30 14:30:34 2006 -0500
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
 
 extern struct scheduler sched_bvt_def;
 extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
 static struct scheduler *schedulers[] = { 
     &sched_bvt_def,
     &sched_sedf_def,
+    &sched_credit_def,
     NULL
 };
 
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
 
     page_scrub_schedule_work();
 
+    SCHED_OP(tick, cpu);
+
     set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
 }
 
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
         printk("Could not find scheduler: %s\n", opt_sched);
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+    SCHED_OP(init);
 
     if ( idle_vcpu[0] != NULL )
     {
diff -r e74246451527 -r f54d38cea8ac xen/common/trace.c
--- a/xen/common/trace.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/trace.c        Tue May 30 14:30:34 2006 -0500
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
         printk("Xen trace buffers: memory allocation failed\n");
+        opt_tbuf_size = 0;
         return -EINVAL;
     }
 
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
 
     opt_tbuf_size = size;
     if ( alloc_trace_bufs() != 0 )
-    {
-        opt_tbuf_size = 0;
-        return -EINVAL;
-    }
+        return -EINVAL;
 
     printk("Xen trace buffers: initialized\n");
     return 0;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/config.h     Tue May 30 14:30:34 2006 -0500
@@ -97,6 +97,13 @@ extern char _end[]; /* standard ELF symb
 //#define HZ 1000
 // FIXME SMP: leave SMP for a later time
 
+/* A power-of-two value greater than or equal to number of hypercalls. */
+#define NR_hypercalls 64
+
+#if NR_hypercalls & (NR_hypercalls - 1)
+#error "NR_hypercalls must be a power-of-two value"
+#endif
+
 ///////////////////////////////////////////////////////////////
 // xen/include/asm/config.h
 // Natural boundary upon TR size to define xenheap space
@@ -239,6 +246,10 @@ void dummy_called(char *function);
 // these declarations got moved at some point, find a better place for them
 extern int ht_per_core;
 
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#define CONFIG_SHADOW  1
+#endif
+
 // xen/include/asm/config.h
 /******************************************************************************
  * config.h
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/dom_fw.h
--- a/xen/include/asm-ia64/dom_fw.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/dom_fw.h     Tue May 30 14:30:34 2006 -0500
@@ -5,7 +5,7 @@
  *     Dan Magenheimer (dan.magenheimer@xxxxxx)
  */
 
-extern unsigned long dom_fw_setup(struct domain *, const char *, int);
+#include <linux/efi.h>
 
 #ifndef MB
 #define MB (1024*1024)
@@ -55,7 +55,7 @@ extern unsigned long dom_fw_setup(struct
 
 #define FW_HYPERCALL_SAL_CALL_INDEX    0x82UL
 #define FW_HYPERCALL_SAL_CALL_PADDR    
FW_HYPERCALL_PADDR(FW_HYPERCALL_SAL_CALL_INDEX)
-#define FW_HYPERCALL_SAL_CALL          0x1001UL
+#define FW_HYPERCALL_SAL_CALL          0x1100UL
 
 /*
  * EFI is accessed via the EFI system table, which contains:
@@ -94,6 +94,7 @@ extern unsigned long dom_fw_setup(struct
 #define FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX            9UL
 
 /* these are hypercall numbers */
+#define FW_HYPERCALL_EFI_CALL                          0x300UL
 #define FW_HYPERCALL_EFI_GET_TIME                      0x300UL
 #define FW_HYPERCALL_EFI_SET_TIME                      0x301UL
 #define FW_HYPERCALL_EFI_GET_WAKEUP_TIME               0x302UL
@@ -125,7 +126,7 @@ extern unsigned long dom_fw_setup(struct
 */
 #define FW_HYPERCALL_FIRST_ARCH                0x300UL
 
-#define FW_HYPERCALL_IPI               0x380UL
+#define FW_HYPERCALL_IPI               0x400UL
 
 /* Xen/ia64 user hypercalls.  Only used for debugging.  */
 #define FW_HYPERCALL_FIRST_USER                0xff00UL
@@ -133,9 +134,16 @@ extern unsigned long dom_fw_setup(struct
 /* Interrupt vector used for os boot rendez vous.  */
 #define XEN_SAL_BOOT_RENDEZ_VEC        0xF3
 
+#define FW_HYPERCALL_NUM_MASK_HIGH     ~0xffUL
+#define FW_HYPERCALL_NUM_MASK_LOW       0xffUL
+
+#define EFI_MEMDESC_VERSION            1
+
 extern struct ia64_pal_retval xen_pal_emulator(UINT64, u64, u64, u64);
 extern struct sal_ret_values sal_emulator (long index, unsigned long in1, 
unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, 
unsigned long in6, unsigned long in7);
 extern struct ia64_pal_retval pal_emulator_static (unsigned long);
+extern unsigned long dom_fw_setup (struct domain *, const char *, int);
+extern efi_status_t efi_emulator (struct pt_regs *regs, unsigned long *fault);
 
 extern void build_pal_hypercall_bundles(unsigned long *imva, unsigned long 
brkimm, unsigned long hypnum);
 extern void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, 
UINT64 ret);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/domain.h     Tue May 30 14:30:34 2006 -0500
@@ -22,8 +22,13 @@ extern void panic_domain(struct pt_regs 
 extern void panic_domain(struct pt_regs *, const char *, ...)
      __attribute__ ((noreturn, format (printf, 2, 3)));
 
+struct mm_struct {
+       pgd_t * pgd;
+    // atomic_t mm_users;                      /* How many users with user 
space? */
+};
+
 struct arch_domain {
-    struct mm_struct *mm;
+    struct mm_struct mm;
     unsigned long metaphysical_rr0;
     unsigned long metaphysical_rr4;
 
@@ -54,10 +59,11 @@ struct arch_domain {
     unsigned long initrd_start;
     unsigned long initrd_len;
     char *cmdline;
+    int efi_virt_mode;         /* phys : 0 , virt : 1 */
+    void *efi_runtime;
 };
 #define xen_vastart arch.xen_vastart
 #define xen_vaend arch.xen_vaend
-#define shared_info_va arch.shared_info_va
 #define INT_ENABLE_OFFSET(v)             \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
     offsetof(vcpu_info_t, evtchn_upcall_mask))
@@ -69,8 +75,6 @@ struct arch_vcpu {
        TR_ENTRY dtlb;
        unsigned int itr_regions;
        unsigned int dtr_regions;
-       unsigned long itlb_pte;
-       unsigned long dtlb_pte;
        unsigned long irr[4];
        unsigned long insvc[4];
        unsigned long tc_regions;
@@ -106,27 +110,15 @@ struct arch_vcpu {
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
 };
 
-//#define thread arch._thread
-
-// FOLLOWING FROM linux-2.6.7/include/sched.h
-
-struct mm_struct {
-       pgd_t * pgd;
-    // atomic_t mm_users;                      /* How many users with user 
space? */
-       struct list_head pt_list;               /* List of pagetable */
-};
-
-extern struct mm_struct init_mm;
-
 struct page_info * assign_new_domain_page(struct domain *d, unsigned long 
mpaddr);
 void assign_new_domain0_page(struct domain *d, unsigned long mpaddr);
+void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned 
long physaddr);
 void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long 
physaddr);
 void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned 
long flags);
 #ifdef CONFIG_XEN_IA64_DOM0_VP
 unsigned long assign_domain_mmio_page(struct domain *d, unsigned long mpaddr, 
unsigned long size);
 unsigned long assign_domain_mach_page(struct domain *d, unsigned long mpaddr, 
unsigned long size);
 unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned 
long arg1, unsigned long arg2, unsigned long arg3);
-unsigned long dom0vp_populate_physmap(struct domain *d, unsigned long gpfn, 
unsigned int extent_order, unsigned int address_bits);
 unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, 
unsigned int extent_order);
 unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, 
unsigned long mfn, unsigned int flags, domid_t domid);
 #endif
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/event.h      Tue May 30 14:30:34 2006 -0500
@@ -29,7 +29,7 @@ static inline void evtchn_notify(struct 
         smp_send_event_check_cpu(v->processor);
 
     if(!VMX_DOMAIN(v))
-       vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector);
+       vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
 }
 
 /* Note: Bitwise operations result in fast code with no branches. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/grant_table.h        Tue May 30 14:30:34 2006 -0500
@@ -7,12 +7,33 @@
 
 #define ORDER_GRANT_FRAMES 0
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+// for grant map/unmap
 #define create_grant_host_mapping(a, f, fl)  0
 #define destroy_grant_host_mapping(a, f, fl) 0
 
+// for grant transfer
 #define steal_page_for_grant_transfer(d, p)  0
 
-#define gnttab_create_shared_page(d, t, i) ((void)0)
+#else
+// for grant map/unmap
+int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, 
unsigned int flags);
+int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, 
unsigned int flags);
+
+// for grant transfer
+int steal_page_for_grant_transfer(struct domain *d, struct page_info *page);
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+
+#endif
+
+// for grant table shared page
+#define gnttab_create_shared_page(d, t, i)                              \
+    do {                                                                \
+        share_xen_page_with_guest(                                      \
+            virt_to_page((char *)(t)->shared + ((i) << PAGE_SHIFT)),    \
+            (d), XENSHARE_writable);                                    \
+    } while (0)
+
 
 /* Guest physical address of the grant table.  */
 #define IA64_GRANT_TABLE_PADDR (1UL << 40)
@@ -20,13 +41,21 @@
 #define gnttab_shared_maddr(d, t, i)                        \
     virt_to_maddr((char*)(t)->shared + ((i) << PAGE_SHIFT))
 
-#define gnttab_shared_gmfn(d, t, i)                                          \
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+# define gnttab_shared_gmfn(d, t, i)                                         \
     ({ ((d) == dom0) ?                                                       \
             (virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i):                \
             assign_domain_page((d),                                          \
                                IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
                                gnttab_shared_maddr(d, t, i)),                \
             (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#else
+# define gnttab_shared_gmfn(d, t, i)                                    \
+    ({ assign_domain_page((d),                                          \
+                          IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
+                          gnttab_shared_maddr((d), (t), (i)));          \
+        (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#endif
 
 #define gnttab_log_dirty(d, f) ((void)0)
 
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/pgalloc.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Tue May 30 12:52:02 
2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Tue May 30 14:30:34 
2006 -0500
@@ -139,12 +139,14 @@ static inline void pte_free(struct page 
 {
        pgtable_quicklist_free(page_address(pte));
 }
+#endif
 
 static inline void pte_free_kernel(pte_t * pte)
 {
        pgtable_quicklist_free(pte);
 }
 
+#ifndef XEN
 #define __pte_free_tlb(tlb, pte)       pte_free(pte)
 #endif
 
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Tue May 30 12:52:02 
2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Tue May 30 14:30:34 
2006 -0500
@@ -383,6 +383,7 @@ ptep_test_and_clear_dirty (struct vm_are
        return 1;
 #endif
 }
+#endif
 
 static inline pte_t
 ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -396,6 +397,19 @@ ptep_get_and_clear(struct mm_struct *mm,
 #endif
 }
 
+static inline pte_t
+ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
+{
+#ifdef CONFIG_SMP
+       return __pte(xchg((long *) ptep, pte_val(npte)));
+#else
+       pte_t pte = *ptep;
+       set_pte (ptep, npte);
+       return pte;
+#endif
+}
+
+#ifndef XEN
 static inline void
 ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/mm.h Tue May 30 14:30:34 2006 -0500
@@ -12,7 +12,7 @@
 
 #include <asm/processor.h>
 #include <asm/atomic.h>
-#include <asm/flushtlb.h>
+#include <asm/tlbflush.h>
 #include <asm/io.h>
 
 #include <public/xen.h>
@@ -128,8 +128,10 @@ static inline u32 pickle_domptr(struct d
 #define page_get_owner(_p)     (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d))
 
-/* Dummy now */
-#define share_xen_page_with_guest(p, d, r) do { } while (0)
+#define XENSHARE_writable 0
+#define XENSHARE_readonly 1
+void share_xen_page_with_guest(struct page_info *page,
+                               struct domain *d, int readonly);
 #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
 extern struct page_info *frame_table;
@@ -471,6 +473,4 @@ extern unsigned long ____lookup_domain_m
 /* Arch-specific portion of memory_op hypercall. */
 #define arch_memory_op(op, arg) (-ENOSYS)
 
-extern void assign_domain_page(struct domain *d, unsigned long mpaddr,
-                              unsigned long physaddr);
 #endif /* __ASM_IA64_MM_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/shadow.h     Tue May 30 14:30:34 2006 -0500
@@ -1,2 +1,57 @@
-/* empty */
+/******************************************************************************
+ * include/asm-ia64/shadow.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
 
+#ifndef _XEN_SHADOW_H
+#define _XEN_SHADOW_H
+
+#include <xen/config.h>
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#ifndef CONFIG_SHADOW
+# error "CONFIG_SHADOW must be defined"
+#endif
+
+#define shadow_drop_references(d, p)          ((void)0)
+
+// this is only used by x86-specific code
+//#define shadow_sync_and_drop_references(d, p) ((void)0)
+
+#define shadow_mode_translate(d)              (1)
+
+// for granttab transfer. XENMEM_populate_physmap
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+// for balloon driver. XENMEM_decrease_reservation
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+#endif
+
+#endif // _XEN_SHADOW_H
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vcpu.h       Tue May 30 14:30:34 2006 -0500
@@ -135,7 +135,10 @@ extern IA64FAULT vcpu_set_pkr(VCPU *vcpu
 extern IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
 extern IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
 /* TLB */
-extern void vcpu_purge_tr_entry(TR_ENTRY *trp);
+static inline void vcpu_purge_tr_entry(TR_ENTRY *trp)
+{
+       trp->pte.val = 0;
+}
 extern IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 padr,
                UINT64 itir, UINT64 ifa);
 extern IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 padr,
@@ -148,8 +151,7 @@ extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu,
 extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
-extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address,
-                               BOOLEAN is_data, BOOLEAN in_tpa,
+extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data,
                                UINT64 *pteval, UINT64 *itir, UINT64 *iha);
 extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
 extern IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vhpt.h       Tue May 30 14:30:34 2006 -0500
@@ -4,18 +4,17 @@
 #define VHPT_ENABLED 1
 
 /* Size of the VHPT.  */
-#define        VHPT_SIZE_LOG2                  24
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+// XXX workaround to avoid triggering xenLinux software lockup detection.
+# define       VHPT_SIZE_LOG2                  16      // 64KB
+#else
+# define       VHPT_SIZE_LOG2                  24      // 16MB default
+#endif
 
 /* Number of entries in the VHPT.  The size of an entry is 4*8B == 32B */
 #define        VHPT_NUM_ENTRIES                (1 << (VHPT_SIZE_LOG2 - 5))
 
-#ifdef CONFIG_SMP
-# define vhpt_flush_all()      smp_vhpt_flush_all()
-#else
-# define vhpt_flush_all()      vhpt_flush()
-#endif
 // FIXME: These should be automatically generated
-
 #define        VLE_PGFLAGS_OFFSET              0
 #define        VLE_ITIR_OFFSET                 8
 #define        VLE_TITAG_OFFSET                16
@@ -37,15 +36,10 @@ extern void vhpt_init (void);
 extern void vhpt_init (void);
 extern void zero_vhpt_stats(void);
 extern int dump_vhpt_stats(char *buf);
-extern void vhpt_flush_address(unsigned long vadr, unsigned long addr_range);
-extern void vhpt_flush_address_remote(int cpu, unsigned long vadr,
-                                     unsigned long addr_range);
 extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                                 unsigned long logps);
 extern void vhpt_insert (unsigned long vadr, unsigned long pte,
                         unsigned long logps);
-extern void vhpt_flush(void);
-extern void smp_vhpt_flush_all(void);
 
 /* Currently the VHPT is allocated per CPU.  */
 DECLARE_PER_CPU (unsigned long, vhpt_paddr);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h   Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vmx_vcpu.h   Tue May 30 14:30:34 2006 -0500
@@ -359,7 +359,7 @@ IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu,
     // TODO: unimplemented DBRs return a reserved register fault
     // TODO: Should set Logical CPU state, not just physical
     if(reg > 4){
-        panic("there are only five cpuid registers");
+        panic_domain(vcpu_regs(vcpu),"there are only five cpuid registers");
     }
     *pval=VCPU(vcpu,vcpuid[reg]);
     return (IA64_NO_FAULT);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/domain.h      Tue May 30 14:30:34 2006 -0500
@@ -114,11 +114,32 @@ struct arch_domain
     unsigned long first_reserved_pfn;
 } __cacheline_aligned;
 
+#ifdef CONFIG_X86_PAE
+struct pae_l3_cache {
+    /*
+     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+     * an L3 table while we are currently running on it (without using
+     * expensive atomic 64-bit operations).
+     */
+    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
+    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
+    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
+    spinlock_t    lock;
+};
+#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
+#else /* !CONFIG_X86_PAE */
+struct pae_l3_cache { };
+#define pae_l3_cache_init(c) ((void)0)
+#endif
+
 struct arch_vcpu
 {
     /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */
     struct vcpu_guest_context guest_context
     __attribute__((__aligned__(16)));
+
+    struct pae_l3_cache pae_l3_cache;
 
     unsigned long      flags; /* TF_ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/fixmap.h      Tue May 30 14:30:34 2006 -0500
@@ -25,6 +25,10 @@
  * from the end of virtual memory backwards.
  */
 enum fixed_addresses {
+#ifdef CONFIG_X86_PAE
+    FIX_PAE_HIGHMEM_0,
+    FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
+#endif
     FIX_APIC_BASE,
     FIX_IO_APIC_BASE_0,
     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
@@ -40,13 +44,13 @@ enum fixed_addresses {
 #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern void __set_fixmap(
-    enum fixed_addresses idx, unsigned long p, unsigned long flags);
+    enum fixed_addresses idx, unsigned long mfn, unsigned long flags);
 
 #define set_fixmap(idx, phys) \
-    __set_fixmap(idx, phys, PAGE_HYPERVISOR)
+    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR)
 
 #define set_fixmap_nocache(idx, phys) \
-    __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE)
+    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE)
 
 #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/domain.h  Tue May 30 14:30:34 2006 -0500
@@ -35,9 +35,9 @@ struct hvm_domain {
     unsigned int           nr_vcpus;
     unsigned int           apic_enabled;
     unsigned int           pae_enabled;
-
-    struct hvm_virpit      vpit;
-    u64                    guest_time;
+    s64                    tsc_frequency;
+    struct pl_time         pl_time;
+    
     struct hvm_virpic      vpic;
     struct hvm_vioapic     vioapic;
     struct hvm_io_handler  io_handler;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/intr.h        Tue May 30 14:30:34 2006 -0500
@@ -21,7 +21,6 @@
 #ifndef __ASM_X86_HVM_SVM_INTR_H__
 #define __ASM_X86_HVM_SVM_INTR_H__
 
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
 extern void svm_intr_assist(void);
 extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
 extern void svm_intr_assist_test_valid(struct vcpu *v, 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 14:30:34 2006 -0500
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
 extern void svm_do_launch(struct vcpu *v);
 extern void svm_do_resume(struct vcpu *v);
 extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
 extern void arch_svm_do_resume(struct vcpu *v);
 extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
 /* For debugging. Remove when no longer needed. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vcpu.h    Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,9 @@ struct hvm_vcpu {
     unsigned long   ioflags;
     struct mmio_op  mmio_op;
     struct vlapic   *vlapic;
+    s64             cache_tsc_offset;
+    u64             guest_time;
+
     /* For AP startup */
     unsigned long   init_sipi_sipi_state;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 14:30:34 2006 -0500
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
 extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64  get_guest_time(struct vcpu *v);
 
 extern unsigned int cpu_rev;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vpit.h    Tue May 30 14:30:34 2006 -0500
@@ -29,9 +29,7 @@
 #include <asm/hvm/vpic.h>
 
 #define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE        0x40
 
 typedef struct PITChannelState {
     int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
     u8 gate; /* timer start */
     s64 count_load_time;
     /* irq handling */
-    s64 next_transition_time;
-    int irq;
-    struct hvm_time_info *hvm_time;
-    u32 period; /* period(ns) based on count */
+    struct vcpu      *vcpu;
+    struct periodic_time *pt;
 } PITChannelState;
-
-struct hvm_time_info {
-    /* extra info for the mode 2 channel */
-    struct timer pit_timer;
-    struct vcpu *vcpu;          /* which vcpu the ac_timer bound to */
-    u64 period_cycles;          /* pit frequency in cpu cycles */
-    s_time_t count_advance;     /* accumulated count advance since last fire */
-    s_time_t count_point;        /* last point accumulating count advance */
-    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
-    int first_injected;         /* flag to prevent shadow window */
-    s64 cache_tsc_offset;       /* cache of VMCS TSC_OFFSET offset */
-    u64 last_pit_gtime;         /* guest time when last pit is injected */
+   
+/*
+ * Abstract layer of periodic time, one-shot timer.
+ */
+struct periodic_time {
+    char enabled;               /* enabled */
+    char one_shot;              /* one shot time */
+    char irq;
+    char first_injected;        /* flag to prevent shadow window */
+    u32 pending_intr_nr;        /* the counter for pending timer interrupts */
+    u32 period;                 /* frequency in ns */
+    u64 period_cycles;          /* frequency in cpu cycles */
+    s_time_t scheduled;         /* scheduled timer interrupt */
+    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
+    struct timer timer;         /* ac_timer */
 };
 
-typedef struct hvm_virpit {
+typedef struct PITState {
     PITChannelState channels[3];
-    struct hvm_time_info time_info;
     int speaker_data_on;
     int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
 
+struct pl_time {    /* platform time */
+    struct periodic_time periodic_tm;
+    struct PITState      vpit;
+    /* TODO: RTC/ACPI time */
+};
 
-static __inline__ s_time_t get_pit_scheduled(
-    struct vcpu *v,
-    struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+    struct vcpu *v, int irq,
+    struct periodic_time *pt)
 {
-    struct PITChannelState *s = &(vpit->channels[0]);
-    if ( is_irq_enabled(v, 0) ) {
-        return s->next_transition_time;
+    if ( is_irq_enabled(v, irq) ) {
+        return pt->scheduled;
     }
     else
         return -1;
 }
 
 /* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, 
char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
 
 #endif /* __ASM_X86_HVM_VPIT_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/string.h      Tue May 30 14:30:34 2006 -0500
@@ -2,152 +2,6 @@
 #define __X86_STRING_H__
 
 #include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
-    long d0, d1, d2;
-    __asm__ __volatile__ (
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2)
-        : "0" (src), "1" (dest) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "1: dec  %2        \n"
-        "   js   2f        \n"
-        "   lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        "   rep ; stosb    \n"
-        "2:                \n"
-        : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-        : "0" (src), "1" (dest), "2" (count) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   dec  %1        \n"
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb   \n"
-        "   dec  %1         \n"
-        "   mov  %8,%3      \n"
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   stosb           \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   stosb"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
-        : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
-    long d0, d1;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: lodsb           \n"
-        "   scasb           \n"
-        "   jne  2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   xor  %%eax,%%eax\n"
-        "   jmp  3f         \n"
-        "2: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "3:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1)
-        : "1" (cs), "2" (ct) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
-    long d0, d1, d2;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   scasb           \n"
-        "   jne  3f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   jmp  4f         \n"
-        "3: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "4:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-        : "1" (cs), "2" (ct), "3" (count) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
-    long d0;
-    register char *__res;
-    __asm__ __volatile__ (
-        "   mov  %%al,%%ah  \n"
-        "1: lodsb           \n"
-        "   cmp  %%ah,%%al  \n"
-        "   je   2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   mov  $1,%1      \n"
-        "2: mov  %1,%0      \n"
-        "   dec  %0         \n"
-        : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
-    long d0;
-    register int __res;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   notl %0        \n"
-        "   decl %0        \n"
-        : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
-    return __res;
-}
 
 static inline void *__variable_memcpy(void *to, const void *from, size_t n)
 {
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
 #define __HAVE_ARCH_MEMCMP
 #define memcmp __builtin_memcmp
 
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
-    long d0;
-    register void *__res;
-    if ( count == 0 )
-        return NULL;
-    __asm__ __volatile__ (
-        "   repne ; scasb\n"
-        "   je   1f      \n"
-        "   mov  $1,%0   \n"
-        "1: dec  %0      \n"
-        : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
-    return __res;
-}
-
 static inline void *__memset_generic(void *s, char c, size_t count)
 {
     long d0, d1;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/x86_emulate.h Tue May 30 14:30:34 2006 -0500
@@ -9,8 +9,10 @@
 #ifndef __X86_EMULATE_H__
 #define __X86_EMULATE_H__
 
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
  * 
  * These operations represent the instruction emulator's interface to memory.
  * There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
 #define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
 {
     /*
      * read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
     int (*read_std)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
     int (*write_std)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
     int (*read_emulated)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_emulated: Read bytes from emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
     int (*write_emulated)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
         unsigned long addr,
         unsigned long old,
         unsigned long new,
-        unsigned int bytes);
-
-    /*
-     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
-     *                   emulated/special memory area.
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+     *                     emulated/special memory area.
      *  @addr:  [IN ] Linear address to access.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
         unsigned long old_lo,
         unsigned long old_hi,
         unsigned long new_lo,
-        unsigned long new_hi);
+        unsigned long new_hi,
+        struct x86_emulate_ctxt *ctxt);
 };
 
 /* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 extern int
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 
 struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+    /* Register state before/after emulation. */
+    struct cpu_user_regs   *regs;
+
+    /* Linear faulting address (if emulating a page-faulting instruction). */
+    unsigned long           cr2;
+
+    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+    int                     mode;
+};
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0 /* Real mode.             */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
- *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address within an emulated/special memory area.
- *  @ops:  Interface to access special memory.
- *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
  * Returns -1 on failure, 0 on success.
  */
-extern int
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode);
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops);
 
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-extern void *
+void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/acm_ops.h
--- a/xen/include/public/acm_ops.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/acm_ops.h      Tue May 30 14:30:34 2006 -0500
@@ -2,7 +2,7 @@
  * acm_ops.h: Xen access control module hypervisor commands
  *
  * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Copyright (c) 2005, International Business Machines Corporation.
+ * Copyright (c) 2005,2006 International Business Machines Corporation.
  */
 
 #ifndef __XEN_PUBLIC_ACM_OPS_H__
@@ -17,36 +17,50 @@
  * This makes sure that old versions of acm tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define ACM_INTERFACE_VERSION   0xAAAA0006
+#define ACM_INTERFACE_VERSION   0xAAAA0007
 
 /************************************************************************/
 
-#define ACM_SETPOLICY         4
+/*
+ * Prototype for this hypercall is:
+ *  int acm_op(int cmd, void *args)
+ * @cmd  == ACMOP_??? (access control module operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+
+#define ACMOP_setpolicy         1
 struct acm_setpolicy {
-    /* OUT variables */
+    /* IN */
+    uint32_t interface_version;
     void *pushcache;
     uint32_t pushcache_size;
 };
 
 
-#define ACM_GETPOLICY         5
+#define ACMOP_getpolicy         2
 struct acm_getpolicy {
-    /* OUT variables */
+    /* IN */
+    uint32_t interface_version;
     void *pullcache;
     uint32_t pullcache_size;
 };
 
 
-#define ACM_DUMPSTATS         6
+#define ACMOP_dumpstats         3
 struct acm_dumpstats {
+    /* IN */
+    uint32_t interface_version;
     void *pullcache;
     uint32_t pullcache_size;
 };
 
 
-#define ACM_GETSSID           7
+#define ACMOP_getssid           4
 enum get_type {UNSET=0, SSIDREF, DOMAINID};
 struct acm_getssid {
+    /* IN */
+    uint32_t interface_version;
     enum get_type get_ssid_by;
     union {
         domaintype_t domainid;
@@ -56,9 +70,11 @@ struct acm_getssid {
     uint32_t ssidbuf_size;
 };
 
-#define ACM_GETDECISION        8
+#define ACMOP_getdecision      5
 struct acm_getdecision {
-    enum get_type get_decision_by1; /* in */
+    /* IN */
+    uint32_t interface_version;
+    enum get_type get_decision_by1;
     enum get_type get_decision_by2;
     union {
         domaintype_t domainid;
@@ -69,23 +85,11 @@ struct acm_getdecision {
         ssidref_t    ssidref;
     } id2;
     enum acm_hook_type hook;
-    int acm_decision;           /* out */
+    /* OUT */
+    int acm_decision;
 };
 
-typedef struct acm_op {
-    uint32_t cmd;
-    uint32_t interface_version;      /* ACM_INTERFACE_VERSION */
-    union {
-        struct acm_setpolicy setpolicy;
-        struct acm_getpolicy getpolicy;
-        struct acm_dumpstats dumpstats;
-        struct acm_getssid getssid;
-        struct acm_getdecision getdecision;
-    } u;
-} acm_op_t;
-DEFINE_XEN_GUEST_HANDLE(acm_op_t);
-
-#endif                          /* __XEN_PUBLIC_ACM_OPS_H__ */
+#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
 
 /*
  * Local variables:
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-ia64.h    Tue May 30 14:30:34 2006 -0500
@@ -38,15 +38,17 @@ DEFINE_XEN_GUEST_HANDLE(void);
 #ifndef __ASSEMBLY__
 
 #define MAX_NR_SECTION  32  /* at most 32 memory holes */
-typedef struct {
+struct mm_section {
     unsigned long start;  /* start of memory hole */
     unsigned long end;    /* end of memory hole */
-} mm_section_t;
-
-typedef struct {
+};
+typedef struct mm_section mm_section_t;
+
+struct pmt_entry {
     unsigned long mfn : 56;
     unsigned long type: 8;
-} pmt_entry_t;
+};
+typedef struct pmt_entry pmt_entry_t;
 
 #define GPFN_MEM          (0UL << 56) /* Guest pfn is normal mem */
 #define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */
@@ -93,10 +95,11 @@ typedef struct {
  * NB. This may become a 64-bit count with no shift. If this happens then the 
  * structure size will still be 8 bytes, so no other alignments will change.
  */
-typedef struct {
+struct tsc_timestamp {
     unsigned int  tsc_bits;      /* 0: 32 bits read from the CPU's TSC. */
     unsigned int  tsc_bitshift;  /* 4: 'tsc_bits' uses N:N+31 of TSC.   */
-} tsc_timestamp_t; /* 8 bytes */
+}; /* 8 bytes */
+typedef struct tsc_timestamp tsc_timestamp_t;
 
 struct pt_fpreg {
     union {
@@ -105,7 +108,7 @@ struct pt_fpreg {
     } u;
 };
 
-typedef struct cpu_user_regs{
+struct cpu_user_regs {
     /* The following registers are saved by SAVE_MIN: */
     unsigned long b6;  /* scratch */
     unsigned long b7;  /* scratch */
@@ -179,9 +182,10 @@ typedef struct cpu_user_regs{
     unsigned long eml_unat;    /* used for emulating instruction */
     unsigned long rfi_pfs;     /* used for elulating rfi */
 
-}cpu_user_regs_t;
-
-typedef union {
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+
+union vac {
     unsigned long value;
     struct {
         int a_int:1;
@@ -193,9 +197,10 @@ typedef union {
         int a_bsw:1;
         long reserved:57;
     };
-} vac_t;
-
-typedef union {
+};
+typedef union vac vac_t;
+
+union vdc {
     unsigned long value;
     struct {
         int d_vmsw:1;
@@ -206,11 +211,12 @@ typedef union {
         int d_itm:1;
         long reserved:58;
     };
-} vdc_t;
-
-typedef struct {
-    vac_t   vac;
-    vdc_t   vdc;
+};
+typedef union vdc vdc_t;
+
+struct mapped_regs {
+    union vac   vac;
+    union vdc   vdc;
     unsigned long  virt_env_vaddr;
     unsigned long  reserved1[29];
     unsigned long  vhpi;
@@ -290,27 +296,32 @@ typedef struct {
     unsigned long  reserved6[3456];
     unsigned long  vmm_avail[128];
     unsigned long  reserved7[4096];
-} mapped_regs_t;
-
-typedef struct {
-    mapped_regs_t *privregs;
-    int evtchn_vector;
-} arch_vcpu_info_t;
+};
+typedef struct mapped_regs mapped_regs_t;
+
+struct arch_vcpu_info {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
 
 typedef mapped_regs_t vpd_t;
 
-typedef struct {
+struct arch_shared_info {
     unsigned int flags;
     unsigned long start_info_pfn;
-} arch_shared_info_t;
-
-typedef struct {
+
+    /* Interrupt vector for event channel.  */
+    int evtchn_vector;
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_initrd_info {
     unsigned long start;
     unsigned long size;
-} arch_initrd_info_t;
+};
+typedef struct arch_initrd_info arch_initrd_info_t;
 
 #define IA64_COMMAND_LINE_SIZE 512
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
 #define VGCF_FPU_VALID (1<<0)
 #define VGCF_VMX_GUEST (1<<1)
 #define VGCF_IN_KERNEL (1<<2)
@@ -320,19 +331,17 @@ typedef struct vcpu_guest_context {
     unsigned long sys_pgnr;    /* System pages out of domain memory */
     unsigned long vm_assist;   /* VMASST_TYPE_* bitmap, now none on IPF */
 
-    cpu_user_regs_t regs;
-    arch_vcpu_info_t vcpu;
-    arch_shared_info_t shared;
-    arch_initrd_info_t initrd;
+    struct cpu_user_regs regs;
+    struct mapped_regs *privregs;
+    struct arch_shared_info shared;
+    struct arch_initrd_info initrd;
     char cmdline[IA64_COMMAND_LINE_SIZE];
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
 // dom0 vp op
-#define __HYPERVISOR_ia64_dom0vp_op     256 // XXX sufficient large
-                                            // TODO
-                                            // arch specific hypercall
-                                            // number conversion
+#define __HYPERVISOR_ia64_dom0vp_op     __HYPERVISOR_arch_0
 #define IA64_DOM0VP_ioremap             0       // map io space in machine
                                                 // address to dom0 physical
                                                 // address space.
@@ -352,10 +361,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
                                                 // to the corresponding
                                                 // pseudo physical page frame
                                                 // number of the caller domain
-#define IA64_DOM0VP_populate_physmap    16      // allocate machine-contigusous
-                                                // memory region and
-                                                // map it to pseudo physical
-                                                // address
 #define IA64_DOM0VP_zap_physmap         17      // unmap and free pages
                                                 // contained in the specified
                                                 // pseudo physical region
@@ -364,6 +369,32 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
                                                 // address space.
 
 #endif /* !__ASSEMBLY__ */
+
+/* Hyperprivops.  */
+#define HYPERPRIVOP_RFI                        0x1
+#define HYPERPRIVOP_RSM_DT             0x2
+#define HYPERPRIVOP_SSM_DT             0x3
+#define HYPERPRIVOP_COVER              0x4
+#define HYPERPRIVOP_ITC_D              0x5
+#define HYPERPRIVOP_ITC_I              0x6
+#define HYPERPRIVOP_SSM_I              0x7
+#define HYPERPRIVOP_GET_IVR            0x8
+#define HYPERPRIVOP_GET_TPR            0x9
+#define HYPERPRIVOP_SET_TPR            0xa
+#define HYPERPRIVOP_EOI                        0xb
+#define HYPERPRIVOP_SET_ITM            0xc
+#define HYPERPRIVOP_THASH              0xd
+#define HYPERPRIVOP_PTC_GA             0xe
+#define HYPERPRIVOP_ITR_D              0xf
+#define HYPERPRIVOP_GET_RR             0x10
+#define HYPERPRIVOP_SET_RR             0x11
+#define HYPERPRIVOP_SET_KR             0x12
+#define HYPERPRIVOP_FC                 0x13
+#define HYPERPRIVOP_GET_CPUID          0x14
+#define HYPERPRIVOP_GET_PMD            0x15
+#define HYPERPRIVOP_GET_EFLAG          0x16
+#define HYPERPRIVOP_SET_EFLAG          0x17
+#define HYPERPRIVOP_MAX                        0x17
 
 #endif /* __HYPERVISOR_IF_IA64_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_32.h  Tue May 30 14:30:34 2006 -0500
@@ -95,15 +95,16 @@ DEFINE_XEN_GUEST_HANDLE(void);
 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
     uint16_t      cs;      /* code selector                                 */
     unsigned long address; /* code offset                                   */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
     uint32_t ebx;
     uint32_t ecx;
     uint32_t edx;
@@ -124,7 +125,8 @@ typedef struct cpu_user_regs {
     uint16_t ds, _pad3;
     uint16_t fs, _pad4;
     uint16_t gs, _pad5;
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
@@ -133,14 +135,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
  */
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
 #define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
-    cpu_user_regs_t user_regs;              /* User-level CPU registers     */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -152,25 +154,29 @@ typedef struct vcpu_guest_context {
     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
     unsigned long failsafe_callback_eip;
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
-typedef struct arch_shared_info {
+struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
     unsigned long cr2;
     unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
-
-typedef struct {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
     unsigned long cs;
     unsigned long eip;
-} xen_callback_t;
+};
+typedef struct xen_callback xen_callback_t;
 
 #endif /* !__ASSEMBLY__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_64.h  Tue May 30 14:30:34 2006 -0500
@@ -150,12 +150,13 @@ struct iret_context {
 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
     uint16_t      cs;      /* code selector                                 */
     unsigned long address; /* code offset                                   */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
 #ifdef __GNUC__
@@ -166,7 +167,7 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 #define __DECL_REG(name) uint64_t r ## name
 #endif
 
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
     uint64_t r15;
     uint64_t r14;
     uint64_t r13;
@@ -195,7 +196,8 @@ typedef struct cpu_user_regs {
     uint16_t ds, _pad4[3];
     uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
     uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
 
 #undef __DECL_REG
@@ -206,14 +208,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
  */
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
 #define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
-    cpu_user_regs_t user_regs;              /* User-level CPU registers     */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -228,20 +230,23 @@ typedef struct vcpu_guest_context {
     uint64_t      fs_base;
     uint64_t      gs_base_kernel;
     uint64_t      gs_base_user;
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
-typedef struct arch_shared_info {
+struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
     unsigned long cr2;
     unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
+};
+typedef struct arch_vcpu_info  arch_vcpu_info_t;
 
 typedef unsigned long xen_callback_t;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/callback.h
--- a/xen/include/public/callback.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/callback.h     Tue May 30 14:30:34 2006 -0500
@@ -32,10 +32,11 @@
  * Register a callback.
  */
 #define CALLBACKOP_register                0
-typedef struct callback_register {
+struct callback_register {
      int type;
      xen_callback_t address;
-} callback_register_t;
+};
+typedef struct callback_register callback_register_t;
 DEFINE_XEN_GUEST_HANDLE(callback_register_t);
 
 /*
@@ -45,9 +46,10 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
  * you attempt to unregister such a callback.
  */
 #define CALLBACKOP_unregister              1
-typedef struct callback_unregister {
+struct callback_unregister {
      int type;
-} callback_unregister_t;
+};
+typedef struct callback_unregister callback_unregister_t;
 DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
 
 #endif /* __XEN_PUBLIC_CALLBACK_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/dom0_ops.h     Tue May 30 14:30:34 2006 -0500
@@ -24,14 +24,15 @@
 /************************************************************************/
 
 #define DOM0_GETMEMLIST        2
-typedef struct dom0_getmemlist {
+struct dom0_getmemlist {
     /* IN variables. */
     domid_t       domain;
     unsigned long max_pfns;
     XEN_GUEST_HANDLE(ulong) buffer;
     /* OUT variables. */
     unsigned long num_pfns;
-} dom0_getmemlist_t;
+};
+typedef struct dom0_getmemlist dom0_getmemlist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
 
 #define DOM0_SCHEDCTL          6
@@ -45,39 +46,43 @@ DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t
 DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t);
 
 #define DOM0_CREATEDOMAIN      8
-typedef struct dom0_createdomain {
+struct dom0_createdomain {
     /* IN parameters */
     uint32_t ssidref;
     xen_domain_handle_t handle;
     /* IN/OUT parameters. */
     /* Identifier for new domain (auto-allocate if zero is specified). */
     domid_t domain;
-} dom0_createdomain_t;
+};
+typedef struct dom0_createdomain dom0_createdomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_createdomain_t);
 
 #define DOM0_DESTROYDOMAIN     9
-typedef struct dom0_destroydomain {
-    /* IN variables. */
-    domid_t domain;
-} dom0_destroydomain_t;
+struct dom0_destroydomain {
+    /* IN variables. */
+    domid_t domain;
+};
+typedef struct dom0_destroydomain dom0_destroydomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_destroydomain_t);
 
 #define DOM0_PAUSEDOMAIN      10
-typedef struct dom0_pausedomain {
+struct dom0_pausedomain {
     /* IN parameters. */
     domid_t domain;
-} dom0_pausedomain_t;
+};
+typedef struct dom0_pausedomain dom0_pausedomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_pausedomain_t);
 
 #define DOM0_UNPAUSEDOMAIN    11
-typedef struct dom0_unpausedomain {
+struct dom0_unpausedomain {
     /* IN parameters. */
     domid_t domain;
-} dom0_unpausedomain_t;
+};
+typedef struct dom0_unpausedomain dom0_unpausedomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_unpausedomain_t);
 
 #define DOM0_GETDOMAININFO    12
-typedef struct dom0_getdomaininfo {
+struct dom0_getdomaininfo {
     /* IN variables. */
     domid_t  domain;                  /* NB. IN/OUT variable. */
     /* OUT variables. */
@@ -99,21 +104,23 @@ typedef struct dom0_getdomaininfo {
     uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
     uint32_t ssidref;
     xen_domain_handle_t handle;
-} dom0_getdomaininfo_t;
+};
+typedef struct dom0_getdomaininfo dom0_getdomaininfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfo_t);
 
 #define DOM0_SETVCPUCONTEXT   13
-typedef struct dom0_setvcpucontext {
+struct dom0_setvcpucontext {
     /* IN variables. */
     domid_t               domain;
     uint32_t              vcpu;
     /* IN/OUT parameters */
     XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_setvcpucontext_t;
+};
+typedef struct dom0_setvcpucontext dom0_setvcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setvcpucontext_t);
 
 #define DOM0_MSR              15
-typedef struct dom0_msr {
+struct dom0_msr {
     /* IN variables. */
     uint32_t write;
     cpumap_t cpu_mask;
@@ -123,7 +130,8 @@ typedef struct dom0_msr {
     /* OUT variables. */
     uint32_t out1;
     uint32_t out2;
-} dom0_msr_t;
+};
+typedef struct dom0_msr dom0_msr_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
 
 /*
@@ -131,12 +139,13 @@ DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
  * 1 January, 1970 if the current system time was <system_time>.
  */
 #define DOM0_SETTIME          17
-typedef struct dom0_settime {
+struct dom0_settime {
     /* IN variables. */
     uint32_t secs;
     uint32_t nsecs;
     uint64_t system_time;
-} dom0_settime_t;
+};
+typedef struct dom0_settime dom0_settime_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 
 #define DOM0_GETPAGEFRAMEINFO 18
@@ -151,44 +160,47 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 #define LTAB_MASK XTAB
 #define LTABTYPE_MASK (0x7<<LTAB_SHIFT)
 
-typedef struct dom0_getpageframeinfo {
+struct dom0_getpageframeinfo {
     /* IN variables. */
     unsigned long mfn;     /* Machine page frame number to query.       */
     domid_t domain;        /* To which domain does the frame belong?    */
     /* OUT variables. */
     /* Is the page PINNED to a type? */
     uint32_t type;         /* see above type defs */
-} dom0_getpageframeinfo_t;
+};
+typedef struct dom0_getpageframeinfo dom0_getpageframeinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo_t);
 
 /*
  * Read console content from Xen buffer ring.
  */
 #define DOM0_READCONSOLE      19
-typedef struct dom0_readconsole {
+struct dom0_readconsole {
     /* IN variables. */
     uint32_t clear;            /* Non-zero -> clear after reading. */
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(char) buffer; /* In: Buffer start; Out: Used buffer start 
*/
     uint32_t count;            /* In: Buffer size;  Out: Used buffer size  */
-} dom0_readconsole_t;
+};
+typedef struct dom0_readconsole dom0_readconsole_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_readconsole_t);
 
 /*
  * Set which physical cpus a vcpu can execute on.
  */
 #define DOM0_SETVCPUAFFINITY  20
-typedef struct dom0_setvcpuaffinity {
+struct dom0_setvcpuaffinity {
     /* IN variables. */
     domid_t   domain;
     uint32_t  vcpu;
     cpumap_t  cpumap;
-} dom0_setvcpuaffinity_t;
+};
+typedef struct dom0_setvcpuaffinity dom0_setvcpuaffinity_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setvcpuaffinity_t);
 
 /* Get trace buffers machine base address */
 #define DOM0_TBUFCONTROL       21
-typedef struct dom0_tbufcontrol {
+struct dom0_tbufcontrol {
     /* IN variables */
 #define DOM0_TBUF_GET_INFO     0
 #define DOM0_TBUF_SET_CPU_MASK 1
@@ -203,14 +215,15 @@ typedef struct dom0_tbufcontrol {
     /* OUT variables */
     unsigned long buffer_mfn;
     uint32_t size;
-} dom0_tbufcontrol_t;
+};
+typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_tbufcontrol_t);
 
 /*
  * Get physical information about the host machine
  */
 #define DOM0_PHYSINFO         22
-typedef struct dom0_physinfo {
+struct dom0_physinfo {
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
     uint32_t sockets_per_node;
@@ -219,17 +232,19 @@ typedef struct dom0_physinfo {
     unsigned long total_pages;
     unsigned long free_pages;
     uint32_t hw_cap[8];
-} dom0_physinfo_t;
+};
+typedef struct dom0_physinfo dom0_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t);
 
 /*
  * Get the ID of the current scheduler.
  */
 #define DOM0_SCHED_ID        24
-typedef struct dom0_sched_id {
+struct dom0_sched_id {
     /* OUT variable */
     uint32_t sched_id;
-} dom0_sched_id_t;
+};
+typedef struct dom0_sched_id dom0_sched_id_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t);
 
 /*
@@ -246,15 +261,16 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t)
 #define DOM0_SHADOW_CONTROL_OP_CLEAN       11
 #define DOM0_SHADOW_CONTROL_OP_PEEK        12
 
-typedef struct dom0_shadow_control_stats {
+struct dom0_shadow_control_stats {
     uint32_t fault_count;
     uint32_t dirty_count;
     uint32_t dirty_net_count;
     uint32_t dirty_block_count;
-} dom0_shadow_control_stats_t;
+};
+typedef struct dom0_shadow_control_stats dom0_shadow_control_stats_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_stats_t);
 
-typedef struct dom0_shadow_control {
+struct dom0_shadow_control {
     /* IN variables. */
     domid_t        domain;
     uint32_t       op;
@@ -262,26 +278,29 @@ typedef struct dom0_shadow_control {
     /* IN/OUT variables. */
     unsigned long  pages;        /* size of buffer, updated with actual size */
     /* OUT variables. */
-    dom0_shadow_control_stats_t stats;
-} dom0_shadow_control_t;
+    struct dom0_shadow_control_stats stats;
+};
+typedef struct dom0_shadow_control dom0_shadow_control_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_t);
 
 #define DOM0_SETDOMAINMAXMEM   28
-typedef struct dom0_setdomainmaxmem {
+struct dom0_setdomainmaxmem {
     /* IN variables. */
     domid_t       domain;
     unsigned long max_memkb;
-} dom0_setdomainmaxmem_t;
+};
+typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
 
 #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
-typedef struct dom0_getpageframeinfo2 {
+struct dom0_getpageframeinfo2 {
     /* IN variables. */
     domid_t        domain;
     unsigned long  num;
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(ulong) array;
-} dom0_getpageframeinfo2_t;
+};
+typedef struct dom0_getpageframeinfo2 dom0_getpageframeinfo2_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo2_t);
 
 /*
@@ -292,7 +311,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
  * (x86-specific).
  */
 #define DOM0_ADD_MEMTYPE         31
-typedef struct dom0_add_memtype {
+struct dom0_add_memtype {
     /* IN variables. */
     unsigned long mfn;
     unsigned long nr_mfns;
@@ -300,7 +319,8 @@ typedef struct dom0_add_memtype {
     /* OUT variables. */
     uint32_t      handle;
     uint32_t      reg;
-} dom0_add_memtype_t;
+};
+typedef struct dom0_add_memtype dom0_add_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
 
 /*
@@ -311,23 +331,25 @@ DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype
  * (x86-specific).
  */
 #define DOM0_DEL_MEMTYPE         32
-typedef struct dom0_del_memtype {
+struct dom0_del_memtype {
     /* IN variables. */
     uint32_t handle;
     uint32_t reg;
-} dom0_del_memtype_t;
+};
+typedef struct dom0_del_memtype dom0_del_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_del_memtype_t);
 
 /* Read current type of an MTRR (x86-specific). */
 #define DOM0_READ_MEMTYPE        33
-typedef struct dom0_read_memtype {
+struct dom0_read_memtype {
     /* IN variables. */
     uint32_t reg;
     /* OUT variables. */
     unsigned long mfn;
     unsigned long nr_mfns;
     uint32_t type;
-} dom0_read_memtype_t;
+};
+typedef struct dom0_read_memtype dom0_read_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_read_memtype_t);
 
 /* Interface for controlling Xen software performance counters. */
@@ -335,50 +357,56 @@ DEFINE_XEN_GUEST_HANDLE(dom0_read_memtyp
 /* Sub-operations: */
 #define DOM0_PERFCCONTROL_OP_RESET 1   /* Reset all counters to zero. */
 #define DOM0_PERFCCONTROL_OP_QUERY 2   /* Get perfctr information. */
-typedef struct dom0_perfc_desc {
+struct dom0_perfc_desc {
     char         name[80];             /* name of perf counter */
     uint32_t     nr_vals;              /* number of values for this counter */
     uint32_t     vals[64];             /* array of values */
-} dom0_perfc_desc_t;
+};
+typedef struct dom0_perfc_desc dom0_perfc_desc_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_perfc_desc_t);
-typedef struct dom0_perfccontrol {
+
+struct dom0_perfccontrol {
     /* IN variables. */
     uint32_t       op;                /*  DOM0_PERFCCONTROL_OP_??? */
     /* OUT variables. */
     uint32_t       nr_counters;       /*  number of counters */
     XEN_GUEST_HANDLE(dom0_perfc_desc_t) desc; /*  counter information (or 
NULL) */
-} dom0_perfccontrol_t;
+};
+typedef struct dom0_perfccontrol dom0_perfccontrol_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_perfccontrol_t);
 
 #define DOM0_MICROCODE           35
-typedef struct dom0_microcode {
+struct dom0_microcode {
     /* IN variables. */
     XEN_GUEST_HANDLE(void) data;          /* Pointer to microcode data */
     uint32_t length;                  /* Length of microcode data. */
-} dom0_microcode_t;
+};
+typedef struct dom0_microcode dom0_microcode_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_microcode_t);
 
 #define DOM0_IOPORT_PERMISSION   36
-typedef struct dom0_ioport_permission {
+struct dom0_ioport_permission {
     domid_t  domain;                  /* domain to be affected */
     uint32_t first_port;              /* first port int range */
     uint32_t nr_ports;                /* size of port range */
     uint8_t  allow_access;            /* allow or deny access to range? */
-} dom0_ioport_permission_t;
+};
+typedef struct dom0_ioport_permission dom0_ioport_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_ioport_permission_t);
 
 #define DOM0_GETVCPUCONTEXT      37
-typedef struct dom0_getvcpucontext {
+struct dom0_getvcpucontext {
     /* IN variables. */
     domid_t  domain;                  /* domain to be affected */
     uint32_t vcpu;                    /* vcpu # */
     /* OUT variables. */
     XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_getvcpucontext_t;
+};
+typedef struct dom0_getvcpucontext dom0_getvcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getvcpucontext_t);
 
 #define DOM0_GETVCPUINFO         43
-typedef struct dom0_getvcpuinfo {
+struct dom0_getvcpuinfo {
     /* IN variables. */
     domid_t  domain;                  /* domain to be affected */
     uint32_t vcpu;                    /* vcpu # */
@@ -389,92 +417,104 @@ typedef struct dom0_getvcpuinfo {
     uint64_t cpu_time;                /* total cpu time consumed (ns) */
     uint32_t cpu;                     /* current mapping   */
     cpumap_t cpumap;                  /* allowable mapping */
-} dom0_getvcpuinfo_t;
+};
+typedef struct dom0_getvcpuinfo dom0_getvcpuinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getvcpuinfo_t);
 
 #define DOM0_GETDOMAININFOLIST   38
-typedef struct dom0_getdomaininfolist {
+struct dom0_getdomaininfolist {
     /* IN variables. */
     domid_t               first_domain;
     uint32_t              max_domains;
     XEN_GUEST_HANDLE(dom0_getdomaininfo_t) buffer;
     /* OUT variables. */
     uint32_t              num_domains;
-} dom0_getdomaininfolist_t;
+};
+typedef struct dom0_getdomaininfolist dom0_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfolist_t);
 
 #define DOM0_PLATFORM_QUIRK      39
 #define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
 #define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
 #define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
-typedef struct dom0_platform_quirk {
+struct dom0_platform_quirk {
     /* IN variables. */
     uint32_t quirk_id;
-} dom0_platform_quirk_t;
+};
+typedef struct dom0_platform_quirk dom0_platform_quirk_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_platform_quirk_t);
 
-#define DOM0_PHYSICAL_MEMORY_MAP 40
-typedef struct dom0_memory_map_entry {
+#define DOM0_PHYSICAL_MEMORY_MAP 40   /* Unimplemented from 3.0.3 onwards */
+struct dom0_memory_map_entry {
     uint64_t start, end;
     uint32_t flags; /* reserved */
     uint8_t  is_ram;
-} dom0_memory_map_entry_t;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
-typedef struct dom0_physical_memory_map {
+
+struct dom0_physical_memory_map {
     /* IN variables. */
     uint32_t max_map_entries;
     /* OUT variables. */
     uint32_t nr_map_entries;
     XEN_GUEST_HANDLE(dom0_memory_map_entry_t) memory_map;
-} dom0_physical_memory_map_t;
+};
+typedef struct dom0_physical_memory_map dom0_physical_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physical_memory_map_t);
 
 #define DOM0_MAX_VCPUS 41
-typedef struct dom0_max_vcpus {
+struct dom0_max_vcpus {
     domid_t  domain;        /* domain to be affected */
     uint32_t max;           /* maximum number of vcpus */
-} dom0_max_vcpus_t;
+};
+typedef struct dom0_max_vcpus dom0_max_vcpus_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_max_vcpus_t);
 
 #define DOM0_SETDOMAINHANDLE 44
-typedef struct dom0_setdomainhandle {
+struct dom0_setdomainhandle {
     domid_t domain;
     xen_domain_handle_t handle;
-} dom0_setdomainhandle_t;
+};
+typedef struct dom0_setdomainhandle dom0_setdomainhandle_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainhandle_t);
 
 #define DOM0_SETDEBUGGING 45
-typedef struct dom0_setdebugging {
+struct dom0_setdebugging {
     domid_t domain;
     uint8_t enable;
-} dom0_setdebugging_t;
+};
+typedef struct dom0_setdebugging dom0_setdebugging_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdebugging_t);
 
 #define DOM0_IRQ_PERMISSION 46
-typedef struct dom0_irq_permission {
+struct dom0_irq_permission {
     domid_t domain;          /* domain to be affected */
     uint8_t pirq;
     uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
-} dom0_irq_permission_t;
+};
+typedef struct dom0_irq_permission dom0_irq_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_irq_permission_t);
 
 #define DOM0_IOMEM_PERMISSION 47
-typedef struct dom0_iomem_permission {
+struct dom0_iomem_permission {
     domid_t  domain;          /* domain to be affected */
     unsigned long first_mfn;  /* first page (physical page number) in range */
     unsigned long nr_mfns;    /* number of pages in range (>0) */
     uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
-} dom0_iomem_permission_t;
+};
+typedef struct dom0_iomem_permission dom0_iomem_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permission_t);
 
 #define DOM0_HYPERCALL_INIT   48
-typedef struct dom0_hypercall_init {
+struct dom0_hypercall_init {
     domid_t  domain;          /* domain to be affected */
     unsigned long mfn;        /* machine frame to be initialised */
-} dom0_hypercall_init_t;
+};
+typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
 
-typedef struct dom0_op {
+struct dom0_op {
     uint32_t cmd;
     uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
     union {
@@ -517,7 +557,8 @@ typedef struct dom0_op {
         struct dom0_hypercall_init    hypercall_init;
         uint8_t                       pad[128];
     } u;
-} dom0_op_t;
+};
+typedef struct dom0_op dom0_op_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
 
 #endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/event_channel.h
--- a/xen/include/public/event_channel.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/event_channel.h        Tue May 30 14:30:34 2006 -0500
@@ -28,12 +28,13 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
  *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
  */
 #define EVTCHNOP_alloc_unbound    6
-typedef struct evtchn_alloc_unbound {
+struct evtchn_alloc_unbound {
     /* IN parameters */
     domid_t dom, remote_dom;
     /* OUT parameters */
     evtchn_port_t port;
-} evtchn_alloc_unbound_t;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
 
 /*
  * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
@@ -45,13 +46,14 @@ typedef struct evtchn_alloc_unbound {
  *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
  */
 #define EVTCHNOP_bind_interdomain 0
-typedef struct evtchn_bind_interdomain {
+struct evtchn_bind_interdomain {
     /* IN parameters. */
     domid_t remote_dom;
     evtchn_port_t remote_port;
     /* OUT parameters. */
     evtchn_port_t local_port;
-} evtchn_bind_interdomain_t;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
 
 /*
  * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
@@ -66,13 +68,14 @@ typedef struct evtchn_bind_interdomain {
  *     binding cannot be changed.
  */
 #define EVTCHNOP_bind_virq        1
-typedef struct evtchn_bind_virq {
+struct evtchn_bind_virq {
     /* IN parameters. */
     uint32_t virq;
     uint32_t vcpu;
     /* OUT parameters. */
     evtchn_port_t port;
-} evtchn_bind_virq_t;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
 
 /*
  * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
@@ -81,14 +84,15 @@ typedef struct evtchn_bind_virq {
  *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
  */
 #define EVTCHNOP_bind_pirq        2
-typedef struct evtchn_bind_pirq {
+struct evtchn_bind_pirq {
     /* IN parameters. */
     uint32_t pirq;
 #define BIND_PIRQ__WILL_SHARE 1
     uint32_t flags; /* BIND_PIRQ__* */
     /* OUT parameters. */
     evtchn_port_t port;
-} evtchn_bind_pirq_t;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
 
 /*
  * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
@@ -97,11 +101,12 @@ typedef struct evtchn_bind_pirq {
  *     may not be changed.
  */
 #define EVTCHNOP_bind_ipi         7
-typedef struct evtchn_bind_ipi {
-    uint32_t vcpu;
-    /* OUT parameters. */
-    evtchn_port_t port;
-} evtchn_bind_ipi_t;
+struct evtchn_bind_ipi {
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
 
 /*
  * EVTCHNOP_close: Close a local event channel <port>. If the channel is
@@ -109,20 +114,22 @@ typedef struct evtchn_bind_ipi {
  * (EVTCHNSTAT_unbound), awaiting a new connection.
  */
 #define EVTCHNOP_close            3
-typedef struct evtchn_close {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_close_t;
+struct evtchn_close {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
 
 /*
  * EVTCHNOP_send: Send an event to the remote end of the channel whose local
  * endpoint is <port>.
  */
 #define EVTCHNOP_send             4
-typedef struct evtchn_send {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_send_t;
+struct evtchn_send {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
 
 /*
  * EVTCHNOP_status: Get the current status of the communication channel which
@@ -133,7 +140,7 @@ typedef struct evtchn_send {
  *     channel for which <dom> is not DOMID_SELF.
  */
 #define EVTCHNOP_status           5
-typedef struct evtchn_status {
+struct evtchn_status {
     /* IN parameters */
     domid_t  dom;
     evtchn_port_t port;
@@ -157,7 +164,8 @@ typedef struct evtchn_status {
         uint32_t pirq;      /* EVTCHNSTAT_pirq        */
         uint32_t virq;      /* EVTCHNSTAT_virq        */
     } u;
-} evtchn_status_t;
+};
+typedef struct evtchn_status evtchn_status_t;
 
 /*
  * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
@@ -172,41 +180,44 @@ typedef struct evtchn_status {
  *     has its binding reset to vcpu0).
  */
 #define EVTCHNOP_bind_vcpu        8
-typedef struct evtchn_bind_vcpu {
-    /* IN parameters. */
-    evtchn_port_t port;
-    uint32_t vcpu;
-} evtchn_bind_vcpu_t;
+struct evtchn_bind_vcpu {
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
 
 /*
  * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
  * a notification to the appropriate VCPU if an event is pending.
  */
 #define EVTCHNOP_unmask           9
-typedef struct evtchn_unmask {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_unmask_t;
+struct evtchn_unmask {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
 
 /*
  * Argument to event_channel_op_compat() hypercall. Superceded by new
  * event_channel_op() hypercall since 0x00030202.
  */
-typedef struct evtchn_op {
+struct evtchn_op {
     uint32_t cmd; /* EVTCHNOP_* */
     union {
-        evtchn_alloc_unbound_t    alloc_unbound;
-        evtchn_bind_interdomain_t bind_interdomain;
-        evtchn_bind_virq_t        bind_virq;
-        evtchn_bind_pirq_t        bind_pirq;
-        evtchn_bind_ipi_t         bind_ipi;
-        evtchn_close_t            close;
-        evtchn_send_t             send;
-        evtchn_status_t           status;
-        evtchn_bind_vcpu_t        bind_vcpu;
-        evtchn_unmask_t           unmask;
+        struct evtchn_alloc_unbound    alloc_unbound;
+        struct evtchn_bind_interdomain bind_interdomain;
+        struct evtchn_bind_virq        bind_virq;
+        struct evtchn_bind_pirq        bind_pirq;
+        struct evtchn_bind_ipi         bind_ipi;
+        struct evtchn_close            close;
+        struct evtchn_send             send;
+        struct evtchn_status           status;
+        struct evtchn_bind_vcpu        bind_vcpu;
+        struct evtchn_unmask           unmask;
     } u;
-} evtchn_op_t;
+};
+typedef struct evtchn_op evtchn_op_t;
 DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
 
 #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/grant_table.h  Tue May 30 14:30:34 2006 -0500
@@ -71,7 +71,7 @@
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-typedef struct grant_entry {
+struct grant_entry {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
 #if defined(__powerpc__)
     ulong flags;
@@ -85,7 +85,8 @@ typedef struct grant_entry {
      * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
      */
     uint32_t frame;
-} grant_entry_t;
+};
+typedef struct grant_entry grant_entry_t;
 
 /*
  * Type of grant entry.
@@ -160,7 +161,7 @@ typedef uint32_t grant_handle_t;
  *     to be accounted to the correct grant reference!
  */
 #define GNTTABOP_map_grant_ref        0
-typedef struct gnttab_map_grant_ref {
+struct gnttab_map_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
     uint32_t flags;               /* GNTMAP_* */
@@ -170,7 +171,8 @@ typedef struct gnttab_map_grant_ref {
     int16_t  status;              /* GNTST_* */
     grant_handle_t handle;
     uint64_t dev_bus_addr;
-} gnttab_map_grant_ref_t;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
 
 /*
@@ -185,14 +187,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant
  *     mappings will remain in the device or host TLBs.
  */
 #define GNTTABOP_unmap_grant_ref      1
-typedef struct gnttab_unmap_grant_ref {
+struct gnttab_unmap_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
     uint64_t dev_bus_addr;
     grant_handle_t handle;
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
-} gnttab_unmap_grant_ref_t;
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
 
 /*
@@ -205,14 +208,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_gra
  *  3. Xen may not support more than a single grant-table page per domain.
  */
 #define GNTTABOP_setup_table          2
-typedef struct gnttab_setup_table {
+struct gnttab_setup_table {
     /* IN parameters. */
     domid_t  dom;
     uint32_t nr_frames;
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
     XEN_GUEST_HANDLE(ulong) frame_list;
-} gnttab_setup_table_t;
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
 
 /*
@@ -220,12 +224,13 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_setup_tab
  * xen console. Debugging use only.
  */
 #define GNTTABOP_dump_table           3
-typedef struct gnttab_dump_table {
+struct gnttab_dump_table {
     /* IN parameters. */
     domid_t dom;
     /* OUT parameters. */
     int16_t status;               /* GNTST_* */
-} gnttab_dump_table_t;
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
 
 /*
@@ -237,14 +242,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
  * to the calling domain *unless* the error is GNTST_bad_page.
  */
 #define GNTTABOP_transfer                4
-typedef struct gnttab_transfer {
+struct gnttab_transfer {
     /* IN parameters. */
     unsigned long mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
     int16_t       status;
-} gnttab_transfer_t;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/ioreq.h    Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@
  * prepare this structure and notify service OS and DM by sending
  * virq
  */
-typedef struct {
+struct ioreq {
     uint64_t addr;          /*  physical address            */
     uint64_t size;          /*  size in bytes               */
     uint64_t count;         /*  for rep prefixes            */
@@ -55,31 +55,35 @@ typedef struct {
     uint8_t df:1;
     uint8_t type;           /* I/O type                     */
     uint64_t io_count;      /* How many IO done on a vcpu   */
-} ioreq_t;
+};
+typedef struct ioreq ioreq_t;
 
 #define MAX_VECTOR      256
 #define BITS_PER_BYTE   8
 #define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
 #define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
 
-typedef struct {
+struct global_iodata {
     uint16_t    pic_elcr;
     uint16_t    pic_irr;
     uint16_t    pic_last_irr;
     uint16_t    pic_clear_irr;
-} global_iodata_t;
+};
+typedef struct global_iodata global_iodata_t;
 
-typedef struct {
-    ioreq_t         vp_ioreq;
+struct vcpu_iodata {
+    struct ioreq         vp_ioreq;
     /* Event channel port */
     unsigned int    vp_eport;   /* VMX vcpu uses this to notify DM */
     unsigned int    dm_eport;   /* DM uses this to notify VMX vcpu */
-} vcpu_iodata_t;
+};
+typedef struct vcpu_iodata vcpu_iodata_t;
 
-typedef struct {
-    global_iodata_t sp_global;
-    vcpu_iodata_t   vcpu_iodata[1];
-} shared_iopage_t;
+struct shared_iopage {
+    struct global_iodata sp_global;
+    struct vcpu_iodata   vcpu_iodata[1];
+};
+typedef struct shared_iopage shared_iopage_t;
 
 #endif /* _IOREQ_H_ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/vmx_assist.h
--- a/xen/include/public/hvm/vmx_assist.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/vmx_assist.h       Tue May 30 14:30:34 2006 -0500
@@ -37,7 +37,7 @@ union vmcs_arbytes {
 /*
  * World switch state
  */
-typedef struct vmx_assist_context {
+struct vmx_assist_context {
     uint32_t  eip;        /* execution pointer */
     uint32_t  esp;        /* stack pointer */
     uint32_t  eflags;     /* flags register */
@@ -80,7 +80,8 @@ typedef struct vmx_assist_context {
     uint32_t  ldtr_limit;
     uint32_t  ldtr_base;
     union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
+};
+typedef struct vmx_assist_context vmx_assist_context_t;
 
 #endif /* __ASSEMBLY__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/blkif.h     Tue May 30 14:30:34 2006 -0500
@@ -39,7 +39,7 @@
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
-typedef struct blkif_request {
+struct blkif_request {
     uint8_t        operation;    /* BLKIF_OP_???                         */
     uint8_t        nr_segments;  /* number of segments                   */
     blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -51,13 +51,15 @@ typedef struct blkif_request {
         /* @last_sect: last sector in frame to transfer (inclusive).     */
         uint8_t     first_sect, last_sect;
     } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-} blkif_request_t;
+};
+typedef struct blkif_request blkif_request_t;
 
-typedef struct blkif_response {
+struct blkif_response {
     uint64_t        id;              /* copied from request */
     uint8_t         operation;       /* copied from request */
     int16_t         status;          /* BLKIF_RSP_???       */
-} blkif_response_t;
+};
+typedef struct blkif_response blkif_response_t;
 
 #define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
 #define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
@@ -66,7 +68,7 @@ typedef struct blkif_response {
  * Generate blkif ring structures and types.
  */
 
-DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
 
 #define VDISK_CDROM        0x1
 #define VDISK_REMOVABLE    0x2
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/netif.h     Tue May 30 14:30:34 2006 -0500
@@ -13,10 +13,10 @@
 #include "../grant_table.h"
 
 /*
- * Note that there is *never* any need to notify the backend when enqueuing
- * receive requests (netif_rx_request_t). Notifications after enqueuing any
- * other type of message should be conditional on the appropriate req_event
- * or rsp_event field in the shared ring.
+ * Note that there is *never* any need to notify the backend when
+ * enqueuing receive requests (struct netif_rx_request). Notifications
+ * after enqueuing any other type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
  */
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
@@ -27,23 +27,26 @@
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
 
-typedef struct netif_tx_request {
+struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
     uint16_t offset;       /* Offset within buffer page */
     uint16_t flags;        /* NETTXF_* */
     uint16_t id;           /* Echoed in response message. */
     uint16_t size;         /* Packet size in bytes.       */
-} netif_tx_request_t;
+};
+typedef struct netif_tx_request netif_tx_request_t;
 
-typedef struct netif_tx_response {
+struct netif_tx_response {
     uint16_t id;
     int16_t  status;       /* NETIF_RSP_* */
-} netif_tx_response_t;
+};
+typedef struct netif_tx_response netif_tx_response_t;
 
-typedef struct {
+struct netif_rx_request {
     uint16_t    id;        /* Echoed in response message.        */
     grant_ref_t gref;      /* Reference to incoming granted frame */
-} netif_rx_request_t;
+};
+typedef struct netif_rx_request netif_rx_request_t;
 
 /* Packet data has been validated against protocol checksum. */
 #define _NETRXF_data_validated (0)
@@ -53,19 +56,20 @@ typedef struct {
 #define _NETRXF_csum_blank     (1)
 #define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
 
-typedef struct {
+struct netif_rx_response {
     uint16_t id;
     uint16_t offset;       /* Offset in page of start of received packet  */
     uint16_t flags;        /* NETRXF_* */
     int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
-} netif_rx_response_t;
+};
+typedef struct netif_rx_response netif_rx_response_t;
 
 /*
  * Generate netif ring structures and types.
  */
 
-DEFINE_RING_TYPES(netif_tx, netif_tx_request_t, netif_tx_response_t);
-DEFINE_RING_TYPES(netif_rx, netif_rx_request_t, netif_rx_response_t);
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
 
 #define NETIF_RSP_DROPPED         -2
 #define NETIF_RSP_ERROR           -1
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/tpmif.h
--- a/xen/include/public/io/tpmif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/tpmif.h     Tue May 30 14:30:34 2006 -0500
@@ -18,12 +18,13 @@
 
 #include "../grant_table.h"
 
-typedef struct {
+struct tpmif_tx_request {
     unsigned long addr;   /* Machine address of packet.   */
     grant_ref_t ref;      /* grant table access reference */
     uint16_t unused;
     uint16_t size;        /* Packet size in bytes.        */
-} tpmif_tx_request_t;
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
 
 /*
  * The TPMIF_TX_RING_SIZE defines the number of pages the
@@ -35,13 +36,15 @@ typedef uint32_t TPMIF_RING_IDX;
 
 /* This structure must fit in a memory page. */
 
-typedef struct {
-    tpmif_tx_request_t req;
-} tpmif_ring_t;
+struct tpmif_ring {
+    struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
 
-typedef struct {
-    tpmif_ring_t ring[TPMIF_TX_RING_SIZE];
-} tpmif_tx_interface_t;
+struct tpmif_tx_interface {
+    struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
 
 #endif
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/xenbus.h    Tue May 30 14:30:34 2006 -0500
@@ -9,34 +9,37 @@
 #ifndef _XEN_PUBLIC_IO_XENBUS_H
 #define _XEN_PUBLIC_IO_XENBUS_H
 
-/* The state of either end of the Xenbus, i.e. the current communication
-   status of initialisation across the bus.  States here imply nothing about
-   the state of the connection between the driver and the kernel's device
-   layers.  */
-typedef enum
-{
-  XenbusStateUnknown      = 0,
-  XenbusStateInitialising = 1,
-  XenbusStateInitWait     = 2,  /* Finished early initialisation, but waiting
-                                   for information from the peer or hotplug
-                                  scripts. */
-  XenbusStateInitialised  = 3,  /* Initialised and waiting for a connection
-                                  from the peer. */
-  XenbusStateConnected    = 4,
-  XenbusStateClosing      = 5,  /* The device is being closed due to an error
-                                  or an unplug event. */
-  XenbusStateClosed       = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
 
-} XenbusState;
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed       = 6
+};
+typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/memory.h
--- a/xen/include/public/memory.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/memory.h       Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,7 @@
 #define XENMEM_increase_reservation 0
 #define XENMEM_decrease_reservation 1
 #define XENMEM_populate_physmap     6
-typedef struct xen_memory_reservation {
+struct xen_memory_reservation {
 
     /*
      * XENMEM_increase_reservation:
@@ -49,7 +49,8 @@ typedef struct xen_memory_reservation {
      */
     domid_t        domid;
 
-} xen_memory_reservation_t;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
 
 /*
@@ -74,7 +75,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser
  * arg == addr of xen_machphys_mfn_list_t.
  */
 #define XENMEM_machphys_mfn_list    5
-typedef struct xen_machphys_mfn_list {
+struct xen_machphys_mfn_list {
     /*
      * Size of the 'extent_start' array. Fewer entries will be filled if the
      * machphys table is smaller than max_extents * 2MB.
@@ -93,7 +94,8 @@ typedef struct xen_machphys_mfn_list {
      * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
      */
     unsigned int nr_extents;
-} xen_machphys_mfn_list_t;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
 
 /*
@@ -102,7 +104,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
  * arg == addr of xen_add_to_physmap_t.
  */
 #define XENMEM_add_to_physmap      7
-typedef struct xen_add_to_physmap {
+struct xen_add_to_physmap {
     /* Which domain to change the mapping for. */
     domid_t domid;
 
@@ -116,7 +118,8 @@ typedef struct xen_add_to_physmap {
 
     /* GPFN where the source mapping page should appear. */
     unsigned long gpfn;
-} xen_add_to_physmap_t;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
 
 /*
@@ -124,7 +127,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physm
  * code on failure. This call only works for auto-translated guests.
  */
 #define XENMEM_translate_gpfn_list  8
-typedef struct xen_translate_gpfn_list {
+struct xen_translate_gpfn_list {
     /* Which domain to translate for? */
     domid_t domid;
 
@@ -139,8 +142,37 @@ typedef struct xen_translate_gpfn_list {
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
     XEN_GUEST_HANDLE(ulong) mfn_list;
-} xen_translate_gpfn_list_t;
+};
+typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ */
+#define XENMEM_machine_memory_map      10
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/nmi.h
--- a/xen/include/public/nmi.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/nmi.h  Tue May 30 14:30:34 2006 -0500
@@ -34,10 +34,11 @@
  * arg == pointer to xennmi_callback structure.
  */
 #define XENNMI_register_callback   0
-typedef struct xennmi_callback {
+struct xennmi_callback {
     unsigned long handler_address;
     unsigned long pad;
-} xennmi_callback_t;
+};
+typedef struct xennmi_callback xennmi_callback_t;
 DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/physdev.h      Tue May 30 14:30:34 2006 -0500
@@ -14,10 +14,11 @@
  * @arg == pointer to physdev_eoi structure.
  */
 #define PHYSDEVOP_eoi                   12
-typedef struct physdev_eoi {
+struct physdev_eoi {
     /* IN */
     uint32_t irq;
-} physdev_eoi_t;
+};
+typedef struct physdev_eoi physdev_eoi_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
 
 /*
@@ -25,12 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
  * @arg == pointer to physdev_irq_status_query structure.
  */
 #define PHYSDEVOP_irq_status_query       5
-typedef struct physdev_irq_status_query {
+struct physdev_irq_status_query {
     /* IN */
     uint32_t irq;
     /* OUT */
     uint32_t flags; /* XENIRQSTAT_* */
-} physdev_irq_status_query_t;
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
 
 /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
@@ -42,10 +44,11 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_stat
  * @arg == pointer to physdev_set_iopl structure.
  */
 #define PHYSDEVOP_set_iopl               6
-typedef struct physdev_set_iopl {
+struct physdev_set_iopl {
     /* IN */
     uint32_t iopl;
-} physdev_set_iopl_t;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
 
 /*
@@ -53,11 +56,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl
  * @arg == pointer to physdev_set_iobitmap structure.
  */
 #define PHYSDEVOP_set_iobitmap           7
-typedef struct physdev_set_iobitmap {
+struct physdev_set_iobitmap {
     /* IN */
     uint8_t *bitmap;
     uint32_t nr_ports;
-} physdev_set_iobitmap_t;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
 
 /*
@@ -66,13 +70,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iobi
  */
 #define PHYSDEVOP_apic_read              8
 #define PHYSDEVOP_apic_write             9
-typedef struct physdev_apic {
+struct physdev_apic {
     /* IN */
     unsigned long apic_physbase;
     uint32_t reg;
     /* IN or OUT */
     uint32_t value;
-} physdev_apic_t;
+};
+typedef struct physdev_apic physdev_apic_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
 
 /*
@@ -81,28 +86,30 @@ DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
  */
 #define PHYSDEVOP_alloc_irq_vector      10
 #define PHYSDEVOP_free_irq_vector       11
-typedef struct physdev_irq {
+struct physdev_irq {
     /* IN */
     uint32_t irq;
     /* IN or OUT */
     uint32_t vector;
-} physdev_irq_t;
+};
+typedef struct physdev_irq physdev_irq_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 
 /*
  * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
  * hypercall since 0x00030202.
  */
-typedef struct physdev_op {
+struct physdev_op {
     uint32_t cmd;
     union {
-        physdev_irq_status_query_t      irq_status_query;
-        physdev_set_iopl_t              set_iopl;
-        physdev_set_iobitmap_t          set_iobitmap;
-        physdev_apic_t                  apic_op;
-        physdev_irq_t                   irq_op;
+        struct physdev_irq_status_query      irq_status_query;
+        struct physdev_set_iopl              set_iopl;
+        struct physdev_set_iobitmap          set_iobitmap;
+        struct physdev_apic                  apic_op;
+        struct physdev_irq                   irq_op;
     } u;
-} physdev_op_t;
+};
+typedef struct physdev_op physdev_op_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched.h
--- a/xen/include/public/sched.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched.h        Tue May 30 14:30:34 2006 -0500
@@ -46,9 +46,10 @@
  * @arg == pointer to sched_shutdown structure.
  */
 #define SCHEDOP_shutdown    2
-typedef struct sched_shutdown {
+struct sched_shutdown {
     unsigned int reason; /* SHUTDOWN_* */
-} sched_shutdown_t;
+};
+typedef struct sched_shutdown sched_shutdown_t;
 DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
 
 /*
@@ -57,11 +58,12 @@ DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t
  * @arg == pointer to sched_poll structure.
  */
 #define SCHEDOP_poll        3
-typedef struct sched_poll {
+struct sched_poll {
     XEN_GUEST_HANDLE(evtchn_port_t) ports;
     unsigned int nr_ports;
     uint64_t timeout;
-} sched_poll_t;
+};
+typedef struct sched_poll sched_poll_t;
 DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
 
 /*
@@ -71,10 +73,11 @@ DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
  * @arg == pointer to sched_remote_shutdown structure.
  */
 #define SCHEDOP_remote_shutdown        4
-typedef struct sched_remote_shutdown {
+struct sched_remote_shutdown {
     domid_t domain_id;         /* Remote domain ID */
     unsigned int reason;       /* SHUTDOWN_xxx reason */
-} sched_remote_shutdown_t;
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
 DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched_ctl.h    Tue May 30 14:30:34 2006 -0500
@@ -10,6 +10,7 @@
 /* Scheduler types. */
 #define SCHED_BVT      0
 #define SCHED_SEDF     4
+#define SCHED_CREDIT   5
 
 /* Set or get info? */
 #define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
             uint32_t extratime;
             uint32_t weight;
         } sedf;
+        struct sched_credit_adjdom {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
     } u;
 };
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -56,7 +56,7 @@
  * @extra_arg == pointer to vcpu_runstate_info structure.
  */
 #define VCPUOP_get_runstate_info    4
-typedef struct vcpu_runstate_info {
+struct vcpu_runstate_info {
     /* VCPU's current state (RUNSTATE_*). */
     int      state;
     /* When was current state entered (system time, ns)? */
@@ -66,7 +66,8 @@ typedef struct vcpu_runstate_info {
      * guaranteed not to drift from system time.
      */
     uint64_t time[4];
-} vcpu_runstate_info_t;
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
 
 /* VCPU is currently running on a physical CPU. */
 #define RUNSTATE_running  0
@@ -99,12 +100,13 @@ typedef struct vcpu_runstate_info {
  * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
  */
 #define VCPUOP_register_runstate_memory_area 5
-typedef struct vcpu_register_runstate_memory_area {
+struct vcpu_register_runstate_memory_area {
     union {
         struct vcpu_runstate_info *v;
         uint64_t p;
     } addr;
-} vcpu_register_runstate_memory_area_t;
+};
+typedef struct vcpu_register_runstate_memory_area 
vcpu_register_runstate_memory_area_t;
 
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/version.h
--- a/xen/include/public/version.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/version.h      Tue May 30 14:30:34 2006 -0500
@@ -22,12 +22,13 @@ typedef char xen_extraversion_t[16];
 
 /* arg == xen_compile_info_t. */
 #define XENVER_compile_info 2
-typedef struct xen_compile_info {
+struct xen_compile_info {
     char compiler[64];
     char compile_by[16];
     char compile_domain[32];
     char compile_date[32];
-} xen_compile_info_t;
+};
+typedef struct xen_compile_info xen_compile_info_t;
 
 #define XENVER_capabilities 3
 typedef char xen_capabilities_info_t[1024];
@@ -38,15 +39,17 @@ typedef char xen_changeset_info_t[64];
 #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
 
 #define XENVER_platform_parameters 5
-typedef struct xen_platform_parameters {
+struct xen_platform_parameters {
     unsigned long virt_start;
-} xen_platform_parameters_t;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
 
 #define XENVER_get_features 6
-typedef struct xen_feature_info {
+struct xen_feature_info {
     unsigned int submap_idx;    /* IN: which 32-bit submap to return */
     uint32_t     submap;        /* OUT: 32-bit submap */
-} xen_feature_info_t;
+};
+typedef struct xen_feature_info xen_feature_info_t;
 
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xen.h
--- a/xen/include/public/xen.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xen.h  Tue May 30 14:30:34 2006 -0500
@@ -195,7 +195,7 @@
 #define MMUEXT_NEW_USER_BASEPTR 15
 
 #ifndef __ASSEMBLY__
-typedef struct mmuext_op {
+struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
@@ -209,7 +209,8 @@ typedef struct mmuext_op {
         /* TLB_FLUSH_MULTI, INVLPG_MULTI */
         void *vcpumask;
     } arg2;
-} mmuext_op_t;
+};
+typedef struct mmuext_op mmuext_op_t;
 DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #endif
 
@@ -273,20 +274,22 @@ typedef uint16_t domid_t;
  * Send an array of these to HYPERVISOR_mmu_update().
  * NB. The fields are natural pointer/address size for this architecture.
  */
-typedef struct mmu_update {
+struct mmu_update {
     uint64_t ptr;       /* Machine address of PTE. */
     uint64_t val;       /* New contents of PTE.    */
-} mmu_update_t;
+};
+typedef struct mmu_update mmu_update_t;
 DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
 
 /*
  * Send an array of these to HYPERVISOR_multicall().
  * NB. The fields are natural register size for this architecture.
  */
-typedef struct multicall_entry {
+struct multicall_entry {
     unsigned long op, result;
     unsigned long args[6];
-} multicall_entry_t;
+};
+typedef struct multicall_entry multicall_entry_t;
 DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
 
 /*
@@ -295,7 +298,7 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_
  */
 #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
 
-typedef struct vcpu_time_info {
+struct vcpu_time_info {
     /*
      * Updates to the following values are preceded and followed by an
      * increment of 'version'. The guest can therefore detect updates by
@@ -319,9 +322,10 @@ typedef struct vcpu_time_info {
     uint32_t tsc_to_system_mul;
     int8_t   tsc_shift;
     int8_t   pad1[3];
-} vcpu_time_info_t; /* 32 bytes */
-
-typedef struct vcpu_info {
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
     /*
      * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
      * a pending notification for a particular VCPU. It is then cleared 
@@ -354,16 +358,17 @@ typedef struct vcpu_info {
 #endif
     uint8_t evtchn_upcall_mask;
     unsigned long evtchn_pending_sel;
-    arch_vcpu_info_t arch;
-    vcpu_time_info_t time;
-} vcpu_info_t; /* 64 bytes (x86) */
+    struct arch_vcpu_info arch;
+    struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+typedef struct vcpu_info vcpu_info_t;
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
  */
-typedef struct shared_info {
-    vcpu_info_t vcpu_info[MAX_VIRT_CPUS];
+struct shared_info {
+    struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
 
     /*
      * A domain can create "event channels" on which it can send and receive
@@ -407,9 +412,10 @@ typedef struct shared_info {
     uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
     uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
-    arch_shared_info_t arch;
-
-} shared_info_t;
+    struct arch_shared_info arch;
+
+};
+typedef struct shared_info shared_info_t;
 
 /*
  * Start-of-day memory layout for the initial domain (DOM0):
@@ -437,7 +443,7 @@ typedef struct shared_info {
  */
 
 #define MAX_GUEST_CMDLINE 1024
-typedef struct start_info {
+struct start_info {
     /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
     char magic[32];             /* "xen-<version>-<platform>".            */
     unsigned long nr_pages;     /* Total pages allocated to this domain.  */
@@ -454,7 +460,8 @@ typedef struct start_info {
     unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
     unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
     int8_t cmd_line[MAX_GUEST_CMDLINE];
-} start_info_t;
+};
+typedef struct start_info start_info_t;
 
 /* These flags are passed in the 'flags' field of start_info_t. */
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xenoprof.h
--- a/xen/include/public/xenoprof.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xenoprof.h     Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@ struct event_log {
 };
 
 /* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
-typedef struct xenoprof_buf {
+struct xenoprof_buf {
     uint32_t event_head;
     uint32_t event_tail;
     uint32_t event_size;
@@ -51,10 +51,11 @@ typedef struct xenoprof_buf {
     uint64_t user_samples;
     uint64_t lost_samples;
     struct event_log event_log[1];
-} xenoprof_buf_t;
+};
+typedef struct xenoprof_buf xenoprof_buf_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
 
-typedef struct xenoprof_init {
+struct xenoprof_init {
     int32_t  max_samples;
     int32_t  num_events;
     int32_t  is_primary;
@@ -62,10 +63,11 @@ typedef struct xenoprof_init {
     int32_t  bufsize;
     uint64_t buf_maddr;
     char cpu_type[XENOPROF_CPU_TYPE_SIZE];
-} xenoprof_init_t;
+};
+typedef struct xenoprof_init xenoprof_init_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
 
-typedef struct xenoprof_counter {
+struct xenoprof_counter {
     uint32_t ind;
     uint64_t count;
     uint32_t enabled;
@@ -74,7 +76,8 @@ typedef struct xenoprof_counter {
     uint32_t kernel;
     uint32_t user;
     uint64_t unit_mask;
-} xenoprof_counter_t;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
 
 
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/hypercall.h       Tue May 30 14:30:34 2006 -0500
@@ -80,7 +80,7 @@ do_vcpu_op(
 
 extern long
 do_acm_op(
-    XEN_GUEST_HANDLE(acm_op_t) u_acm_op);
+    int cmd, XEN_GUEST_HANDLE(void) arg);
 
 extern long
 do_nmi_op(
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/sched-if.h        Tue May 30 14:30:34 2006 -0500
@@ -58,6 +58,8 @@ struct scheduler {
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
 
+    void         (*init)           (void);
+    void         (*tick)           (unsigned int cpu);
     int          (*alloc_task)     (struct vcpu *);
     void         (*add_task)       (struct vcpu *);
     void         (*free_task)      (struct domain *);
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/softirq.h Tue May 30 14:30:34 2006 -0500
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, softirq_handler handler);
 
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+    int cpu;
+
+    for_each_cpu_mask(cpu, mask)
+    {
+        if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+            cpu_clear(cpu, mask);
+    }
+
+    smp_send_event_check_mask(mask);
+}
+
 static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
 {
     if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/util.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,115 @@
+/******************************************************************************
+ * arch/ia64/xen/util.c
+ * This file is the ia64 counterpart of drivers/xen/util.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <xen/driver_util.h>
+
+struct vm_struct *alloc_vm_area(unsigned long size)
+{
+       int order;
+       unsigned long virt;
+       unsigned long nr_pages;
+       struct vm_struct* area;
+       
+       order = get_order(size);
+       virt = __get_free_pages(GFP_KERNEL, order);
+       if (virt == 0) {
+               goto err0;
+       }
+       nr_pages = 1 << order;
+       scrub_pages(virt, nr_pages);
+       
+       area = kmalloc(sizeof(*area), GFP_KERNEL);
+       if (area == NULL) {
+               goto err1;
+       }
+       
+        area->flags = VM_IOREMAP;//XXX
+        area->addr = (void*)virt;
+        area->size = size;
+        area->pages = NULL; //XXX
+        area->nr_pages = nr_pages;
+        area->phys_addr = __pa(virt);
+
+       return area;
+
+err1:
+       free_pages(virt, order);
+err0:
+       return NULL;
+       
+}
+EXPORT_SYMBOL_GPL(alloc_vm_area);
+
+void free_vm_area(struct vm_struct *area)
+{
+       unsigned int order = get_order(area->size);
+       unsigned long i;
+
+       // This area is used for foreign page mappping.
+       // So underlying machine page may not be assigned.
+       for (i = 0; i < (1 << order); i++) {
+               unsigned long ret;
+               unsigned long gpfn = (area->phys_addr >> PAGE_SHIFT) + i;
+               struct xen_memory_reservation reservation = {
+                       .nr_extents   = 1,
+                       .address_bits = 0,
+                       .extent_order = 0,
+                       .domid        = DOMID_SELF
+               };
+               set_xen_guest_handle(reservation.extent_start, &gpfn);
+               ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
+                                          &reservation);
+               BUG_ON(ret != 1);
+       }
+       free_pages((unsigned long)area->addr, order);
+       kfree(area);
+}
+EXPORT_SYMBOL_GPL(free_vm_area);
+
+void lock_vm_area(struct vm_struct *area)
+{
+       // nothing
+}
+EXPORT_SYMBOL_GPL(lock_vm_area);
+
+void unlock_vm_area(struct vm_struct *area)
+{
+       // nothing
+}
+EXPORT_SYMBOL_GPL(unlock_vm_area);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Tue May 30 
14:30:34 2006 -0500
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+       /*
+        * We assume a CPU hotplug request comes from local admin if it is made
+        * via a userspace process (i.e., one with a real mm_struct).
+        */
+       return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+       int err;
+       char dir[32], state[32];
+
+       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+               return;
+
+       sprintf(dir, "cpu/%d", cpu);
+       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+       if (err != 1) {
+               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+               return;
+       }
+
+       if (strcmp(state, "online") == 0) {
+               cpu_set(cpu, xenbus_allowed_cpumask);
+               (void)cpu_up(cpu);
+       } else if (strcmp(state, "offline") == 0) {
+               cpu_clear(cpu, xenbus_allowed_cpumask);
+               (void)cpu_down(cpu);
+       } else {
+               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+                      state, cpu);
+       }
+}
+
+static void handle_vcpu_hotplug_event(
+       struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+       int cpu;
+       char *cpustr;
+       const char *node = vec[XS_WATCH_PATH];
+
+       if ((cpustr = strstr(node, "cpu/")) != NULL) {
+               sscanf(cpustr, "cpu/%d", &cpu);
+               vcpu_hotplug(cpu);
+       }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+                             unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       /*
+        * We do this in a callback notifier rather than __cpu_disable()
+        * because local_cpu_hotplug_request() does not work in the latter
+        * as it's always executed from within a stopmachine kthread.
+        */
+       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+               cpu_clear(cpu, local_allowed_cpumask);
+
+       return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+                             unsigned long event, void *data)
+{
+       int i;
+
+       static struct xenbus_watch cpu_watch = {
+               .node = "cpu",
+               .callback = handle_vcpu_hotplug_event,
+               .flags = XBWF_new_thread };
+       (void)register_xenbus_watch(&cpu_watch);
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+               for_each_cpu(i)
+                       vcpu_hotplug(i);
+               printk(KERN_INFO "Brought up %ld CPUs\n",
+                      (long)num_online_cpus());
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+       static struct notifier_block hotplug_cpu = {
+               .notifier_call = smpboot_cpu_notify };
+       static struct notifier_block xsn_cpu = {
+               .notifier_call = setup_cpu_watcher };
+
+       register_cpu_notifier(&hotplug_cpu);
+       register_xenstore_notifier(&xsn_cpu);
+
+       return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+       int i, err;
+
+       lock_cpu_hotplug();
+
+       /*
+        * Take all other CPUs offline. We hold the hotplug mutex to
+        * avoid other processes bringing up CPUs under our feet.
+        */
+       while (num_online_cpus() > 1) {
+               unlock_cpu_hotplug();
+               for_each_online_cpu(i) {
+                       if (i == 0)
+                               continue;
+                       err = cpu_down(i);
+                       if (err) {
+                               printk(KERN_CRIT "Failed to take all CPUs "
+                                      "down: %d.\n", err);
+                               for_each_cpu(i)
+                                       vcpu_hotplug(i);
+                               return err;
+                       }
+               }
+               lock_cpu_hotplug();
+       }
+
+       return 0;
+}
+
+void smp_resume(void)
+{
+       int cpu;
+
+       for_each_cpu(cpu)
+               cpu_initialize_context(cpu);
+
+       unlock_cpu_hotplug();
+
+       for_each_cpu(cpu)
+               vcpu_hotplug(cpu);
+}
+
+int cpu_up_check(unsigned int cpu)
+{
+       int rc = 0;
+
+       if (local_cpu_hotplug_request()) {
+               cpu_set(cpu, local_allowed_cpumask);
+               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+                       printk("%s: attempt to bring up CPU %u disallowed by "
+                              "remote admin.\n", __FUNCTION__, cpu);
+                       rc = -EBUSY;
+               }
+       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+               rc = -EBUSY;
+       }
+
+       return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+       xenbus_allowed_cpumask = cpu_present_map;
+}
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h       Tue May 
30 14:30:34 2006 -0500
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * duples.  setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP        0x2d0           /* our map */
+#define E820MAX        128             /* number of entries in E820MAP */
+#define E820NR 0x1e8           /* # entries in E820MAP */
+
+#define E820_RAM       1
+#define E820_RESERVED  2
+#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS       4
+
+#define HIGH_MEMORY    (1024*1024)
+
+#define LOWMEMSIZE()   (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+       u64 addr;       /* start of memory segment */
+       u64 size;       /* size of memory segment */
+       u32 type;       /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+    int nr_map;
+       struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
+                                   unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size, 
+                             int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void); 
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Tue May 30 14:30:34 
2006 -0500
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu)    ((void)0)
+#endif
+
+int cpu_up_check(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_check(cpu)              (0)
+#define init_xenbus_allowed_cpumask()  ((void)0)
+
+static inline int smp_suspend(void)
+{
+       if (num_online_cpus() > 1) {
+               printk(KERN_WARNING "Can't suspend SMP guests "
+                      "without CONFIG_HOTPLUG_CPU\n");
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r e74246451527 -r f54d38cea8ac 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Tue May 30 14:30:34 
2006 -0500
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 
./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c  2006-05-02 
22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c    2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+       u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
+ 
+-      if (!PCI_DMA_BUS_IS_PHYS) {
+-              addr = BLK_BOUNCE_ANY;
+-      } else if (on && drive->media == ide_disk) {
+-              if (HWIF(drive)->pci_dev)
++      if (on && drive->media == ide_disk) {
++              if (!PCI_DMA_BUS_IS_PHYS)
++                      addr = BLK_BOUNCE_ANY;
++              else if (HWIF(drive)->pci_dev)
+                       addr = HWIF(drive)->pci_dev->dma_mask;
+       }
+ 
diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/xen-hotplug.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/xen-hotplug.patch Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,11 @@
+--- ../pristine-linux-2.6.16.13/fs/proc/proc_misc.c    2006-05-02 
22:38:44.000000000 +0100
++++ ./fs/proc/proc_misc.c      2006-05-22 15:29:34.000000000 +0100
+@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p,
+               (unsigned long long)cputime64_to_clock_t(irq),
+               (unsigned long long)cputime64_to_clock_t(softirq),
+               (unsigned long long)cputime64_to_clock_t(steal));
+-      for_each_online_cpu(i) {
++      for_each_cpu(i) {
+ 
+               /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+               user = kstat_cpu(i).cpustat.user;
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_csched.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c   Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: xc_csched.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_sched_credit_domain_set(
+    int xc_handle,
+    uint32_t domid,
+    struct sched_credit_adjdom *sdom)
+{
+    DECLARE_DOM0_OP;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
+    op.u.adjustdom.u.credit = *sdom;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_sched_credit_domain_get(
+    int xc_handle,
+    uint32_t domid,
+    struct sched_credit_adjdom *sdom)
+{
+    DECLARE_DOM0_OP;
+    int err;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
+
+    err = do_dom0_op(xc_handle, &op);
+    if ( err == 0 )
+        *sdom = op.u.adjustdom.u.credit;
+
+    return err;
+}
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_linux.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c  Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
+
+evtchn_port_t xenbus_evtchn(void)
+{
+       int fd;
+       int rc;
+       evtchn_port_t port; 
+       char str[20]; 
+
+       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
+       if (fd == -1)
+               return -1;
+
+       rc = read(fd, str, sizeof(str)); 
+       if (rc == -1)
+       {
+               int err = errno;
+               close(fd);
+               errno = err;
+               return -1;
+       }
+
+       str[rc] = '\0'; 
+       port = strtoul(str, NULL, 0); 
+
+       close(fd); 
+       return port;
+}
+
+void *xenbus_map(void)
+{
+       int fd;
+       void *addr;
+
+       fd = open(XENSTORED_PROC_KVA, O_RDWR);
+       if (fd == -1)
+               return NULL;
+
+       addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+               MAP_SHARED, fd, 0);
+
+       if (addr == MAP_FAILED)
+               addr = NULL;
+
+       close(fd);
+
+       return addr;
+}
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py        Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+
+# Copyright (C) International Business Machines Corp., 2006
+# Author: Harry Butterworth <butterwo@xxxxxxxxxx>
+
+# This test initialises a ram disk in dom0 with data from /dev/urandom and
+# then imports the ram disk device as a physical device into a domU. The md5
+# checksum of the data in the ramdisk is calculated in dom0 and also
+# calculated by the domU reading the data through the blk frontend and
+# backend drivers.  The test succeeds if the checksums match indicating that
+# the domU successfully read all the correct data from the device.
+
+import re
+
+from XmTestLib import *
+from XmTestLib.block_utils import *
+
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
+
+domain = XmTestDomain()
+
+try:
+    console = domain.start()
+except DomainError, e:
+    FAIL(str(e))
+
+console.setHistorySaveCmds(value=True)
+
+traceCommand("cat /dev/urandom > /dev/ram1")
+
+s, o = traceCommand("md5sum /dev/ram1")
+
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+
+block_attach(domain, "phy:ram1", "hda1")
+
+try:
+    run = console.runCmd("md5sum /dev/hda1")
+except ConsoleError, e:
+    FAIL(str(e))
+
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+
+domain.closeConsole()
+
+domain.stop()
+
+if dom0_md5sum_match == None:
+    FAIL("Failed to get md5sum of test ram disk in dom0.")
+
+if domU_md5sum_match == None:
+    FAIL("Failed to get md5sum of test ram disk in domU.")
+
+if verbose:
+    print "md5sum dom0:"
+    print dom0_md5sum_match.group()
+    print "md5sum domU:"
+    print domU_md5sum_match.group()
+
+if dom0_md5sum_match.group() != domU_md5sum_match.group():
+    FAIL("MISCOMPARE: data read in domU did not match data provided by dom0.")
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/Makefile.am
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/Makefile.am   Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,21 @@
+
+SUBDIRS =
+
+TESTS = 01_block_device_read_verify.test
+
+XFAIL_TESTS =
+
+EXTRA_DIST = $(TESTS) $(XFAIL_TESTS)
+
+TESTS_ENVIRONMENT=@TENV@
+
+%.test: %.py
+	cp $< $@
+	chmod +x $@
+
+clean-local: am_config_clean-local
+
+am_config_clean-local:
+	rm -f *test
+	rm -f *log
+	rm -f *~
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/tools/sparse-merge
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/tools/sparse-merge  Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Generate a patch for each of the ia64 files in the linux-2.6-xen-sparse tree
+
+# Path to mercurial tree of upstream Linux
+# WARNING: This will do an 'hg up -C' on the upstream Linux tree, you
+#          will lose data if there's anything there you care about.
+: ${LINUXPATH:=/tmp/linux-2.6}
+# Tag of current base upstream image for Xen files
+: ${OLDTAG:=v$(awk '/^LINUX_VER/{print $NF}' buildconfigs/mk.linux-2.6-xen)}
+# Tag of new upstream base to go to
+: ${NEWTAG:=v$(wget -O- -o/dev/null http://kernel.org/kdist/finger_banner \
+    | awk '/latest stable/{print $NF}')}
+# Restrict merge to specific arch (set to . for all)
+: ${ARCH:=ia64}
+
+SPARSEDIR=linux-2.6-xen-sparse
+WD=$PWD
+
+if [ ! -d $SPARSEDIR ]; then
+	echo "Can't find $SPARSEDIR directory."
+	exit
+fi
+
+# Check for modified files in the sparse tree before starting
+if hg st $SPARSEDIR | head | grep .; then
+    echo
+    echo "$SPARSEDIR contains modifications, please clean it up first"
+    exit
+fi
+
+# We want the linux upstream tree to be at the OLDTAG to get the OLDTAG-Xen diff.
+# Save current revision to restore when done
+cd $LINUXPATH || exit 1
+OLDCSET=$(hg parents | awk '/^changeset:/{print($2)}' | cut -f 1 -d :)
+for t in $OLDTAG $NEWTAG; do
+    if ! hg tags | cut -f1 -d' ' | grep -Fx $t; then
+	echo "Tag $t not found, ketching up"
+	hg up -C ${t%.*} || exit 1
+	ketchup ${t#v} || exit 1
+	hg addremove
+	hg ci -m $t
+	hg tag -l $t
+    fi
+done
+hg up -C $OLDTAG || exit 1
+
+cd $WD
+for i in $(hg manifest | awk '{print($3)}' | grep $SPARSEDIR | grep "$ARCH"); do
+	cd $WD
+
+	FILENAME=$(basename $i)
+	DIRNAME=$(dirname $i)
+	DIFFPATH=$(echo $i | sed -e "s,^$SPARSEDIR,$LINUXPATH,")
+
+	if [ ! -d $DIRNAME ]; then
+		echo "Hmm, something bad happened parsing directory name: $i"
+		continue
+	fi
+
+	if [ ! -e $DIFFPATH ]; then
+		continue
+	fi
+
+	echo -n "$i ... "
+
+	cd $DIRNAME
+	XENDIR=$(pwd)
+
+	ORIGPATH=$(echo $i | sed -e "s/^$SPARSEDIR/./")
+	APATH=$(echo $i | sed -e "s/^$SPARSEDIR/a/")
+	BPATH=$(echo $i | sed -e "s/^$SPARSEDIR/b/")
+	cd $LINUXPATH
+	hg diff -r $OLDTAG -r $NEWTAG $ORIGPATH | \
+	    sed -e "s,^--- $APATH,--- $FILENAME," \
+		-e "s,^+++ $BPATH,+++ $FILENAME," \
+	    > $XENDIR/$FILENAME-$OLDTAG-$NEWTAG.diff
+	cd $XENDIR
+
+	# Do we have a diff file?  Did anything change?
+	if [ ! -s $FILENAME-$OLDTAG-$NEWTAG.diff ]; then
+		echo "SUCCESS (Upstream unchanged)"
+		continue
+	fi
+
+	if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then
+		# It failed, how badly?
+		if [ ! -e ${FILENAME}.rej ]; then
+			echo "ERROR, Hmm, no .rej file, but diff failed, fix manually"
+			continue
+		fi
+		TONEWREJ=$(wc -l ${FILENAME}.rej | \
+			   awk '{print($1)}')
+		hg st $FILENAME | grep -q . && hg revert $FILENAME
+		rm -f ${FILENAME}.rej ${FILENAME}.orig
+		diff -uN $DIFFPATH $FILENAME | \
+		    sed -e "s,^--- $DIFFPATH,--- $FILENAME," \
+		    > $FILENAME-$OLDTAG-Xen.diff
+
+		if [ ! -e $FILENAME-$OLDTAG-Xen.diff ]; then
+			echo "ERROR, failed to create patch file"
+			continue
+		fi
+
+		if ! patch -R -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then
+			echo "ERROR, reverting Xen changes failed"
+			hg revert $FILENAME
+			continue
+		fi
+
+		if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then
+			echo "ERROR, new upstream patch failed on reverted file"
+			hg revert $FILENAME
+			continue
+		fi
+
+		if ! patch -f -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then
+			if [ ! -e ${FILENAME}.rej ]; then
+				echo "ERROR, Hmm, no .rej file, but diff failed, fix manually"
+				continue
+			fi
+			TOXENREJ=$(wc -l ${FILENAME}.rej | \
+				   awk '{print($1)}')
+
+			if  [ $TOXENREJ -gt $TONEWREJ ]; then
+				hg revert $FILENAME
+				rm -f ${FILENAME}.rej ${FILENAME}.orig
+				patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1
+				echo "MANUAL MERGE REQUIRED (Upstream reject)"
+			else
+				echo "MANUAL MERGE REQUIRED (Xen reject)"
+			fi
+
+		else
+			rm -f ${FILENAME}.rej ${FILENAME}.orig
+			echo "SUCCESS (Re-applied Xen patch)"
+		fi
+	else
+			rm -f ${FILENAME}.rej ${FILENAME}.orig
+			echo "SUCCESS (Upstream applied)"
+	fi
+done
+find $SPARSEDIR -name \*.diff -empty | xargs -r rm -f
+cd $LINUXPATH
+hg up -C $OLDCSET
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/efi_emul.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/efi_emul.c      Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,180 @@
+/*
+ * efi_emul.c:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/compile.h>
+#include <asm/pgalloc.h>
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+#include <public/sched.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+
+// given a current domain (virtual or metaphysical) address, return the virtual address
+static unsigned long
+efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault)
+{
+	struct vcpu *v = current;
+	unsigned long mpaddr = domain_addr;
+	*fault = IA64_NO_FAULT;
+
+	if (v->domain->arch.efi_virt_mode) {
+		*fault = vcpu_tpa(v, domain_addr, &mpaddr);
+		if (*fault != IA64_NO_FAULT) return 0;
+	}
+
+	return ((unsigned long) __va(translate_domain_mpaddr(mpaddr)));
+}
+
+static efi_status_t
+efi_emulate_get_time(
+	unsigned long tv_addr, unsigned long tc_addr,
+	IA64FAULT *fault)
+{
+	unsigned long tv = 0, tc = 0;
+	efi_status_t status;
+
+	//printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr);
+	tv = efi_translate_domain_addr(tv_addr, fault);
+	if (*fault != IA64_NO_FAULT) return 0;
+	if (tc_addr) {
+		tc = efi_translate_domain_addr(tc_addr, fault);
+		if (*fault != IA64_NO_FAULT) return 0;
+	}
+	//printf("efi_get_time(%016lx,%016lx) translated to xen virtual address\n", tv, tc);
+	status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc);
+	//printf("efi_get_time returns %lx\n", status);
+	return status;
+}
+
+static efi_status_t
+efi_emulate_set_virtual_address_map(
+	unsigned long memory_map_size, unsigned long descriptor_size,
+	u32 descriptor_version, efi_memory_desc_t *virtual_map)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t entry, *md = &entry;
+	u64 efi_desc_size;
+
+	unsigned long *vfn;
+	struct domain *d = current->domain;
+	efi_runtime_services_t *efi_runtime = d->arch.efi_runtime;
+
+	if (descriptor_version != EFI_MEMDESC_VERSION) {
+		printf ("efi_emulate_set_virtual_address_map: memory descriptor version unmatched\n");
+		return EFI_INVALID_PARAMETER;
+	}
+
+	if (descriptor_size != sizeof(efi_memory_desc_t)) {
+		printf ("efi_emulate_set_virtual_address_map: memory descriptor size unmatched\n");
+		return EFI_INVALID_PARAMETER;
+	}
+
+	if (d->arch.efi_virt_mode) return EFI_UNSUPPORTED;
+
+	efi_map_start = virtual_map;
+	efi_map_end   = efi_map_start + memory_map_size;
+	efi_desc_size = sizeof(efi_memory_desc_t);
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		if (copy_from_user(&entry, p, sizeof(efi_memory_desc_t))) {
+			printf ("efi_emulate_set_virtual_address_map: copy_from_user() fault. addr=0x%p\n", p);
+			return EFI_UNSUPPORTED;
+		}
+
+		/* skip over non-PAL_CODE memory descriptors; EFI_RUNTIME is included in PAL_CODE. */
+                if (md->type != EFI_PAL_CODE)
+                        continue;
+
+#define EFI_HYPERCALL_PATCH_TO_VIRT(tgt,call) \
+	do { \
+		vfn = (unsigned long *) domain_mpa_to_imva(d, tgt); \
+		*vfn++ = FW_HYPERCALL_##call##_INDEX * 16UL + md->virt_addr; \
+		*vfn++ = 0; \
+	} while (0)
+
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_time,EFI_GET_TIME);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_time,EFI_SET_TIME);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_variable,EFI_GET_VARIABLE);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_variable,EFI_SET_VARIABLE);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+		EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+	}
+
+	/* The virtual address map has been applied. */
+	d->arch.efi_virt_mode = 1;
+
+	return EFI_SUCCESS;
+}
+
+efi_status_t
+efi_emulator (struct pt_regs *regs, IA64FAULT *fault)
+{
+	struct vcpu *v = current;
+	efi_status_t status;
+
+	*fault = IA64_NO_FAULT;
+
+	switch (regs->r2) {
+	    case FW_HYPERCALL_EFI_RESET_SYSTEM:
+		printf("efi.reset_system called ");
+		if (current->domain == dom0) {
+			printf("(by dom0)\n ");
+			(*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+		}
+		else
+			domain_shutdown (current->domain, SHUTDOWN_reboot);
+		status = EFI_UNSUPPORTED;
+		break;
+	    case FW_HYPERCALL_EFI_GET_TIME:
+		status = efi_emulate_get_time (
+				vcpu_get_gr(v,32),
+				vcpu_get_gr(v,33),
+				fault);
+		break;
+	    case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+		status = efi_emulate_set_virtual_address_map (
+				vcpu_get_gr(v,32),
+				vcpu_get_gr(v,33),
+				(u32) vcpu_get_gr(v,34),
+				(efi_memory_desc_t *) vcpu_get_gr(v,35));
+		break;
+	    case FW_HYPERCALL_EFI_SET_TIME:
+	    case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+	    case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+		// FIXME: need fixes in efi.h from 2.6.9
+	    case FW_HYPERCALL_EFI_GET_VARIABLE:
+		// FIXME: need fixes in efi.h from 2.6.9
+	    case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+	    case FW_HYPERCALL_EFI_SET_VARIABLE:
+	    case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+		// FIXME: need fixes in efi.h from 2.6.9
+		status = EFI_UNSUPPORTED;
+		break;
+	    default:
+		printf("unknown ia64 fw hypercall %lx\n", regs->r2);
+		status = EFI_UNSUPPORTED;
+	}
+
+	return status;
+}
diff -r e74246451527 -r f54d38cea8ac xen/common/sched_credit.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: common/csched_credit.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK             10      /* milliseconds */
+#define CSCHED_TSLICE           30      /* milliseconds */
+#define CSCHED_ACCT_NTICKS      3
+#define CSCHED_ACCT_PERIOD      (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT   256
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
+#define CSCHED_PRI_IDLE         -64     /* idle */
+#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c)     ((struct csched_pcpu *)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X)         (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X)  uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X)                                  \
+    do                                                          \
+    {                                                           \
+        printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X));  \
+    } while ( 0 );
+
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
+    _MACRO(vcpu_alloc)                      \
+    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_sleep)                      \
+    _MACRO(vcpu_wake_running)               \
+    _MACRO(vcpu_wake_onrunq)                \
+    _MACRO(vcpu_wake_runnable)              \
+    _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(dom_free)                        \
+    _MACRO(schedule)                        \
+    _MACRO(tickle_local_idler)              \
+    _MACRO(tickle_local_over)               \
+    _MACRO(tickle_local_under)              \
+    _MACRO(tickle_local_other)              \
+    _MACRO(acct_run)                        \
+    _MACRO(acct_no_work)                    \
+    _MACRO(acct_balance)                    \
+    _MACRO(acct_reorder)                    \
+    _MACRO(acct_min_credit)                 \
+    _MACRO(acct_vcpu_active)                \
+    _MACRO(acct_vcpu_idle)                  \
+    _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    _MACRO(vcpu_migrate)                                \
+    _MACRO(load_balance_idle)                           \
+    _MACRO(load_balance_over)                           \
+    _MACRO(load_balance_other)                          \
+    _MACRO(steal_trylock_failed)                        \
+    _MACRO(steal_peer_down)                             \
+    _MACRO(steal_peer_idle)                             \
+    _MACRO(steal_peer_running)                          \
+    _MACRO(steal_peer_pinned)                           \
+    _MACRO(tickle_idlers_none)                          \
+    _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
+    _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO)                 \
+    CSCHED_STATS_EXPAND_SCHED(_MACRO)               \
+    CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET()                                        \
+    do                                                              \
+    {                                                               \
+        memset(&csched_priv.stats, 0, sizeof(csched_priv.stats));   \
+    } while ( 0 )
+
+#define CSCHED_STATS_DEFINE()                   \
+    struct                                      \
+    {                                           \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+    } stats
+
+#define CSCHED_STATS_PRINTK()                   \
+    do                                          \
+    {                                           \
+        printk("stats:\n");                     \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+    } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+#define CSCHED_STATS_RESET()    do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
+#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+    struct list_head runq;
+    uint32_t runq_sort_last;
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+    struct list_head runq_elem;
+    struct list_head active_vcpu_elem;
+    struct csched_dom *sdom;
+    struct vcpu *vcpu;
+    atomic_t credit;
+    int credit_last;
+    uint32_t credit_incr;
+    uint32_t state_active;
+    uint32_t state_idle;
+    int16_t pri;
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+    struct list_head active_vcpu;
+    struct list_head active_sdom_elem;
+    struct domain *dom;
+    uint16_t active_vcpu_count;
+    uint16_t weight;
+    uint16_t cap;
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+    spinlock_t lock;
+    struct list_head active_sdom;
+    uint32_t ncpus;
+    unsigned int master;
+    cpumask_t idlers;
+    uint32_t weight;
+    uint32_t credit;
+    int credit_balance;
+    uint32_t runq_sort;
+    CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+    return !list_empty(&svc->runq_elem);
+}
+
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+    const struct list_head * const runq = RUNQ(cpu);
+    struct list_head *iter;
+
+    BUG_ON( __vcpu_on_runq(svc) );
+    BUG_ON( cpu != svc->vcpu->processor );
+
+    list_for_each( iter, runq )
+    {
+        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+        if ( svc->pri > iter_svc->pri )
+            break;
+    }
+
+    list_add_tail(&svc->runq_elem, iter);
+}
+
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+    BUG_ON( !__vcpu_on_runq(svc) );
+    list_del_init(&svc->runq_elem);
+}
+
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+    struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+    cpumask_t mask;
+
+    ASSERT(cur);
+    cpus_clear(mask);
+
+    /* If strictly higher priority than current VCPU, signal the CPU */
+    if ( new->pri > cur->pri )
+    {
+        if ( cur->pri == CSCHED_PRI_IDLE )
+            CSCHED_STAT_CRANK(tickle_local_idler);
+        else if ( cur->pri == CSCHED_PRI_TS_OVER )
+            CSCHED_STAT_CRANK(tickle_local_over);
+        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+            CSCHED_STAT_CRANK(tickle_local_under);
+        else
+            CSCHED_STAT_CRANK(tickle_local_other);
+
+        cpu_set(cpu, mask);
+    }
+
+    /*
+     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+     * let them know there is runnable work in the system...
+     */
+    if ( cur->pri > CSCHED_PRI_IDLE )
+    {
+        if ( cpus_empty(csched_priv.idlers) )
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_none);
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_some);
+            cpus_or(mask, mask, csched_priv.idlers);
+        }
+    }
+
+    /* Send scheduler interrupts to designated CPUs */
+    if ( !cpus_empty(mask) )
+        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+static void
+csched_pcpu_init(int cpu)
+{
+    struct csched_pcpu *spc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Initialize/update system-wide config */
+    csched_priv.credit += CSCHED_ACCT_PERIOD;
+    if ( csched_priv.ncpus <= cpu )
+        csched_priv.ncpus = cpu + 1;
+    if ( csched_priv.master >= csched_priv.ncpus )
+        csched_priv.master = cpu;
+
+    /* Allocate per-PCPU info */
+    spc = xmalloc(struct csched_pcpu);
+    BUG_ON( spc == NULL );
+    INIT_LIST_HEAD(&spc->runq);
+    spc->runq_sort_last = csched_priv.runq_sort;
+    schedule_data[cpu].sched_priv = spc;
+
+    /* Start off idling... */
+    BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+    cpu_set(cpu, csched_priv.idlers);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( svc->vcpu != vc );
+    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+    if ( sdom )
+    {
+        BUG_ON( is_idle_vcpu(vc) );
+        BUG_ON( sdom->dom != vc->domain );
+    }
+    else
+    {
+        BUG_ON( !is_idle_vcpu(vc) );
+    }
+
+    CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+    /*
+     * Don't pick up work that's in the peer's scheduling tail. Also only pick
+     * up work that's allowed to run on our CPU.
+     */
+    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_running);
+        return 0;
+    }
+
+    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    return 1;
+}
+
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    /* Update credits */
+    atomic_sub(credit_dec, &svc->credit);
+
+    /* Put this VCPU and domain back on the active list if it was idling */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( list_empty(&svc->active_vcpu_elem) )
+        {
+            CSCHED_STAT_CRANK(acct_vcpu_active);
+            svc->state_active++;
+
+            sdom->active_vcpu_count++;
+            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+            if ( list_empty(&sdom->active_sdom_elem) )
+            {
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+                csched_priv.weight += sdom->weight;
+            }
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+}
+
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+    svc->state_idle++;
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+
+    atomic_set(&svc->credit, 0);
+}
+
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+    struct domain * const dom = vc->domain;
+    struct csched_dom *sdom;
+    struct csched_vcpu *svc;
+    int16_t pri;
+
+    CSCHED_STAT_CRANK(vcpu_alloc);
+
+    /* Allocate, if appropriate, per-domain info */
+    if ( is_idle_vcpu(vc) )
+    {
+        sdom = NULL;
+        pri = CSCHED_PRI_IDLE;
+    }
+    else if ( CSCHED_DOM(dom) )
+    {
+        sdom = CSCHED_DOM(dom);
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+    else 
+    {
+        sdom = xmalloc(struct csched_dom);
+        if ( !sdom )
+            return -1;
+
+        /* Initialize credit and weight */
+        INIT_LIST_HEAD(&sdom->active_vcpu);
+        sdom->active_vcpu_count = 0;
+        INIT_LIST_HEAD(&sdom->active_sdom_elem);
+        sdom->dom = dom;
+        sdom->weight = CSCHED_DEFAULT_WEIGHT;
+        sdom->cap = 0U;
+        dom->sched_priv = sdom;
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+
+    /* Allocate per-VCPU info */
+    svc = xmalloc(struct csched_vcpu);
+    if ( !svc )
+        return -1;
+
+    INIT_LIST_HEAD(&svc->runq_elem);
+    INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    svc->sdom = sdom;
+    svc->vcpu = vc;
+    atomic_set(&svc->credit, 0);
+    svc->credit_last = 0;
+    svc->credit_incr = 0U;
+    svc->state_active = 0U;
+    svc->state_idle = 0U;
+    svc->pri = pri;
+    vc->sched_priv = svc;
+
+    CSCHED_VCPU_CHECK(vc);
+
+    /* Attach fair-share VCPUs to the accounting list */
+    if ( likely(sdom != NULL) )
+        csched_vcpu_acct(svc, 0);
+
+    return 0;
+}
+
+static void
+csched_vcpu_add(struct vcpu *vc) 
+{
+    CSCHED_STAT_CRANK(vcpu_add);
+
+    /* Allocate per-PCPU info */
+    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+        csched_pcpu_init(vc->processor);
+
+    CSCHED_VCPU_CHECK(vc);
+}
+
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __csched_vcpu_acct_idle_locked(svc);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    xfree(svc);
+}
+
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+    CSCHED_STAT_CRANK(vcpu_sleep);
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( schedule_data[vc->processor].curr == vc )
+        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+    else if ( __vcpu_on_runq(svc) )
+        __runq_remove(svc);
+}
+
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    const unsigned int cpu = vc->processor;
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( unlikely(schedule_data[cpu].curr == vc) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_running);
+        return;
+    }
+    if ( unlikely(__vcpu_on_runq(svc)) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+        return;
+    }
+
+    if ( likely(vcpu_runnable(vc)) )
+        CSCHED_STAT_CRANK(vcpu_wake_runnable);
+    else
+        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+    /* Put the VCPU on the runq and tickle CPUs */
+    __runq_insert(cpu, svc);
+    __runq_tickle(cpu, svc);
+}
+
+/*
+ * Change a VCPU's CPU affinity mask.
+ *
+ * Returns -EBUSY when the caller is the target VCPU and the new mask
+ * excludes the CPU it is running on (it cannot migrate itself in place).
+ * Otherwise returns 0, migrating a paused VCPU to a CPU in the new mask
+ * if its current CPU is no longer allowed.
+ */
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+    unsigned long flags;
+    int lcpu;
+
+    if ( vc == current )
+    {
+        /* No locking needed but also can't move on the spot... */
+        if ( !cpu_isset(vc->processor, *affinity) )
+            return -EBUSY;
+
+        vc->cpu_affinity = *affinity;
+    }
+    else
+    {
+        /* Pause, modify, and unpause. */
+        vcpu_pause(vc);
+
+        vc->cpu_affinity = *affinity;
+        if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+        {
+            /*
+             * We must grab the scheduler lock for the CPU currently owning
+             * this VCPU before changing its ownership.
+             */
+            vcpu_schedule_lock_irqsave(vc, flags);
+            lcpu = vc->processor;
+
+            /* Move to the first allowed CPU in the new mask. */
+            vc->processor = first_cpu(vc->cpu_affinity);
+
+            spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+        }
+
+        vcpu_unpause(vc);
+    }
+
+    return 0;
+}
+
+/*
+ * Get or set a domain's scheduling parameters (weight and cap).
+ *
+ * On SCHED_INFO_PUT, a weight of 0 and a cap of 0xffff ((uint16_t)~0U)
+ * act as "leave unchanged" sentinels. The global weight sum in
+ * csched_priv.weight is kept consistent under csched_priv.lock.
+ */
+static int
+csched_dom_cntl(
+    struct domain *d,
+    struct sched_adjdom_cmd *cmd)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(d);
+    unsigned long flags;
+
+    if ( cmd->direction == SCHED_INFO_GET )
+    {
+        cmd->u.credit.weight = sdom->weight;
+        cmd->u.credit.cap = sdom->cap;
+    }
+    else
+    {
+        ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( cmd->u.credit.weight != 0 )
+        {
+            /* Swap old weight for new in the system-wide total. */
+            csched_priv.weight -= sdom->weight;
+            sdom->weight = cmd->u.credit.weight;
+            csched_priv.weight += sdom->weight;
+        }
+
+        if ( cmd->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = cmd->u.credit.cap;
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    return 0;
+}
+
+/*
+ * Free a domain's scheduler state: release every per-VCPU structure first,
+ * then the per-domain structure itself.
+ */
+static void
+csched_dom_free(struct domain *dom)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(dom);
+    int i;
+
+    CSCHED_STAT_CRANK(dom_free);
+
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( dom->vcpu[i] )
+            csched_vcpu_free(dom->vcpu[i]);
+    }
+
+    xfree(sdom);
+}
+
+/*
+ * This is an O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERs. We
+ * remember the last UNDER to make the move up operation O(1).
+ */
+static void
+csched_runq_sort(unsigned int cpu)
+{
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+    struct list_head *runq, *elem, *next, *last_under;
+    struct csched_vcpu *svc_elem;
+    unsigned long flags;
+    int sort_epoch;
+
+    /*
+     * Only resort once per accounting epoch. NOTE(review): runq_sort is
+     * read without csched_priv.lock; presumably a stale read just defers
+     * the sort to a later tick -- confirm against csched_acct().
+     */
+    sort_epoch = csched_priv.runq_sort;
+    if ( sort_epoch == spc->runq_sort_last )
+        return;
+
+    spc->runq_sort_last = sort_epoch;
+
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+    runq = &spc->runq;
+    elem = runq->next;
+    last_under = runq;
+
+    while ( elem != runq )
+    {
+        next = elem->next;
+        svc_elem = __runq_elem(elem);
+
+        if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_under )
+            {
+                list_del(elem);
+                list_add(elem, last_under);
+            }
+            last_under = elem;
+        }
+
+        elem = next;
+    }
+
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+/*
+ * Credit accounting, run once per accounting period (from csched_tick on
+ * the master CPU). Distributes credit among active domains in proportion
+ * to their weights, caps each domain's earnings, then recomputes every
+ * active VCPU's priority (UNDER / OVER / PARKED) from its new credit
+ * balance. Finally bumps runq_sort so each CPU resorts its runqueue.
+ */
+static void
+csched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    /* No active domains: nothing to account. */
+    if ( unlikely(weight_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            /* Rounded-up weighted share of the outstanding deficit. */
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+                             (weight_total - 1)
+                           ) / weight_total;
+        }
+        if ( sdom->cap != 0U )
+        {
+            /* Cap is in percent of one accounting period (rounded up). */
+            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 
100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+        }
+
+        /* Weighted fair share of the total credit pool, rounded up. */
+        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                /* Capped domains park over-spent VCPUs instead of OVER. */
+                if ( sdom->cap == 0U )
+                    svc->pri = CSCHED_PRI_TS_OVER;
+                else
+                    svc->pri = CSCHED_PRI_TS_PARKED;
+
+                /* Clamp debt so one VCPU cannot sink arbitrarily deep. */
+                if ( credit < -CSCHED_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -CSCHED_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                svc->pri = CSCHED_PRI_TS_UNDER;
+
+                /* Credit piling up means the VCPU has been idle. */
+                if ( credit > CSCHED_TSLICE )
+                    __csched_vcpu_acct_idle_locked(svc);
+            }
+
+            svc->credit_last = credit;
+            svc->credit_incr = credit_fair;
+            credit_balance += credit;
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
+
+/*
+ * Per-CPU scheduler tick: charge the currently running VCPU, run the
+ * system-wide accounting on the master CPU once per accounting period,
+ * and resort the local runqueue when accounting has changed priorities.
+ */
+static void
+csched_tick(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct csched_dom * const sdom = svc->sdom;
+
+    /*
+     * Accounting for running VCPU
+     *
+     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+     */
+    if ( likely(sdom != NULL) )
+    {
+        csched_vcpu_acct(svc, CSCHED_TICK);
+    }
+
+    /*
+     * Accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+    {
+        csched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    csched_runq_sort(cpu);
+}
+
+/*
+ * Scan a peer CPU's runqueue for a VCPU of strictly higher priority than
+ * 'pri' that is allowed to run on 'cpu'. On success the VCPU is dequeued,
+ * reassigned to 'cpu', and returned; otherwise returns NULL.
+ *
+ * NOTE(review): the early break assumes the runq is kept in priority
+ * order (see csched_runq_sort), so the first entry at or below our
+ * priority ends the search.
+ */
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+    struct list_head *iter;
+    struct csched_vcpu *speer;
+    struct vcpu *vc;
+
+    list_for_each( iter, &spc->runq )
+    {
+        speer = __runq_elem(iter);
+
+        /*
+         * If next available VCPU here is not of higher priority than ours,
+         * this PCPU is useless to us.
+         */
+        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+        {
+            CSCHED_STAT_CRANK(steal_peer_idle);
+            break;
+        }
+
+        /* Is this VCPU runnable on our PCPU? */
+        vc = speer->vcpu;
+        BUG_ON( is_idle_vcpu(vc) );
+
+        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        {
+            /* We got a candidate. Grab it! */
+            __runq_remove(speer);
+            vc->processor = cpu;
+
+            return speer;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * SMP load balancing: walk every other PCPU (starting with our neighbour)
+ * looking for a VCPU of higher priority than 'snext' to steal. If none is
+ * found, return snext itself -- already removed from the local runq -- so
+ * the caller can run it.
+ */
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+    struct csched_pcpu *spc;
+    struct csched_vcpu *speer;
+    int peer_cpu;
+
+    /* Stats only: classify why we are balancing. */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+        CSCHED_STAT_CRANK(load_balance_idle);
+    else if ( snext->pri == CSCHED_PRI_TS_OVER )
+        CSCHED_STAT_CRANK(load_balance_over);
+    else
+        CSCHED_STAT_CRANK(load_balance_other);
+
+    peer_cpu = cpu;
+    BUG_ON( peer_cpu != snext->vcpu->processor );
+
+    while ( 1 )
+    {
+        /* For each PCPU in the system starting with our neighbour... */
+        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+        if ( peer_cpu == cpu )
+            break;
+
+        BUG_ON( peer_cpu >= csched_priv.ncpus );
+        BUG_ON( peer_cpu == cpu );
+
+        /*
+         * Get ahold of the scheduler lock for this peer CPU.
+         *
+         * Note: We don't spin on this lock but simply try it. Spinning could
+         * cause a deadlock if the peer CPU is also load balancing and trying
+         * to lock this CPU.
+         */
+        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+        {
+
+            spc = CSCHED_PCPU(peer_cpu);
+            if ( unlikely(spc == NULL) )
+            {
+                CSCHED_STAT_CRANK(steal_peer_down);
+                speer = NULL;
+            }
+            else
+            {
+                speer = csched_runq_steal(spc, cpu, snext->pri);
+            }
+
+            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+            /* Got one! */
+            if ( speer )
+            {
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                return speer;
+            }
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+        }
+    }
+
+
+    /* Failed to find more important work */
+    __runq_remove(snext);
+    return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ *
+ * Picks the next VCPU to run on this CPU: requeue the current VCPU if it
+ * is still runnable, take the head of the local runq, fall back to SMP
+ * load balancing when that head has exhausted its credits, maintain the
+ * global idlers mask, and return the choice with a fixed timeslice.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+    const int cpu = smp_processor_id();
+    struct list_head * const runq = RUNQ(cpu);
+    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_vcpu *snext;
+    struct task_slice ret;
+
+    CSCHED_STAT_CRANK(schedule);
+    CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Select next runnable local VCPU (ie top of local runq)
+     */
+    if ( vcpu_runnable(current) )
+        __runq_insert(cpu, scurr);
+    else
+        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+    snext = __runq_elem(runq->next);
+
+    /*
+     * SMP Load balance:
+     *
+     * If the next highest priority local runnable VCPU has already eaten
+     * through its credits, look on other PCPUs to see if we have more
+     * urgent work... If not, csched_load_balance() will return snext, but
+     * already removed from the runq.
+     */
+    if ( snext->pri > CSCHED_PRI_TS_OVER )
+        __runq_remove(snext);
+    else
+        snext = csched_load_balance(cpu, snext);
+
+    /*
+     * Update idlers mask if necessary. When we're idling, other CPUs
+     * will tickle us when they get extra work.
+     */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+    {
+        if ( !cpu_isset(cpu, csched_priv.idlers) )
+            cpu_set(cpu, csched_priv.idlers);
+    }
+    else if ( cpu_isset(cpu, csched_priv.idlers) )
+    {
+        cpu_clear(cpu, csched_priv.idlers);
+    }
+
+    /*
+     * Return task to run next... every choice gets the same fixed
+     * timeslice regardless of priority.
+     */
+    ret.time = MILLISECS(CSCHED_TSLICE);
+    ret.task = snext->vcpu;
+
+    CSCHED_VCPU_CHECK(ret.task);
+    BUG_ON( !vcpu_runnable(ret.task) );
+
+    return ret;
+}
+
+/*
+ * Debug dump of one VCPU's scheduler state (id, priority, CPU, and --
+ * for credit-scheduled VCPUs -- credit and accounting counters).
+ */
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    printk("[%i.%i] pri=%i cpu=%i",
+            svc->vcpu->domain->domain_id,
+            svc->vcpu->vcpu_id,
+            svc->pri,
+            svc->vcpu->processor);
+
+    /* Idle VCPUs have no csched_dom and thus no credit state to print. */
+    if ( sdom )
+    {
+        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+            atomic_read(&svc->credit),
+            svc->credit_last,
+            svc->credit_incr,
+            svc->state_active,
+            svc->state_idle,
+            sdom->weight);
+    }
+
+    printk("\n");
+}
+
+/*
+ * Debug dump of one physical CPU: tick/sort counters, the currently
+ * running VCPU, then every VCPU queued on its runq.
+ */
+static void
+csched_dump_pcpu(int cpu)
+{
+    struct list_head *runq, *iter;
+    struct csched_pcpu *spc;
+    struct csched_vcpu *svc;
+    int loop;
+
+    spc = CSCHED_PCPU(cpu);
+    runq = &spc->runq;
+
+    printk(" tick=%lu, sort=%d\n",
+            schedule_data[cpu].tick,
+            spc->runq_sort_last);
+
+    /* current VCPU */
+    svc = CSCHED_VCPU(schedule_data[cpu].curr);
+    if ( svc )
+    {
+        printk("\trun: ");
+        csched_dump_vcpu(svc);
+    }
+
+    /* Queued VCPUs, numbered in runq order. */
+    loop = 0;
+    list_for_each( iter, runq )
+    {
+        svc = __runq_elem(iter);
+        if ( svc )
+        {
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+/*
+ * Debug dump of global scheduler settings and statistics, followed by
+ * every active VCPU of every active domain.
+ */
+static void
+csched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+
+    printk("info:\n"
+           "\tncpus              = %u\n"
+           "\tmaster             = %u\n"
+           "\tcredit             = %u\n"
+           "\tcredit balance     = %d\n"
+           "\tweight             = %u\n"
+           "\trunq_sort          = %u\n"
+           "\ttick               = %dms\n"
+           "\ttslice             = %dms\n"
+           "\taccounting period  = %dms\n"
+           "\tdefault-weight     = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           CSCHED_TICK,
+           CSCHED_TSLICE,
+           CSCHED_ACCT_PERIOD,
+           CSCHED_DEFAULT_WEIGHT);
+
+    /* Only the first word of the idlers mask is printed. */
+    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+/*
+ * One-time initialization of the global credit scheduler state.
+ * NOTE(review): master starts at UINT_MAX -- presumably assigned when the
+ * first PCPU registers; that path is outside this file view, confirm.
+ */
+static void
+csched_init(void)
+{
+    spin_lock_init(&csched_priv.lock);
+    INIT_LIST_HEAD(&csched_priv.active_sdom);
+    csched_priv.ncpus = 0;
+    csched_priv.master = UINT_MAX;
+    cpus_clear(csched_priv.idlers);
+    csched_priv.weight = 0U;
+    csched_priv.credit = 0U;
+    csched_priv.credit_balance = 0;
+    csched_priv.runq_sort = 0U;
+    CSCHED_STATS_RESET();
+}
+
+
+/* Credit scheduler entry points, registered with the generic scheduler. */
+struct scheduler sched_credit_def = {
+    .name           = "SMP Credit Scheduler",
+    .opt_name       = "credit",
+    .sched_id       = SCHED_CREDIT,
+
+    .alloc_task     = csched_vcpu_alloc,
+    .add_task       = csched_vcpu_add,
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+    .set_affinity   = csched_vcpu_set_affinity,
+
+    .adjdom         = csched_dom_cntl,
+    .free_task      = csched_dom_free,
+
+    .tick           = csched_tick,
+    .do_schedule    = csched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = csched_dump,
+    .init           = csched_init,
+};
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/tlbflush.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/tlbflush.h   Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,37 @@
+#ifndef __FLUSHTLB_H__
+#define __FLUSHTLB_H__
+
+#include <xen/sched.h>
+
+/* TLB flushes can be either local (current vcpu only) or domain wide (on
+   all vcpus).
+   TLB flushes can be either all-flush or range only.
+
+   vTLB flushing means flushing VCPU virtual TLB + machine TLB + machine VHPT.
+*/
+
+/* Local all flush of vTLB.  */
+void vcpu_flush_vtlb_all (void);
+
+/* Local range flush of machine TLB only (not full VCPU virtual TLB!!!)  */
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range);
+
+/* Global all flush of vTLB  */
+void domain_flush_vtlb_all (void);
+
+/* Global range-flush of vTLB.  */
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
+
+/* Final vTLB flush on every dirty cpus.  */
+void domain_flush_destroy (struct domain *d);
+
+/* Flush v-tlb on cpus set in mask for current domain.  */
+void flush_tlb_mask(cpumask_t mask);
+
+/* Flush local machine TLB.  */
+void local_flush_tlb_all (void);
+
+#define tlbflush_current_time() 0
+#define tlbflush_filter(x,y) ((void)0)
+
+#endif
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c    Tue May 30 12:52:02 
2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-/*
- * Architecture-specific kernel symbols
- *
- * Don't put any exports here unless it's defined in an assembler file.
- * All other exports should be put directly after the definition.
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-extern int is_running_on_xen(void);
-EXPORT_SYMBOL(is_running_on_xen);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c        Tue May 30 
12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
-       char *s;
-       int i;
-       char *e;
-       char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
-       if (IS_ERR(macstr))
-               return PTR_ERR(macstr);
-       s = macstr;
-       for (i = 0; i < ETH_ALEN; i++) {
-               mac[i] = simple_strtoul(s, &e, 16);
-               if (s == e || (e[0] != ':' && e[0] != 0)) {
-                       kfree(macstr);
-                       return -ENOENT;
-               }
-               s = &e[1];
-       }
-       kfree(macstr);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h        Tue May 30 
12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing result the given mac array.  mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h   Tue May 30 12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/* 
-    Copyright (C) 2005 XenSource Ltd
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/flushtlb.h
--- a/xen/include/asm-ia64/flushtlb.h   Tue May 30 12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#ifndef __FLUSHTLB_H__
-#define __FLUSHTLB_H__
-
-#include <asm/tlbflush.h>
-
-#define tlbflush_current_time() 0
-#define tlbflush_filter(x,y) ((void)0)
-
-#endif
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/tlbflush.h
--- a/xen/include/asm-ia64/linux-xen/asm/tlbflush.h     Tue May 30 12:52:02 
2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-#ifndef _ASM_IA64_TLBFLUSH_H
-#define _ASM_IA64_TLBFLUSH_H
-
-/*
- * Copyright (C) 2002 Hewlett-Packard Co
- *     David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <linux/mm.h>
-
-#include <asm/intrinsics.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-
-/*
- * Now for some TLB flushing routines.  This is the kind of stuff that
- * can be very expensive, so try to avoid them whenever possible.
- */
-
-/*
- * Flush everything (kernel mapping may also have changed due to
- * vmalloc/vfree).
- */
-extern void local_flush_tlb_all (void);
-
-#ifdef CONFIG_SMP
-  extern void smp_flush_tlb_all (void);
-  extern void smp_flush_tlb_mm (struct mm_struct *mm);
-# define flush_tlb_all()       smp_flush_tlb_all()
-#else
-# define flush_tlb_all()       local_flush_tlb_all()
-#endif
-
-#ifndef XEN
-static inline void
-local_finish_flush_tlb_mm (struct mm_struct *mm)
-{
-#ifndef XEN
-// FIXME SMP?
-       if (mm == current->active_mm)
-               activate_context(mm);
-#endif
-}
-
-/*
- * Flush a specified user mapping.  This is called, e.g., as a result of 
fork() and
- * exit().  fork() ends up here because the copy-on-write mechanism needs to 
write-protect
- * the PTEs of the parent task.
- */
-static inline void
-flush_tlb_mm (struct mm_struct *mm)
-{
-       if (!mm)
-               return;
-
-#ifndef XEN
-// FIXME SMP?
-       mm->context = 0;
-#endif
-
-       if (atomic_read(&mm->mm_users) == 0)
-               return;         /* happens as a result of exit_mmap() */
-
-#ifdef CONFIG_SMP
-       smp_flush_tlb_mm(mm);
-#else
-       local_finish_flush_tlb_mm(mm);
-#endif
-}
-
-extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, 
unsigned long end);
-
-/*
- * Page-granular tlb flush.
- */
-static inline void
-flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
-{
-#ifdef CONFIG_SMP
-       flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + 
PAGE_SIZE);
-#else
-#ifdef XEN
-       if (vma->vm_mm == current->domain->arch.mm)
-#else
-       if (vma->vm_mm == current->active_mm)
-#endif
-               ia64_ptcl(addr, (PAGE_SHIFT << 2));
-#ifndef XEN
-// FIXME SMP?
-       else
-               vma->vm_mm->context = 0;
-#endif
-#endif
-}
-
-/*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long 
end)
-{
-       /*
-        * Deprecated.  The virtual page table is now flushed via the normal 
gather/flush
-        * interface (see tlb.h).
-        */
-}
-
-
-#define flush_tlb_kernel_range(start, end)     flush_tlb_all() /* XXX fix me */
-#endif /* XEN */
-
-#ifdef XEN
-extern void flush_tlb_mask(cpumask_t mask);
-#endif
-
-#endif /* _ASM_IA64_TLBFLUSH_H */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.