[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [PATCH] [ppc] merge with upstream
235 files changed, 7718 insertions(+), 4608 deletions(-) .hgignore | 2 buildconfigs/linux-defconfig_xen0_ia64 | 38 buildconfigs/linux-defconfig_xenU_ia64 | 30 buildconfigs/linux-defconfig_xen_ia64 | 38 docs/src/interface.tex | 29 docs/src/user.tex | 32 extras/mini-os/Makefile | 15 extras/mini-os/include/mm.h | 79 extras/mini-os/include/types.h | 7 extras/mini-os/kernel.c | 11 extras/mini-os/lib/printf.c | 4 extras/mini-os/lib/string.c | 4 extras/mini-os/mm.c | 71 extras/mini-os/traps.c | 21 extras/mini-os/x86_32.S | 8 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 266 +- linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c | 1 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 13 linux-2.6-xen-sparse/arch/ia64/Kconfig | 54 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 3 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre | 14 linux-2.6-xen-sparse/arch/ia64/xen/Makefile | 6 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile | 24 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile | 12 linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c | 17 linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c | 9 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S | 4 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 550 +++- linux-2.6-xen-sparse/arch/ia64/xen/util.c | 115 linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c | 12 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 198 - linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 586 ++-- linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h | 2 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 21 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 257 -- linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 73 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 26 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 2 linux-2.6-xen-sparse/drivers/xen/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 12 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 4 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 2 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 6 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 2 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 2 linux-2.6-xen-sparse/drivers/xen/console/console.c | 32 linux-2.6-xen-sparse/drivers/xen/core/Makefile | 11 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c | 185 + linux-2.6-xen-sparse/drivers/xen/core/evtchn.c | 31 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 5 linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c | 3 linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 9 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 215 - linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c | 3 linux-2.6-xen-sparse/drivers/xen/net_driver_util.c | 58 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 7 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 31 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 91 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c | 2 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c | 4 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 26 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 6 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 8 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 12 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 2 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 28 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 84 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 34 linux-2.6-xen-sparse/include/asm-ia64/page.h | 50 linux-2.6-xen-sparse/include/asm-ia64/privop.h | 11 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 58 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h | 63 linux-2.6-xen-sparse/include/xen/cpu_hotplug.h | 42 linux-2.6-xen-sparse/include/xen/net_driver_util.h | 48 linux-2.6-xen-sparse/include/xen/xenbus.h | 8 patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch | 18 patches/linux-2.6.16.13/xen-hotplug.patch | 11 tools/examples/network-bridge | 2 
tools/examples/xmexample.vti | 2 tools/libxc/Makefile | 1 tools/libxc/xc_acm.c | 33 tools/libxc/xc_csched.c | 50 tools/libxc/xc_ia64_stubs.c | 6 tools/libxc/xc_linux_build.c | 57 tools/libxc/xc_linux_restore.c | 122 tools/libxc/xc_load_elf.c | 54 tools/libxc/xc_private.c | 22 tools/libxc/xc_ptrace.c | 173 - tools/libxc/xc_ptrace.h | 3 tools/libxc/xc_ptrace_core.c | 7 tools/libxc/xc_tbuf.c | 56 tools/libxc/xenctrl.h | 13 tools/libxc/xg_private.h | 10 tools/python/xen/lowlevel/acm/acm.c | 54 tools/python/xen/lowlevel/xc/xc.c | 68 tools/python/xen/lowlevel/xs/xs.c | 11 tools/python/xen/xend/XendDomain.py | 22 tools/python/xen/xend/XendDomainInfo.py | 24 tools/python/xen/xend/balloon.py | 11 tools/python/xen/xend/image.py | 27 tools/python/xen/xend/server/SrvDomain.py | 14 tools/python/xen/xend/xenstore/xstransact.py | 28 tools/python/xen/xm/main.py | 45 tools/security/secpol_tool.c | 32 tools/tests/test_x86_emulator.c | 67 tools/xenstore/Makefile | 8 tools/xenstore/xenstored_core.c | 7 tools/xenstore/xenstored_core.h | 8 tools/xenstore/xenstored_domain.c | 37 tools/xenstore/xenstored_linux.c | 69 tools/xenstore/xenstored_proc.h | 27 tools/xentrace/xentrace_format | 6 tools/xm-test/configure.ac | 1 tools/xm-test/ramdisk/bin/create_disk_image | 7 tools/xm-test/tests/Makefile.am | 7 tools/xm-test/tests/block-integrity/01_block_device_read_verify.py | 62 tools/xm-test/tests/block-integrity/Makefile.am | 21 tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py | 32 tools/xm-test/tests/network/03_network_local_tcp_pos.py | 4 tools/xm-test/tests/network/04_network_local_udp_pos.py | 4 tools/xm-test/tests/network/06_network_dom0_tcp_pos.py | 2 tools/xm-test/tests/network/07_network_dom0_udp_pos.py | 2 tools/xm-test/tests/network/12_network_domU_tcp_pos.py | 2 tools/xm-test/tests/network/13_network_domU_udp_pos.py | 2 xen/arch/ia64/Rules.mk | 28 xen/arch/ia64/asm-offsets.c | 2 xen/arch/ia64/linux-xen/setup.c | 10 xen/arch/ia64/linux-xen/smp.c | 32 
xen/arch/ia64/linux-xen/unaligned.c | 2 xen/arch/ia64/tools/sparse-merge | 144 + xen/arch/ia64/vmx/pal_emul.c | 6 xen/arch/ia64/vmx/vlsapic.c | 7 xen/arch/ia64/vmx/vmmu.c | 22 xen/arch/ia64/vmx/vmx_entry.S | 69 xen/arch/ia64/vmx/vmx_init.c | 24 xen/arch/ia64/vmx/vmx_interrupt.c | 6 xen/arch/ia64/vmx/vmx_ivt.S | 127 - xen/arch/ia64/vmx/vmx_phy_mode.c | 9 xen/arch/ia64/vmx/vmx_process.c | 2 xen/arch/ia64/vmx/vmx_support.c | 8 xen/arch/ia64/vmx/vmx_vcpu.c | 4 xen/arch/ia64/vmx/vmx_virt.c | 23 xen/arch/ia64/vmx/vtlb.c | 86 xen/arch/ia64/xen/Makefile | 1 xen/arch/ia64/xen/dom0_ops.c | 12 xen/arch/ia64/xen/dom_fw.c | 51 xen/arch/ia64/xen/domain.c | 687 ++++- xen/arch/ia64/xen/efi_emul.c | 180 + xen/arch/ia64/xen/hypercall.c | 98 xen/arch/ia64/xen/hyperprivop.S | 138 - xen/arch/ia64/xen/ivt.S | 49 xen/arch/ia64/xen/privop.c | 27 xen/arch/ia64/xen/process.c | 301 -- xen/arch/ia64/xen/regionreg.c | 10 xen/arch/ia64/xen/vcpu.c | 68 xen/arch/ia64/xen/vhpt.c | 214 + xen/arch/ia64/xen/xenasm.S | 349 -- xen/arch/ia64/xen/xenmisc.c | 3 xen/arch/ia64/xen/xensetup.c | 3 xen/arch/x86/dom0_ops.c | 21 xen/arch/x86/domain.c | 2 xen/arch/x86/domain_build.c | 5 xen/arch/x86/hvm/hvm.c | 22 xen/arch/x86/hvm/i8254.c | 405 +-- xen/arch/x86/hvm/intercept.c | 82 xen/arch/x86/hvm/svm/intr.c | 47 xen/arch/x86/hvm/svm/svm.c | 48 xen/arch/x86/hvm/svm/vmcb.c | 31 xen/arch/x86/hvm/vmx/io.c | 62 xen/arch/x86/hvm/vmx/vmx.c | 37 xen/arch/x86/mm.c | 204 + xen/arch/x86/shadow.c | 6 xen/arch/x86/shadow32.c | 8 xen/arch/x86/shadow_public.c | 5 xen/arch/x86/traps.c | 4 xen/arch/x86/x86_emulate.c | 81 xen/common/Makefile | 1 xen/common/acm_ops.c | 282 +- xen/common/elf.c | 49 xen/common/grant_table.c | 15 xen/common/kernel.c | 5 xen/common/sched_credit.c | 1233 ++++++++++ xen/common/schedule.c | 5 xen/common/trace.c | 6 xen/include/asm-ia64/config.h | 11 xen/include/asm-ia64/dom_fw.h | 14 xen/include/asm-ia64/domain.h | 26 xen/include/asm-ia64/event.h | 2 xen/include/asm-ia64/flushtlb.h | 9 
xen/include/asm-ia64/grant_table.h | 33 xen/include/asm-ia64/linux-xen/asm/pgalloc.h | 2 xen/include/asm-ia64/linux-xen/asm/pgtable.h | 14 xen/include/asm-ia64/linux-xen/asm/tlbflush.h | 119 xen/include/asm-ia64/mm.h | 10 xen/include/asm-ia64/shadow.h | 57 xen/include/asm-ia64/tlbflush.h | 37 xen/include/asm-ia64/vcpu.h | 8 xen/include/asm-ia64/vhpt.h | 18 xen/include/asm-ia64/vmx_vcpu.h | 2 xen/include/asm-x86/domain.h | 21 xen/include/asm-x86/fixmap.h | 10 xen/include/asm-x86/hvm/domain.h | 6 xen/include/asm-x86/hvm/svm/intr.h | 1 xen/include/asm-x86/hvm/svm/svm.h | 1 xen/include/asm-x86/hvm/vcpu.h | 3 xen/include/asm-x86/hvm/vmx/vmx.h | 1 xen/include/asm-x86/hvm/vpit.h | 67 xen/include/asm-x86/string.h | 162 - xen/include/asm-x86/x86_emulate.h | 66 xen/include/public/acm_ops.h | 54 xen/include/public/arch-ia64.h | 119 xen/include/public/arch-x86_32.h | 36 xen/include/public/arch-x86_64.h | 29 xen/include/public/callback.h | 10 xen/include/public/dom0_ops.h | 205 - xen/include/public/event_channel.h | 99 xen/include/public/grant_table.h | 30 xen/include/public/hvm/ioreq.h | 26 xen/include/public/hvm/vmx_assist.h | 5 xen/include/public/io/blkif.h | 12 xen/include/public/io/netif.h | 32 xen/include/public/io/tpmif.h | 19 xen/include/public/io/xenbus.h | 59 xen/include/public/memory.h | 48 xen/include/public/nmi.h | 5 xen/include/public/physdev.h | 45 xen/include/public/sched.h | 15 xen/include/public/sched_ctl.h | 5 xen/include/public/vcpu.h | 10 xen/include/public/version.h | 15 xen/include/public/xen.h | 47 xen/include/public/xenoprof.h | 15 xen/include/xen/hypercall.h | 2 xen/include/xen/sched-if.h | 2 xen/include/xen/softirq.h | 13 # HG changeset patch # User Hollis Blanchard <hollisb@xxxxxxxxxx> # Node ID f54d38cea8acaa870e6b73990fbff61fe4c3e2ac # Parent e7424645152709dfbacd30df4b996db736403408 # Parent d5f98d23427a0d256b896fc63ccfd2c1f79e55ba [ppc] merge with upstream Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx> diff -r e74246451527 -r f54d38cea8ac 
.hgignore --- a/.hgignore Tue May 30 12:52:02 2006 -0500 +++ b/.hgignore Tue May 30 14:30:34 2006 -0500 @@ -14,7 +14,7 @@ .*\.orig$ .*\.rej$ .*/a\.out$ -.*/cscope\.*$ +.*/cscope\..*$ ^[^/]*\.bz2$ ^TAGS$ ^dist/.*$ diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Tue May 30 12:52:02 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Tue May 30 14:30:34 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-xen0 -# Mon Mar 27 14:46:03 2006 +# Linux kernel version: 2.6.16.13-xen0 +# Mon May 22 14:46:31 2006 # # @@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_ARCH_XEN=y -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_BLKDEV_GRANT=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BACKEND=y -CONFIG_XEN_BLKDEV_BACKEND=y -CONFIG_XEN_SYSFS=y +CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -1522,3 +1517,30 @@ CONFIG_CRYPTO_DES=y # # Hardware crypto devices # +CONFIG_HAVE_ARCH_ALLOC_SKB=y +CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_XEN_INTERFACE_VERSION=0x00030202 + +# +# XEN +# +CONFIG_XEN_PRIVILEGED_GUEST=y +# CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y +# CONFIG_XEN_PCIDEV_BACKEND is not set +CONFIG_XEN_BLKDEV_BACKEND=y +# CONFIG_XEN_BLKDEV_TAP_BE is not set +CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set +CONFIG_XEN_NETDEV_LOOPBACK=y +# CONFIG_XEN_TPMDEV_BACKEND is not set +CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_TAP is not set +# CONFIG_XEN_SCRUB_PAGES is not set +# CONFIG_XEN_DISABLE_SERIAL is not set +CONFIG_XEN_SYSFS=y +CONFIG_XEN_COMPAT_030002_AND_LATER=y +# CONFIG_XEN_COMPAT_LATEST_ONLY is not set +CONFIG_XEN_COMPAT_030002=y +CONFIG_NO_IDLE_HZ=y diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xenU_ia64 --- 
a/buildconfigs/linux-defconfig_xenU_ia64 Tue May 30 12:52:02 2006 -0500 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Tue May 30 14:30:34 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-xenU -# Mon Mar 27 14:01:13 2006 +# Linux kernel version: 2.6.16.13-xenU +# Mon May 22 15:05:32 2006 # # @@ -89,12 +89,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_ARCH_XEN=y -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_BLKDEV_GRANT=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BACKEND=y -CONFIG_XEN_BLKDEV_BACKEND=y -CONFIG_XEN_SYSFS=y +CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -1386,3 +1381,22 @@ CONFIG_CRYPTO_DES=y # # Hardware crypto devices # +CONFIG_HAVE_ARCH_ALLOC_SKB=y +CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_XEN_INTERFACE_VERSION=0x00030202 + +# +# XEN +# +# CONFIG_XEN_PRIVILEGED_GUEST is not set +CONFIG_XEN_UNPRIVILEGED_GUEST=y +# CONFIG_XEN_BACKEND is not set +CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_SCRUB_PAGES is not set +# CONFIG_XEN_DISABLE_SERIAL is not set +CONFIG_XEN_SYSFS=y +CONFIG_XEN_COMPAT_030002_AND_LATER=y +# CONFIG_XEN_COMPAT_LATEST_ONLY is not set +CONFIG_XEN_COMPAT_030002=y +CONFIG_NO_IDLE_HZ=y diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Tue May 30 12:52:02 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen_ia64 Tue May 30 14:30:34 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-xen -# Mon Mar 27 14:36:21 2006 +# Linux kernel version: 2.6.16.13-xen +# Mon May 22 14:15:20 2006 # # @@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_ARCH_XEN=y -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_BLKDEV_GRANT=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BACKEND=y -CONFIG_XEN_BLKDEV_BACKEND=y 
-CONFIG_XEN_SYSFS=y +CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -1528,3 +1523,30 @@ CONFIG_CRYPTO_DES=y # # Hardware crypto devices # +CONFIG_HAVE_ARCH_ALLOC_SKB=y +CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_XEN_INTERFACE_VERSION=0x00030202 + +# +# XEN +# +CONFIG_XEN_PRIVILEGED_GUEST=y +# CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y +# CONFIG_XEN_PCIDEV_BACKEND is not set +CONFIG_XEN_BLKDEV_BACKEND=y +# CONFIG_XEN_BLKDEV_TAP_BE is not set +CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set +CONFIG_XEN_NETDEV_LOOPBACK=y +# CONFIG_XEN_TPMDEV_BACKEND is not set +CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_TAP is not set +# CONFIG_XEN_SCRUB_PAGES is not set +# CONFIG_XEN_DISABLE_SERIAL is not set +CONFIG_XEN_SYSFS=y +CONFIG_XEN_COMPAT_030002_AND_LATER=y +# CONFIG_XEN_COMPAT_LATEST_ONLY is not set +CONFIG_XEN_COMPAT_030002=y +CONFIG_NO_IDLE_HZ=y diff -r e74246451527 -r f54d38cea8ac docs/src/interface.tex --- a/docs/src/interface.tex Tue May 30 12:52:02 2006 -0500 +++ b/docs/src/interface.tex Tue May 30 14:30:34 2006 -0500 @@ -205,30 +205,23 @@ implement timeout values when they block implement timeout values when they block. - -%% % akw: demoting this to a section -- not sure if there is any point -%% % though, maybe just remove it. - -% KAF: Remove these random sections! -\begin{comment} \section{Xen CPU Scheduling} Xen offers a uniform API for CPU schedulers. It is possible to choose from a number of schedulers at boot and it should be easy to add more. -The BVT, Atropos and Round Robin schedulers are part of the normal Xen -distribution. BVT provides proportional fair shares of the CPU to the -running domains. Atropos can be used to reserve absolute shares of -the CPU for each domain. Round-robin is provided as an example of -Xen's internal scheduler API. 
+The SEDF, BVT, and Credit schedulers are part of the normal Xen +distribution. BVT and SEDF will be going away and their use should be +avoided once the credit scheduler has stabilized and become the default. +The Credit scheduler provides proportional fair shares of the +host's CPUs to the running domains. It does this while transparently +load balancing runnable VCPUs across the whole system. \paragraph*{Note: SMP host support} -Xen has always supported SMP host systems. Domains are statically -assigned to CPUs, either at creation time or when manually pinning to -a particular CPU. The current schedulers then run locally on each CPU -to decide which of the assigned domains should be run there. The -user-level control software can be used to perform coarse-grain -load-balancing between CPUs. -\end{comment} +Xen has always supported SMP host systems. When using the credit scheduler, +a domain's VCPUs will be dynamically moved across physical CPUs to maximise +domain and system throughput. VCPUs can also be manually restricted to be +mapped only on a subset of the host's physical CPUs, using the pinning +mechanism. %% More information on the characteristics and use of these schedulers diff -r e74246451527 -r f54d38cea8ac docs/src/user.tex --- a/docs/src/user.tex Tue May 30 12:52:02 2006 -0500 +++ b/docs/src/user.tex Tue May 30 14:30:34 2006 -0500 @@ -1093,6 +1093,36 @@ running domains in \xend's SXP configura You can get access to the console of a particular domain using the \verb_# xm console_ command (e.g.\ \verb_# xm console myVM_). + +\subsection{Domain Scheduling Management Commands} + +The credit CPU scheduler automatically load balances guest VCPUs +across all available physical CPUs on an SMP host. The user need +not manually pin VCPUs to load balance the system. However, she +can restrict which CPUs a particular VCPU may run on using +the \path{xm vcpu-pin} command. + +Each guest domain is assigned a \path{weight} and a \path{cap}. 
+ +A domain with a weight of 512 will get twice as much CPU as a +domain with a weight of 256 on a contended host. Legal weights +range from 1 to 65535 and the default is 256. + +The cap optionally fixes the maximum amount of CPU a guest will +be able to consume, even if the host system has idle CPU cycles. +The cap is expressed in percentage of one physical CPU: 100 is +1 physical CPU, 50 is half a CPU, 400 is 4 CPUs, etc... The +default, 0, means there is no upper cap. + +When you are running with the credit scheduler, you can check and +modify your domains' weights and caps using the \path{xm sched-credit} +command: + +\begin{tabular}{ll} +\verb!xm sched-credit -d <domain>! & lists weight and cap \\ +\verb!xm sched-credit -d <domain> -w <weight>! & sets the weight \\ +\verb!xm sched-credit -d <domain> -c <cap>! & sets the cap +\end{tabular} @@ -1985,7 +2015,7 @@ editing \path{grub.conf}. \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in pages (default 0). \item [ sched=xxx ] Select the CPU scheduler Xen should use. The - current possibilities are `sedf' (default) and `bvt'. + current possibilities are `sedf' (default), `credit', and `bvt'. \item [ apic\_verbosity=debug,verbose ] Print more detailed information about local APIC and IOAPIC configuration. \item [ lapic ] Force use of local APIC even when left disabled by diff -r e74246451527 -r f54d38cea8ac extras/mini-os/Makefile --- a/extras/mini-os/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/Makefile Tue May 30 14:30:34 2006 -0500 @@ -1,4 +1,5 @@ debug ?= y debug ?= y +pae ?= n include $(CURDIR)/../../Config.mk @@ -12,11 +13,17 @@ override CPPFLAGS := -Iinclude $(CPPFLAG override CPPFLAGS := -Iinclude $(CPPFLAGS) ASFLAGS = -D__ASSEMBLY__ +LDLIBS = -L. 
-lminios LDFLAGS := -N -T minios-$(TARGET_ARCH).lds ifeq ($(TARGET_ARCH),x86_32) CFLAGS += -m32 -march=i686 LDFLAGS += -m elf_i386 +endif + +ifeq ($(TARGET_ARCH)$(pae),x86_32y) +CFLAGS += -DCONFIG_X86_PAE=1 +ASFLAGS += -DCONFIG_X86_PAE=1 endif ifeq ($(TARGET_ARCH),x86_64) @@ -49,11 +56,11 @@ links: links: [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen -libminios.a: $(OBJS) $(HEAD) - ar r libminios.a $(HEAD) $(OBJS) +libminios.a: links $(OBJS) $(HEAD) + $(AR) r libminios.a $(HEAD) $(OBJS) -$(TARGET): links libminios.a $(HEAD) - $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf +$(TARGET): libminios.a $(HEAD) + $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf gzip -f -9 -c $@.elf >$@.gz .PHONY: clean diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/include/mm.h Tue May 30 14:30:34 2006 -0500 @@ -43,6 +43,8 @@ #if defined(__i386__) +#if !defined(CONFIG_X86_PAE) + #define L2_PAGETABLE_SHIFT 22 #define L1_PAGETABLE_ENTRIES 1024 @@ -50,6 +52,30 @@ #define PADDR_BITS 32 #define PADDR_MASK (~0UL) + +#define UNMAPPED_PT_FRAMES 1 +#define PRIpte "08lx" +typedef unsigned long pgentry_t; + +#else /* defined(CONFIG_X86_PAE) */ + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +#define UNMAPPED_PT_FRAMES 2 +#define PRIpte "016llx" +typedef uint64_t pgentry_t; + +#endif /* !defined(CONFIG_X86_PAE) */ #elif defined(__x86_64__) @@ -81,6 +107,10 @@ #define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) #define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) +#define UNMAPPED_PT_FRAMES 3 +#define PRIpte "016lx" +typedef unsigned long pgentry_t; + #endif #define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) @@ -90,9 +120,11 @@ (((_a) >> 
L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) #define l2_table_offset(_a) \ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) -#if defined(__x86_64__) +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) #define l3_table_offset(_a) \ (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#endif +#if defined(__x86_64__) #define l4_table_offset(_a) \ (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) #endif @@ -111,14 +143,21 @@ #if defined(__i386__) #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#if defined(CONFIG_X86_PAE) +#define L3_PROT (_PAGE_PRESENT) +#endif /* CONFIG_X86_PAE */ #elif defined(__x86_64__) #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#endif - +#endif /* __i386__ || __x86_64__ */ + +#ifndef CONFIG_X86_PAE #define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) +#else +#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT) +#endif #define PAGE_SHIFT L1_PAGETABLE_SHIFT #define PAGE_MASK (~(PAGE_SIZE-1)) @@ -129,23 +168,31 @@ /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +/* Definitions for machine and pseudophysical addresses. 
*/ +#ifdef CONFIG_X86_PAE +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + extern unsigned long *phys_to_machine_mapping; extern char _text, _etext, _edata, _end; #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) -static __inline__ unsigned long phys_to_machine(unsigned long phys) -{ - unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT); - machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} - +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} #define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) -static __inline__ unsigned long machine_to_phys(unsigned long machine) -{ - unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT); - phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK); - return phys; +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; } #define VIRT_START ((unsigned long)&_text) @@ -155,7 +202,7 @@ static __inline__ unsigned long machine_ #define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) #define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) -#define mfn_to_virt(_mfn) (mach_to_virt(_mfn << PAGE_SHIFT)) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) #define pfn_to_virt(_pfn) (to_virt(_pfn << PAGE_SHIFT)) /* Pagetable walking. 
*/ diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/types.h --- a/extras/mini-os/include/types.h Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/include/types.h Tue May 30 14:30:34 2006 -0500 @@ -43,14 +43,19 @@ typedef unsigned long long u_quad_t; typedef unsigned long long u_quad_t; typedef unsigned int uintptr_t; +#if !defined(CONFIG_X86_PAE) typedef struct { unsigned long pte_low; } pte_t; +#else +typedef struct { unsigned long pte_low, pte_high; } pte_t; +#endif /* CONFIG_X86_PAE */ + #elif defined(__x86_64__) typedef long quad_t; typedef unsigned long u_quad_t; typedef unsigned long uintptr_t; typedef struct { unsigned long pte; } pte_t; -#endif +#endif /* __i386__ || __x86_64__ */ typedef u8 uint8_t; typedef s8 int8_t; diff -r e74246451527 -r f54d38cea8ac extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/kernel.c Tue May 30 14:30:34 2006 -0500 @@ -63,7 +63,12 @@ void failsafe_callback(void); extern char shared_info[PAGE_SIZE]; +#if !defined(CONFIG_X86_PAE) #define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif static shared_info_t *map_shared_info(unsigned long pa) { @@ -71,7 +76,7 @@ static shared_info_t *map_shared_info(un (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) ) { printk("Failed to map shared_info!!\n"); - *(int*)0=0; + do_exit(); } return (shared_info_t *)shared_info; } @@ -126,6 +131,10 @@ void start_kernel(start_info_t *si) /* WARN: don't do printk before here, it uses information from shared_info. Use xprintk instead. */ memcpy(&start_info, si, sizeof(*si)); + + /* set up minimal memory infos */ + phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; + /* Grab the shared_info pointer and put it in a safe place. 
*/ HYPERVISOR_shared_info = map_shared_info(start_info.shared_info); diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/printf.c --- a/extras/mini-os/lib/printf.c Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/lib/printf.c Tue May 30 14:30:34 2006 -0500 @@ -53,6 +53,8 @@ * * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ */ + +#if !defined HAVE_LIBC #include <os.h> #include <types.h> @@ -789,4 +791,4 @@ int sscanf(const char * buf, const char return i; } - +#endif diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/string.c --- a/extras/mini-os/lib/string.c Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/lib/string.c Tue May 30 14:30:34 2006 -0500 @@ -17,6 +17,8 @@ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ **************************************************************************** */ + +#if !defined HAVE_LIBC #include <os.h> #include <types.h> @@ -153,3 +155,5 @@ char * strstr(const char * s1,const char } return NULL; } + +#endif diff -r e74246451527 -r f54d38cea8ac extras/mini-os/mm.c --- a/extras/mini-os/mm.c Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/mm.c Tue May 30 14:30:34 2006 -0500 @@ -368,7 +368,7 @@ void new_pt_frame(unsigned long *pt_pfn, void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, unsigned long offset, unsigned long level) { - unsigned long *tab = (unsigned long *)start_info.pt_base; + pgentry_t *tab = (pgentry_t *)start_info.pt_base; unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); unsigned long prot_e, prot_t, pincmd; mmu_update_t mmu_updates[1]; @@ -382,40 +382,45 @@ void new_pt_frame(unsigned long *pt_pfn, as a page table page */ memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE); - if (level == L1_FRAME) - { + switch ( level ) + { + case L1_FRAME: prot_e = L1_PROT; prot_t = L2_PROT; pincmd = MMUEXT_PIN_L1_TABLE; - } -#if (defined __x86_64__) - else if (level == L2_FRAME) - { + break; +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) + case 
L2_FRAME: prot_e = L2_PROT; prot_t = L3_PROT; pincmd = MMUEXT_PIN_L2_TABLE; - } - else if (level == L3_FRAME) - { + break; +#endif +#if defined(__x86_64__) + case L3_FRAME: prot_e = L3_PROT; prot_t = L4_PROT; pincmd = MMUEXT_PIN_L3_TABLE; - } -#endif - else - { + break; +#endif + default: printk("new_pt_frame() called with invalid level number %d\n", level); do_exit(); - } + break; + } /* Update the entry */ -#if (defined __x86_64__) +#if defined(__x86_64__) tab = pte_to_virt(tab[l4_table_offset(pt_page)]); tab = pte_to_virt(tab[l3_table_offset(pt_page)]); #endif - mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + - sizeof(void *)* l1_table_offset(pt_page); - mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | +#if defined(CONFIG_X86_PAE) + tab = pte_to_virt(tab[l3_table_offset(pt_page)]); +#endif + + mmu_updates[0].ptr = ((pgentry_t)tab[l2_table_offset(pt_page)] & PAGE_MASK) + + sizeof(pgentry_t) * l1_table_offset(pt_page); + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | (prot_e & ~_PAGE_RW); if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) { @@ -434,8 +439,8 @@ void new_pt_frame(unsigned long *pt_pfn, /* Now fill the new page table page with entries. Update the page directory as well. 
*/ - mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset; - mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t; + mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t; if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) { printk("ERROR: mmu_update failed\n"); @@ -450,16 +455,13 @@ void build_pagetable(unsigned long *star unsigned long start_address, end_address; unsigned long pfn_to_map, pt_pfn = *start_pfn; static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; - unsigned long *tab = (unsigned long *)start_info.pt_base; + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); - unsigned long page, offset; + unsigned long offset; int count = 0; -#if defined(__x86_64__) - pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES; -#else - pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES; -#endif + pfn_to_map = (start_info.nr_pt_frames - UNMAPPED_PT_FRAMES) * L1_PAGETABLE_ENTRIES; + start_address = (unsigned long)pfn_to_virt(pfn_to_map); end_address = (unsigned long)pfn_to_virt(*max_pfn); @@ -468,7 +470,7 @@ void build_pagetable(unsigned long *star while(start_address < end_address) { - tab = (unsigned long *)start_info.pt_base; + tab = (pgentry_t *)start_info.pt_base; mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); #if defined(__x86_64__) @@ -480,6 +482,8 @@ void build_pagetable(unsigned long *star page = tab[offset]; mfn = pte_to_mfn(page); tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) offset = l3_table_offset(start_address); /* Need new L2 pt frame */ if(!(start_address & L2_MASK)) @@ -498,9 +502,9 @@ void build_pagetable(unsigned long *star mfn = pte_to_mfn(page); offset = l1_table_offset(start_address); - mmu_updates[count].ptr = (mfn << PAGE_SHIFT) 
+ sizeof(void *) * offset; + mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; mmu_updates[count].val = - pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; + (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; count++; if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn) { @@ -557,9 +561,6 @@ void init_mm(void) printk(" stack start: %p\n", &stack); printk(" _end: %p\n", &_end); - /* set up minimal memory infos */ - phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; - /* First page follows page table pages and 3 more pages (store page etc) */ start_pfn = PFN_UP(to_phys(start_info.pt_base)) + start_info.nr_pt_frames + 3; @@ -569,7 +570,7 @@ void init_mm(void) printk(" max_pfn: %lx\n", max_pfn); build_pagetable(&start_pfn, &max_pfn); - + /* * now we can initialise the page allocator */ diff -r e74246451527 -r f54d38cea8ac extras/mini-os/traps.c --- a/extras/mini-os/traps.c Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/traps.c Tue May 30 14:30:34 2006 -0500 @@ -95,25 +95,26 @@ DO_ERROR(18, "machine check", machine_ch void page_walk(unsigned long virt_address) { - unsigned long *tab = (unsigned long *)start_info.pt_base; - unsigned long addr = virt_address, page; + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; + unsigned long addr = virt_address; printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, start_info.pt_base); #if defined(__x86_64__) page = tab[l4_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L4 = %p (%p) [offset = %lx]\n", page, tab, l4_table_offset(addr)); - + tab = pte_to_virt(page); + printk(" L4 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l4_table_offset(addr)); +#endif +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) page = tab[l3_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L3 = %p (%p) [offset = %lx]\n", page, tab, l3_table_offset(addr)); + tab = 
pte_to_virt(page); + printk(" L3 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l3_table_offset(addr)); #endif page = tab[l2_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L2 = %p (%p) [offset = %lx]\n", page, tab, l2_table_offset(addr)); + tab = pte_to_virt(page); + printk(" L2 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l2_table_offset(addr)); page = tab[l1_table_offset(addr)]; - printk(" L1 = %p (%p) [offset = %lx]\n", page, tab, l1_table_offset(addr)); + printk(" L1 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l1_table_offset(addr)); } diff -r e74246451527 -r f54d38cea8ac extras/mini-os/x86_32.S --- a/extras/mini-os/x86_32.S Tue May 30 12:52:02 2006 -0500 +++ b/extras/mini-os/x86_32.S Tue May 30 14:30:34 2006 -0500 @@ -4,9 +4,15 @@ .section __xen_guest .ascii "GUEST_OS=Mini-OS" .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0xc0000000" /* &_text from minios_x86_32.lds */ + .ascii ",ELF_PADDR_OFFSET=0xc0000000" .ascii ",HYPERCALL_PAGE=0x2" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif .ascii ",LOADER=generic" - .ascii ",PT_MODE_WRITABLE" .byte 0 .text diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 14:30:34 2006 -0500 @@ -70,9 +70,6 @@ /* Forward Declaration. 
*/ void __init find_max_pfn(void); - -/* Allows setting of maximum possible memory size */ -static unsigned long xen_override_max_pfn; static int xen_panic_event(struct notifier_block *, unsigned long, void *); static struct notifier_block xen_panic_block = { @@ -399,6 +396,26 @@ start_info_t *xen_start_info; start_info_t *xen_start_info; EXPORT_SYMBOL(xen_start_info); +static void __init add_memory_region(unsigned long long start, + unsigned long long size, int type) +{ + int x; + + if (!efi_enabled) { + x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; + } +} /* add_memory_region */ + static void __init limit_regions(unsigned long long size) { unsigned long long current_addr = 0; @@ -442,27 +459,20 @@ static void __init limit_regions(unsigne } return; } -} - -static void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - if (!efi_enabled) { - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; - } -} /* add_memory_region */ +#ifdef CONFIG_XEN + if (i==e820.nr_map && current_addr < size) { + /* + * The e820 map finished before our requested size so + * extend the final entry to the requested address. + */ + --i; + if (e820.map[i].type == E820_RAM) + e820.map[i].size -= current_addr - size; + else + add_memory_region(current_addr, size - current_addr, E820_RAM); + } +#endif +} #define E820_DEBUG 1 @@ -492,7 +502,6 @@ static void __init print_memory_map(char } } -#if 0 /* * Sanitize the BIOS e820 map. 
* @@ -680,9 +689,13 @@ static int __init sanitize_e820_map(stru */ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) { +#ifndef CONFIG_XEN /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) return -1; +#else + BUG_ON(nr_map < 1); +#endif do { unsigned long long start = biosmap->addr; @@ -694,6 +707,7 @@ static int __init copy_e820_map(struct e if (start > end) return -1; +#ifndef CONFIG_XEN /* * Some BIOSes claim RAM in the 640k - 1M region. * Not right. Fix it up. @@ -708,11 +722,11 @@ static int __init copy_e820_map(struct e size = end - start; } } +#endif add_memory_region(start, size, type); } while (biosmap++,--nr_map); return 0; } -#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -785,13 +799,8 @@ static void __init parse_cmdline_early ( unsigned long long mem_size; mem_size = memparse(from+4, &from); -#if 0 limit_regions(mem_size); userdef=1; -#else - xen_override_max_pfn = - (unsigned long)(mem_size>>PAGE_SHIFT); -#endif } } @@ -984,7 +993,6 @@ static void __init parse_cmdline_early ( } } -#if 0 /* !XEN */ /* * Callback for efi_memory_walk. */ @@ -1036,21 +1044,6 @@ void __init find_max_pfn(void) memory_present(0, start, end); } } -#else -/* We don't use the fake e820 because we need to respond to user override. */ -void __init find_max_pfn(void) -{ - if (xen_override_max_pfn == 0) { - max_pfn = xen_start_info->nr_pages; - /* Default 8MB slack (to balance backend allocations). */ - max_pfn += 8 << (20 - PAGE_SHIFT); - } else if (xen_override_max_pfn > xen_start_info->nr_pages) { - max_pfn = xen_override_max_pfn; - } else { - max_pfn = xen_start_info->nr_pages; - } -} -#endif /* XEN */ /* * Determine low and high memory ranges: @@ -1158,6 +1151,15 @@ static void __init register_bootmem_low_ */ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); +#ifdef CONFIG_XEN + /* + * Truncate to the number of actual pages currently + * present. 
+ */ + if (last_pfn > xen_start_info->nr_pages) + last_pfn = xen_start_info->nr_pages; +#endif + if (last_pfn > max_low_pfn) last_pfn = max_low_pfn; @@ -1351,83 +1353,33 @@ void __init remapped_pgdat_init(void) * and also for regions reported as reserved by the e820. */ static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +legacy_init_iomem_resources(struct e820entry *e820, int nr_map, + struct resource *code_resource, + struct resource *data_resource) { int i; -#ifdef CONFIG_XEN - dom0_op_t op; - struct dom0_memory_map_entry *map; - unsigned long gapstart, gapsize; - unsigned long long last; -#endif - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST + +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) probe_roms(); #endif -#ifdef CONFIG_XEN - map = alloc_bootmem_low_pages(PAGE_SIZE); - op.cmd = DOM0_PHYSICAL_MEMORY_MAP; - set_xen_guest_handle(op.u.physical_memory_map.memory_map, map); - op.u.physical_memory_map.max_map_entries = - PAGE_SIZE / sizeof(struct dom0_memory_map_entry); - BUG_ON(HYPERVISOR_dom0_op(&op)); - - last = 0x100000000ULL; - gapstart = 0x10000000; - gapsize = 0x400000; - - for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) { + for (i = 0; i < nr_map; i++) { struct resource *res; - - if ((last > map[i].end) && ((last - map[i].end) > gapsize)) { - gapsize = last - map[i].end; - gapstart = map[i].end; - } - if (map[i].start < last) - last = map[i].start; - - if (map[i].end > 0x100000000ULL) + if (e820[i].addr + e820[i].size > 0x100000000ULL) continue; res = alloc_bootmem_low(sizeof(struct resource)); - res->name = map[i].is_ram ? "System RAM" : "reserved"; - res->start = map[i].start; - res->end = map[i].end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - request_resource(&iomem_resource, res); - } - - free_bootmem(__pa(map), PAGE_SIZE); - - /* - * Start allocating dynamic PCI memory a bit into the gap, - * aligned up to the nearest megabyte. 
- * - * Question: should we try to pad it up a bit (do something - * like " + (gapsize >> 3)" in there too?). We now have the - * technology. - */ - pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -#else - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); - switch (e820.map[i].type) { + switch (e820[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; default: res->name = "reserved"; } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; + res->start = e820[i].addr; + res->end = res->start + e820[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); - if (e820.map[i].type == E820_RAM) { +#ifndef CONFIG_XEN + if (e820[i].type == E820_RAM) { /* * We don't know which RAM region contains kernel data, * so we try it repeatedly and let the resource manager @@ -1439,38 +1391,21 @@ legacy_init_iomem_resources(struct resou request_resource(res, &crashk_res); #endif } - } -#endif -} - -/* - * Request address space for all standard resources - */ -static void __init register_memory(void) -{ -#ifndef CONFIG_XEN +#endif + } +} + +/* + * Locate a unused range of the physical address space below 4G which + * can be used for PCI mappings. + */ +static void __init +e820_setup_gap(struct e820entry *e820, int nr_map) +{ unsigned long gapstart, gapsize, round; unsigned long long last; -#endif - int i; - - /* Nothing to do if not running in dom0. 
*/ - if (!(xen_start_info->flags & SIF_INITDOMAIN)) - return; - - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); - else - legacy_init_iomem_resources(&code_resource, &data_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - -#ifndef CONFIG_XEN + int i; + /* * Search for the bigest gap in the low 32 bits of the e820 * memory space. @@ -1478,10 +1413,10 @@ static void __init register_memory(void) last = 0x100000000ull; gapstart = 0x10000000; gapsize = 0x400000; - i = e820.nr_map; + i = nr_map; while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; + unsigned long long start = e820[i].addr; + unsigned long long end = start + e820[i].size; /* * Since "last" is at most 4GB, we know we'll @@ -1511,6 +1446,53 @@ static void __init register_memory(void) printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); +} + +/* + * Request address space for all standard resources + */ +static void __init register_memory(void) +{ +#ifdef CONFIG_XEN + struct e820entry *machine_e820; + struct xen_memory_map memmap; +#endif + int i; + + /* Nothing to do if not running in dom0. 
*/ + if (!(xen_start_info->flags & SIF_INITDOMAIN)) + return; + +#ifdef CONFIG_XEN + machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, machine_e820); + + BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); + + legacy_init_iomem_resources(machine_e820, memmap.nr_entries, + &code_resource, &data_resource); +#else + if (efi_enabled) + efi_initialize_iomem_resources(&code_resource, &data_resource); + else + legacy_init_iomem_resources(e820.map, e820.nr_map, + &code_resource, &data_resource); +#endif + + /* EFI systems may still have VGA */ + request_resource(&iomem_resource, &video_ram_resource); + + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < STANDARD_IO_RESOURCES; i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + +#ifdef CONFIG_XEN + e820_setup_gap(machine_e820, memmap.nr_entries); + free_bootmem(__pa(machine_e820), PAGE_SIZE); +#else + e820_setup_gap(e820.map, e820.nr_map); #endif } diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue May 30 14:30:34 2006 -0500 @@ -191,6 +191,7 @@ swiotlb_init(void) if (swiotlb_force == 1) { swiotlb = 1; } else if ((swiotlb_force != -1) && + is_running_on_xen() && (xen_start_info->flags & SIF_INITDOMAIN)) { /* Domain 0 always has a swiotlb. 
*/ ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue May 30 14:30:34 2006 -0500 @@ -228,6 +228,12 @@ static inline int page_kills_ppro(unsign return 0; } +#else + +#define page_kills_ppro(p) 0 + +#endif + extern int is_available_memory(efi_memory_desc_t *); int page_is_ram(unsigned long pagenr) @@ -268,13 +274,6 @@ int page_is_ram(unsigned long pagenr) } return 0; } - -#else /* CONFIG_XEN */ - -#define page_kills_ppro(p) 0 -#define page_is_ram(p) 1 - -#endif #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue May 30 14:30:34 2006 -0500 @@ -51,7 +51,7 @@ config GENERIC_IOMAP default y config XEN - bool + bool "Xen hypervisor support" default y help Enable Xen hypervisor support. Resulting kernel runs @@ -60,34 +60,9 @@ config ARCH_XEN config ARCH_XEN bool default y + depends on XEN help TEMP ONLY. Needs to be on for drivers/xen to build. - -config XEN_PRIVILEGED_GUEST - bool "Privileged Guest" - default n - help - Used in drivers/xen/privcmd.c. Should go away? - -config XEN_BLKDEV_GRANT - depends on XEN - bool - default y - -config XEN_BLKDEV_FRONTEND - depends on XEN - bool - default y - -config XEN_BACKEND - depends on XEN - bool - default y - -config XEN_BLKDEV_BACKEND - depends on XEN && XEN_BACKEND - bool - default y config XEN_IA64_DOM0_VP bool "dom0 vp model" @@ -102,18 +77,6 @@ config XEN_IA64_DOM0_NON_VP default y help dom0 P=M model - -config XEN_SYSFS - bool "Export Xen attributes in sysfs" - depends on XEN && SYSFS - default y - help - Xen hypervisor attributes will show up under /sys/hypervisor/. 
- -config XEN_INTERFACE_VERSION - hex - depends on XEN - default 0x00030202 config SCHED_NO_NO_OMIT_FRAME_POINTER bool @@ -532,3 +495,16 @@ source "security/Kconfig" source "security/Kconfig" source "crypto/Kconfig" + +# override default values of drivers/xen/Kconfig +if !XEN_IA64_DOM0_VP +config HAVE_ARCH_ALLOC_SKB + bool + default n + +config HAVE_ARCH_DEV_ALLOC_SKB + bool + default n +endif + +source "drivers/xen/Kconfig" diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Tue May 30 14:30:34 2006 -0500 @@ -514,6 +514,9 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_XEN if (running_on_xen) { extern shared_info_t *HYPERVISOR_shared_info; + extern int xen_init (void); + + xen_init (); /* xen_start_info isn't setup yet, get the flags manually */ if (HYPERVISOR_shared_info->arch.flags & SIF_INITDOMAIN) { diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre --- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Tue May 30 14:30:34 2006 -0500 @@ -6,20 +6,6 @@ # for building (as all files in mv'd directories are thought by hg # to have been deleted). I don't know how to avoid this right now, # but if someone has a better way, I'm all ears - -function try_to_mv() { - if [ ! 
-e $2 ] - then - mv $1 $2 - fi -} - -try_to_mv mm/Kconfig mm/Kconfig.xen-x86 - -# need to grab a couple of xen-modified files for generic_page_range and -# typedef pte_fn_t which are used by driver/xen blkif -#ln -sf ../mm.xen-x86/memory.c mm/ -#ln -sf ../linux.xen-x86/mm.h include/linux/ #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Tue May 30 14:30:34 2006 -0500 @@ -2,7 +2,7 @@ # Makefile for Xen components # -obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o xenconsole.o xen_ksyms.o +obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o xenconsole.o -obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o -pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o \ No newline at end of file +obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o util.o +pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Tue May 30 14:30:34 2006 -0500 @@ -1,20 +1,22 @@ +ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y) obj-y += util.o +endif obj-y += core/ +#obj-y += char/ obj-y += console/ obj-y += evtchn/ -#obj-y += balloon/ +obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/ obj-y += privcmd/ -obj-y += blkback/ -#obj-y += netback/ -obj-y += blkfront/ obj-y += xenbus/ -#obj-y += netfront/ -#obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += privcmd/ -#obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -#obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ -#obj-$(CONFIG_XEN_BLKDEV_FRONTEND) 
+= blkfront/ -#obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ -#obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ +obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ +obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ +obj-$(CONFIG_XEN_TPMDEV_FRONTEND) += tpmfront/ +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue May 30 14:30:34 2006 -0500 @@ -1,14 +1,6 @@ # # Makefile for the linux kernel. # - -XENARCH := $(subst ",,$(CONFIG_XENARCH)) - -CPPFLAGS_vmlinux.lds += -U$(XENARCH) - -$(obj)/vmlinux.lds.S: - @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@ - obj-y := gnttab.o features.o obj-$(CONFIG_PROC_FS) += xen_proc.o @@ -16,8 +8,10 @@ ifeq ($(ARCH),ia64) ifeq ($(ARCH),ia64) obj-y += evtchn_ia64.o obj-y += xenia64_init.o +ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y) +obj-$(CONFIG_NET) += skbuff.o +endif else -extra-y += vmlinux.lds obj-y += reboot.o evtchn.o fixup.o obj-$(CONFIG_SMP) += smp.o # setup_profiling_timer def'd in ia64 obj-$(CONFIG_NET) += skbuff.o # until networking is up on ia64 diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Tue May 30 14:30:34 2006 -0500 @@ -246,25 +246,14 @@ static struct irqaction evtchn_irqaction .name = "xen-event-channel" }; -int evtchn_irq = 0xe9; +static int evtchn_irq = 0xe9; void __init evtchn_init(void) { shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *vcpu_info = 
&s->vcpu_info[smp_processor_id()]; - -#if 0 - int ret; - irq = assign_irq_vector(AUTO_ASSIGN); - ret = request_irq(irq, evtchn_interrupt, 0, "xen-event-channel", NULL); - if (ret < 0) - { - printk("xen-event-channel unable to get irq %d (%d)\n", irq, ret); - return; - } -#endif + register_percpu_irq(evtchn_irq, &evtchn_irqaction); - vcpu_info->arch.evtchn_vector = evtchn_irq; + s->arch.evtchn_vector = evtchn_irq; printk("xen-event-channel using irq %d\n", evtchn_irq); spin_lock_init(&irq_mapping_update_lock); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 14:30:34 2006 -0500 @@ -11,17 +11,20 @@ shared_info_t *HYPERVISOR_shared_info = shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE; EXPORT_SYMBOL(HYPERVISOR_shared_info); -static int initialized; start_info_t *xen_start_info; + +int running_on_xen; +EXPORT_SYMBOL(running_on_xen); int xen_init(void) { + static int initialized; shared_info_t *s = HYPERVISOR_shared_info; if (initialized) return running_on_xen ? 0 : -1; - if (!running_on_xen) + if (!is_running_on_xen()) return -1; xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT); @@ -35,6 +38,7 @@ int xen_init(void) return 0; } +#ifndef CONFIG_XEN_IA64_DOM0_VP /* We just need a range of legal va here, though finally identity * mapped one is instead used for gnttab mapping. 
*/ @@ -47,6 +51,7 @@ unsigned long alloc_empty_foreign_map_pa return (unsigned long)vma->addr; } +#endif #if 0 /* These should be define'd but some drivers use them without diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Tue May 30 14:30:34 2006 -0500 @@ -247,7 +247,7 @@ 1: mov r8=r32 XEN_RESTORE_PSR_IC ;; br.ret.sptk.many rp -END(xen_set_rr) +END(xen_set_kr) GLOBAL_ENTRY(xen_fc) movl r8=running_on_xen;; @@ -345,7 +345,7 @@ GLOBAL_ENTRY(xen_send_ipi) GLOBAL_ENTRY(xen_send_ipi) mov r14=r32 mov r15=r33 - mov r2=0x380 + mov r2=0x400 break 0x1000 ;; br.ret.sptk.many rp diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Tue May 30 14:30:34 2006 -0500 @@ -23,18 +23,56 @@ //#include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/bootmem.h> +#include <linux/vmalloc.h> #include <asm/page.h> #include <asm/hypervisor.h> #include <asm/hypercall.h> - -#define XEN_IA64_BALLOON_IS_NOT_YET -#ifndef XEN_IA64_BALLOON_IS_NOT_YET +#include <xen/interface/memory.h> #include <xen/balloon.h> -#else -#define balloon_lock(flags) ((void)flags) -#define balloon_unlock(flags) ((void)flags) -#endif - + +//XXX xen/ia64 copy_from_guest() is broken. +// This is a temporal work around until it is fixed. +// used by balloon.c netfront.c + +// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined +// if the definition in arch-ia64.h is changed, this must be updated. 
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) + +int +ia64_xenmem_reservation_op(unsigned long op, + struct xen_memory_reservation* reservation__) +{ + struct xen_memory_reservation reservation = *reservation__; + unsigned long* frame_list; + unsigned long nr_extents = reservation__->nr_extents; + int ret = 0; + get_xen_guest_handle(frame_list, reservation__->extent_start); + + BUG_ON(op != XENMEM_increase_reservation && + op != XENMEM_decrease_reservation && + op != XENMEM_populate_physmap); + + while (nr_extents > 0) { + int tmp_ret; + volatile unsigned long dummy; + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_extents; + + dummy = frame_list[0];// re-install tlb entry before hypercall + tmp_ret = ____HYPERVISOR_memory_op(op, &reservation); + if (tmp_ret < 0) { + if (ret == 0) { + ret = tmp_ret; + } + break; + } + frame_list += tmp_ret; + nr_extents -= tmp_ret; + ret += tmp_ret; + } + return ret; +} //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? 
@@ -105,6 +143,39 @@ static void contiguous_bitmap_clear( } } +static unsigned long +HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order, + unsigned int address_bits) +{ + unsigned long ret; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = address_bits, + .extent_order = extent_order, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + BUG_ON(ret != 1); + return 0; +} + +static unsigned long +HYPERVISOR_remove_physmap(unsigned long gpfn, unsigned int extent_order) +{ + unsigned long ret; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = 0, + .extent_order = extent_order, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); + return 0; +} + /* Ensure multi-page extents are contiguous in machine memory. 
*/ int __xen_create_contiguous_region(unsigned long vstart, @@ -113,29 +184,29 @@ __xen_create_contiguous_region(unsigned unsigned long error = 0; unsigned long gphys = __pa(vstart); unsigned long start_gpfn = gphys >> PAGE_SHIFT; - unsigned long num_pfn = 1 << order; + unsigned long num_gpfn = 1 << order; unsigned long i; unsigned long flags; - scrub_pages(vstart, 1 << order); + scrub_pages(vstart, num_gpfn); balloon_lock(flags); - //XXX order - for (i = 0; i < num_pfn; i++) { - error = HYPERVISOR_zap_physmap(start_gpfn + i, 0); - if (error) { - goto out; - } + error = HYPERVISOR_remove_physmap(start_gpfn, order); + if (error) { + goto fail; } error = HYPERVISOR_populate_physmap(start_gpfn, order, address_bits); - contiguous_bitmap_set(start_gpfn, 1UL << order); + if (error) { + goto fail; + } + contiguous_bitmap_set(start_gpfn, num_gpfn); #if 0 { unsigned long mfn; unsigned long mfn_prev = ~0UL; - for (i = 0; i < 1 << order; i++) { + for (i = 0; i < num_gpfn; i++) { mfn = pfn_to_mfn_for_dma(start_gpfn + i); if (mfn_prev != ~0UL && mfn != mfn_prev + 1) { xprintk("\n"); @@ -145,7 +216,7 @@ __xen_create_contiguous_region(unsigned vstart, virt_to_bus((void*)vstart), phys_to_machine_for_dma(gphys)); xprintk("mfn: "); - for (i = 0; i < 1 << order; i++) { + for (i = 0; i < num_gpfn; i++) { mfn = pfn_to_mfn_for_dma(start_gpfn + i); xprintk("0x%lx ", mfn); } @@ -159,76 +230,405 @@ out: out: balloon_unlock(flags); return error; + +fail: + for (i = 0; i < num_gpfn; i++) { + error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0); + if (error) { + BUG();//XXX + } + } + goto out; } void __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) { + unsigned long flags; unsigned long error = 0; - unsigned long gphys = __pa(vstart); - unsigned long start_gpfn = gphys >> PAGE_SHIFT; - unsigned long num_pfn = 1 << order; + unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT; + unsigned long num_gpfn = 1UL << order; + unsigned long* gpfns; + struct 
xen_memory_reservation reservation; unsigned long i; - unsigned long flags; - - scrub_pages(vstart, 1 << order); + + gpfns = kmalloc(sizeof(gpfns[0]) * num_gpfn, + GFP_KERNEL | __GFP_NOFAIL); + for (i = 0; i < num_gpfn; i++) { + gpfns[i] = start_gpfn + i; + } + + scrub_pages(vstart, num_gpfn); balloon_lock(flags); - contiguous_bitmap_clear(start_gpfn, 1UL << order); - - //XXX order - for (i = 0; i < num_pfn; i++) { - error = HYPERVISOR_zap_physmap(start_gpfn + i, 0); - if (error) { - goto out; - } - } - - for (i = 0; i < num_pfn; i++) { - error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0); - if (error) { - goto out; - } - } - + contiguous_bitmap_clear(start_gpfn, num_gpfn); + error = HYPERVISOR_remove_physmap(start_gpfn, order); + if (error) { + goto fail; + } + + set_xen_guest_handle(reservation.extent_start, gpfns); + reservation.nr_extents = num_gpfn; + reservation.address_bits = 0; + reservation.extent_order = 0; + reservation.domid = DOMID_SELF; + error = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + if (error != num_gpfn) { + error = -EFAULT;//XXX + goto fail; + } + error = 0; out: balloon_unlock(flags); + kfree(gpfns); if (error) { - //XXX - } + // error can't be returned. + BUG();//XXX + } + return; + +fail: + for (i = 0; i < num_gpfn; i++) { + int tmp_error;// don't overwrite error. + tmp_error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0); + if (tmp_error) { + BUG();//XXX + } + } + goto out; } /////////////////////////////////////////////////////////////////////////// -//XXX taken from balloon.c -// temporal hack until balloon driver support. 
-#include <linux/module.h> - -struct page *balloon_alloc_empty_page_range(unsigned long nr_pages) -{ - unsigned long vstart; - unsigned int order = get_order(nr_pages * PAGE_SIZE); - - vstart = __get_free_pages(GFP_KERNEL, order); - if (vstart == 0) - return NULL; - - return virt_to_page(vstart); -} - -void balloon_dealloc_empty_page_range( - struct page *page, unsigned long nr_pages) -{ - __free_pages(page, get_order(nr_pages * PAGE_SIZE)); -} - -void balloon_update_driver_allowance(long delta) -{ -} - -EXPORT_SYMBOL(balloon_alloc_empty_page_range); -EXPORT_SYMBOL(balloon_dealloc_empty_page_range); -EXPORT_SYMBOL(balloon_update_driver_allowance); - - +// grant table hack +// cmd: GNTTABOP_xxx + +#include <linux/mm.h> +#include <xen/interface/xen.h> +#include <xen/gnttab.h> + +static void +gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop) +{ + uint32_t flags; + + flags = uop->flags; + if (flags & GNTMAP_readonly) { +#if 0 + xprintd("GNTMAP_readonly is not supported yet\n"); +#endif + flags &= ~GNTMAP_readonly; + } + + if (flags & GNTMAP_host_map) { + if (flags & GNTMAP_application_map) { + xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags); + BUG(); + } + if (flags & GNTMAP_contains_pte) { + xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags); + BUG(); + } + } else if (flags & GNTMAP_device_map) { + xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags); + BUG();//XXX not yet. actually this flag is not used. 
+ } else { + BUG(); + } +} + +int +HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) +{ + if (cmd == GNTTABOP_map_grant_ref) { + unsigned int i; + for (i = 0; i < count; i++) { + gnttab_map_grant_ref_pre( + (struct gnttab_map_grant_ref*)uop + i); + } + } + + return ____HYPERVISOR_grant_table_op(cmd, uop, count); +} + + +/////////////////////////////////////////////////////////////////////////// +// PageForeign(), SetPageForeign(), ClearPageForeign() + +struct address_space xen_ia64_foreign_dummy_mapping; + +/////////////////////////////////////////////////////////////////////////// +// foreign mapping + +struct xen_ia64_privcmd_entry { + atomic_t map_count; + struct page* page; +}; + +static void +xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry) +{ + atomic_set(&entry->map_count, 0); + entry->page = NULL; +} + +//TODO alloc_page() to allocate pseudo physical address space is +// waste of memory. +// When vti domain is created, qemu maps all of vti domain pages which +// reaches to several hundred megabytes at least. +// remove alloc_page(). 
+static int +xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma, + unsigned long addr, + struct xen_ia64_privcmd_entry* entry, + unsigned long mfn, + pgprot_t prot, + domid_t domid) +{ + int error = 0; + struct page* page; + unsigned long gpfn; + + BUG_ON((addr & ~PAGE_MASK) != 0); + BUG_ON(mfn == INVALID_MFN); + + if (entry->page != NULL) { + error = -EBUSY; + goto out; + } + page = alloc_page(GFP_KERNEL); + if (page == NULL) { + error = -ENOMEM; + goto out; + } + gpfn = page_to_pfn(page); + + error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */, + domid); + if (error != 0) { + goto out; + } + + prot = vma->vm_page_prot; + error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot); + if (error != 0) { + (void)HYPERVISOR_zap_physmap(gpfn, 0); + error = HYPERVISOR_populate_physmap(gpfn, 0, 0); + if (error) { + BUG();//XXX + } + __free_page(page); + } else { + atomic_inc(&entry->map_count); + entry->page = page; + } + +out: + return error; +} + +static void +xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry) +{ + struct page* page = entry->page; + unsigned long gpfn = page_to_pfn(page); + int error; + + error = HYPERVISOR_zap_physmap(gpfn, 0); + if (error) { + BUG();//XXX + } + + error = HYPERVISOR_populate_physmap(gpfn, 0, 0); + if (error) { + BUG();//XXX + } + + entry->page = NULL; + __free_page(page); +} + +static int +xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry) +{ + if (entry->page != NULL) { + atomic_inc(&entry->map_count); + } else { + BUG_ON(atomic_read(&entry->map_count) != 0); + } +} + +static int +xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry) +{ + if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) { + xen_ia64_privcmd_entry_munmap(entry); + } +} + +struct xen_ia64_privcmd_range { + atomic_t ref_count; + unsigned long pgoff; // in PAGE_SIZE + + unsigned long num_entries; + struct xen_ia64_privcmd_entry entries[0]; +}; + +struct xen_ia64_privcmd_vma { + struct 
xen_ia64_privcmd_range* range; + + unsigned long num_entries; + struct xen_ia64_privcmd_entry* entries; +}; + +static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma); +static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma); + +struct vm_operations_struct xen_ia64_privcmd_vm_ops = { + .open = &xen_ia64_privcmd_vma_open, + .close = &xen_ia64_privcmd_vma_close, +}; + +static void +__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma, + struct xen_ia64_privcmd_vma* privcmd_vma, + struct xen_ia64_privcmd_range* privcmd_range) +{ + unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff; + unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long i; + + BUG_ON(entry_offset < 0); + BUG_ON(entry_offset + num_entries > privcmd_range->num_entries); + + privcmd_vma->range = privcmd_range; + privcmd_vma->num_entries = num_entries; + privcmd_vma->entries = &privcmd_range->entries[entry_offset]; + vma->vm_private_data = privcmd_vma; + for (i = 0; i < privcmd_vma->num_entries; i++) { + xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]); + } + + vma->vm_private_data = privcmd_vma; + vma->vm_ops = &xen_ia64_privcmd_vm_ops; +} + +static void +xen_ia64_privcmd_vma_open(struct vm_area_struct* vma) +{ + struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data; + struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; + + atomic_inc(&privcmd_range->ref_count); + // vm_op->open() can't fail. 
+ privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL); + + __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range); +} + +static void +xen_ia64_privcmd_vma_close(struct vm_area_struct* vma) +{ + struct xen_ia64_privcmd_vma* privcmd_vma = + (struct xen_ia64_privcmd_vma*)vma->vm_private_data; + struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; + unsigned long i; + + for (i = 0; i < privcmd_vma->num_entries; i++) { + xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]); + } + vma->vm_private_data = NULL; + kfree(privcmd_vma); + + if (atomic_dec_and_test(&privcmd_range->ref_count)) { +#if 1 + for (i = 0; i < privcmd_range->num_entries; i++) { + struct xen_ia64_privcmd_entry* entry = + &privcmd_range->entries[i]; + BUG_ON(atomic_read(&entry->map_count) != 0); + BUG_ON(entry->page != NULL); + } +#endif + vfree(privcmd_range); + } +} + +int +privcmd_mmap(struct file * file, struct vm_area_struct * vma) +{ + unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + struct xen_ia64_privcmd_range* privcmd_range; + struct xen_ia64_privcmd_vma* privcmd_vma; + unsigned long i; + BUG_ON(!running_on_xen); + + BUG_ON(file->private_data != NULL); + privcmd_range = + vmalloc(sizeof(*privcmd_range) + + sizeof(privcmd_range->entries[0]) * num_entries); + if (privcmd_range == NULL) { + goto out_enomem0; + } + privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL); + if (privcmd_vma == NULL) { + goto out_enomem1; + } + + /* DONTCOPY is essential for Xen as copy_page_range is broken. 
*/ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + + atomic_set(&privcmd_range->ref_count, 1); + privcmd_range->pgoff = vma->vm_pgoff; + privcmd_range->num_entries = num_entries; + for (i = 0; i < privcmd_range->num_entries; i++) { + xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]); + } + + __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range); + return 0; + +out_enomem1: + kfree(privcmd_vma); +out_enomem0: + vfree(privcmd_range); + return -ENOMEM; +} + +int +direct_remap_pfn_range(struct vm_area_struct *vma, + unsigned long address, // process virtual address + unsigned long mfn, // mfn, mfn + 1, ... mfn + size/PAGE_SIZE + unsigned long size, + pgprot_t prot, + domid_t domid) // target domain +{ + struct xen_ia64_privcmd_vma* privcmd_vma = + (struct xen_ia64_privcmd_vma*)vma->vm_private_data; + unsigned long i; + unsigned long offset; + int error = 0; + BUG_ON(!running_on_xen); + +#if 0 + if (prot != vm->vm_page_prot) { + return -EINVAL; + } +#endif + + i = (address - vma->vm_start) >> PAGE_SHIFT; + for (offset = 0; offset < size; offset += PAGE_SIZE) { + struct xen_ia64_privcmd_entry* entry = + &privcmd_vma->entries[i]; + error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, entry, mfn, prot, domid); + if (error != 0) { + break; + } + + i++; + mfn++; + } + + return error; +} + diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Tue May 30 14:30:34 2006 -0500 @@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to) mov r8=1 ;; st4 [r27]=r8 // psr.ic back on - ;; -#else -(p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!! 
- ;; -(p6) srlz.d +#else ld8 sp=[r21] // load kernel stack pointer of new task mov IA64_KR(CURRENT)=in0 // update "current" application register #endif @@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to) #endif ;; itr.d dtr[r25]=r23 // wire in new mapping... +#ifndef CONFIG_XEN + ssm psr.ic // reenable the psr.ic bit + ;; + srlz.d +#endif br.cond.sptk .done #ifdef CONFIG_XEN END(xen_switch_to) @@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall) .mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 .mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: br.cond.sptk .work_pending_syscall_end +.ret3: +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk + br.cond.sptk .work_pending_syscall_end strace_error: ld8 r3=[r2] // load pt_regs.r8 @@ -246,7 +249,7 @@ END(ia64_trace_syscall) * r8-r11: restored (syscall return value(s)) * r12: restored (user-level stack pointer) * r13: restored (user-level thread pointer) - * r14: cleared + * r14: set to __kernel_syscall_via_epc * r15: restored (syscall #) * r16-r17: cleared * r18: user-level b6 @@ -267,7 +270,7 @@ END(ia64_trace_syscall) * pr: restored (user-level pr) * b0: restored (user-level rp) * b6: restored - * b7: cleared + * b7: set to __kernel_syscall_via_epc * ar.unat: restored (user-level ar.unat) * ar.pfs: restored (user-level ar.pfs) * ar.rsc: restored (user-level ar.rsc) @@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall) ;; (p6) ld4 r31=[r18] // load current_thread_info()->flags ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - mov b7=r0 // clear b7 - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) + nop.i 0 + ;; + mov r16=ar.bsp // M2 get existing backing store pointer ld8 r18=[r2],PT(R9)-PT(B6) // load b6 (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? 
;; - mov r16=ar.bsp // M2 get existing backing store pointer + ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? (p6) br.cond.spnt .work_pending_syscall ;; // start restoring the state saved on the kernel stack (struct pt_regs): ld8 r9=[r2],PT(CR_IPSR)-PT(R9) ld8 r11=[r3],PT(CR_IIP)-PT(R11) - mov f6=f0 // clear f6 +(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! ;; invala // M0|1 invalidate ALAT #ifdef CONFIG_XEN @@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall) st4 [r29]=r0 // note: clears both vpsr.i and vpsr.ic! ;; #else - rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection -#endif - mov f9=f0 // clear f9 - - ld8 r29=[r2],16 // load cr.ipsr - ld8 r28=[r3],16 // load cr.iip - mov f8=f0 // clear f8 + rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection +#endif + cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs + + ld8 r29=[r2],16 // M0|1 load cr.ipsr + ld8 r28=[r3],16 // M0|1 load cr.iip + mov r22=r0 // A clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs - mov.m ar.ssd=r0 // M2 clear ar.ssd - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; ld8 r25=[r3],16 // M0|1 load ar.unat - mov.m ar.csd=r0 // M2 clear ar.csd - mov r22=r0 // clear r22 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled - mov f10=f0 // clear f10 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc - mov f11=f0 // clear f11 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // load predicates -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr - ld8.fill r1=[r3],16 // load r1 -(pUStk) mov r17=1 - ;; - srlz.d 
// M0 ensure interruption collection is off - ld8.fill r13=[r3],16 - mov f7=f0 // clear f7 - ;; - ld8.fill r12=[r2] // restore r12 (sp) - ld8.fill r15=[r3] // restore r15 - addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; -(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 -(pUStk) st1 [r14]=r17 - mov b6=r18 // I0 restore b6 - ;; - mov r14=r0 // clear r14 - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition -(pKStk) br.cond.dpnt.many skip_rbs_switch - - mov.m ar.ccv=r0 // clear ar.ccv -(pNonSys) br.cond.dpnt.many dont_preserve_current_frame - br.cond.sptk.many rbs_switch +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + nop 0 + ;; + ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 + ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc + mov f6=f0 // F clear f6 + ;; + ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) + ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates + mov f7=f0 // F clear f7 + ;; + ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr + ld8.fill r1=[r3],16 // M0|1 load r1 +(pUStk) mov r17=1 // A + ;; +(pUStk) st1 [r14]=r17 // M2|3 + ld8.fill r13=[r3],16 // M0|1 + mov f8=f0 // F clear f8 + ;; + ld8.fill r12=[r2] // M0|1 restore r12 (sp) + ld8.fill r15=[r3] // M0|1 restore r15 + mov b6=r18 // I0 restore b6 + + addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A + mov f9=f0 // F clear f9 +(pKStk) br.cond.dpnt.many skip_rbs_switch // B + + srlz.d // M0 ensure interruption collection is off (for cover) + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition +#ifdef CONFIG_XEN + XEN_HYPER_COVER; +#else + cover // B add current frame into dirty partition & set cr.ifs +#endif + ;; +(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 + mov r19=ar.bsp // M2 get new backing store pointer + mov f10=f0 // F clear f10 + + nop.m 0 + movl r14=__kernel_syscall_via_epc // X + ;; + mov.m ar.csd=r0 // M2 clear ar.csd + mov.m ar.ccv=r0 // M2 clear ar.ccv + mov 
b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) + + mov.m ar.ssd=r0 // M2 clear ar.ssd + mov f11=f0 // F clear f11 + br.cond.sptk.many rbs_switch // B #ifdef CONFIG_XEN END(xen_leave_syscall) #else @@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ldf.fill f7=[r2],PT(F11)-PT(F7) ldf.fill f8=[r3],32 ;; - srlz.i // ensure interruption collection is off + srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) mov ar.ccv=r15 ;; ldf.fill f11=[r2] @@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel) movl r2=XSI_BANK1_R16 movl r3=XSI_BANK1_R16+8 ;; - st8.spill [r2]=r16,16 - st8.spill [r3]=r17,16 - ;; - st8.spill [r2]=r18,16 - st8.spill [r3]=r19,16 - ;; - st8.spill [r2]=r20,16 - st8.spill [r3]=r21,16 - ;; - st8.spill [r2]=r22,16 - st8.spill [r3]=r23,16 - ;; - st8.spill [r2]=r24,16 - st8.spill [r3]=r25,16 - ;; - st8.spill [r2]=r26,16 - st8.spill [r3]=r27,16 - ;; - st8.spill [r2]=r28,16 - st8.spill [r3]=r29,16 - ;; - st8.spill [r2]=r30,16 - st8.spill [r3]=r31,16 +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,16 ;; movl r2=XSI_BANKNUM;; st4 [r2]=r0; @@ -641,14 +655,14 @@ GLOBAL_ENTRY(ia64_leave_kernel) */ (pNonSys) br.cond.dpnt dont_preserve_current_frame +#ifdef CONFIG_XEN + XEN_HYPER_COVER; +#else + cover // add current frame into dirty partition and set cr.ifs +#endif + ;; + mov r19=ar.bsp // get new backing 
store pointer rbs_switch: -#ifdef CONFIG_XEN - XEN_HYPER_COVER; -#else - cover // add current frame into dirty partition and set cr.ifs -#endif - ;; - mov r19=ar.bsp // get new backing store pointer sub r16=r16,r18 // krbs = old bsp - size of dirty partition cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs ;; @@ -723,14 +737,14 @@ rse_clear_invalid: mov loc5=0 mov loc6=0 mov loc7=0 -(pRecurse) br.call.sptk.few b0=rse_clear_invalid +(pRecurse) br.call.dptk.few b0=rse_clear_invalid ;; mov loc8=0 mov loc9=0 cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret mov loc10=0 mov loc11=0 -(pReturn) br.ret.sptk.many b0 +(pReturn) br.ret.dptk.many b0 #endif /* !CONFIG_ITANIUM */ # undef pRecurse # undef pReturn diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Tue May 30 14:30:34 2006 -0500 @@ -87,16 +87,17 @@ ENTRY(vhpt_miss) * (the "original") TLB miss, which may either be caused by an instruction * fetch or a data access (or non-access). * - * What we do here is normal TLB miss handing for the _original_ miss, followed - * by inserting the TLB entry for the virtual page table page that the VHPT - * walker was attempting to access. The latter gets inserted as long - * as both L1 and L2 have valid mappings for the faulting address. - * The TLB entry for the original miss gets inserted only if - * the L3 entry indicates that the page is present. + * What we do here is normal TLB miss handing for the _original_ miss, + * followed by inserting the TLB entry for the virtual page table page + * that the VHPT walker was attempting to access. The latter gets + * inserted as long as page table entry above pte level have valid + * mappings for the faulting address. The TLB entry for the original + * miss gets inserted only if the pte entry indicates that the page is + * present. 
* * do_page_fault gets invoked in the following cases: * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no L1, L2, or L3 mapping + * - the faulting virtual address has no valid page table mapping */ #ifdef CONFIG_XEN movl r16=XSI_IFA @@ -127,7 +128,7 @@ ENTRY(vhpt_miss) shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 ;; - shr r22=r21,3 + shr.u r22=r21,3 #ifdef CONFIG_HUGETLB_PAGE extr.u r26=r25,2,6 ;; @@ -139,7 +140,7 @@ ENTRY(vhpt_miss) #endif ;; cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address + shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit ;; (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place @@ -150,41 +151,54 @@ ENTRY(vhpt_miss) (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r22,PMD_SHIFT // shift L2 index into position - ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? 
- dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -#ifdef CONFIG_XEN -(p7) ld8 r18=[r21] // read the L3 PTE +#ifdef CONFIG_PGTABLE_4 + shr.u r28=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? +#ifdef CONFIG_PGTABLE_4 + dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) + ;; + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +(p7) ld8 r29=[r28] // get *pud (may be 0) + ;; +(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) +#else + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) +#endif + ;; +(p7) ld8 r20=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) + ;; +(p7) ld8 r18=[r21] // read *pte +#ifdef CONFIG_XEN movl r19=XSI_ISR ;; ld8 r19=[r19] +#else + mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss +#endif ;; (p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? +#ifdef CONFIG_XEN movl r22=XSI_IHA ;; ld8 r22=[r22] - ;; -#else -(p7) ld8 r18=[r21] // read the L3 PTE - mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? +#else mov r22=cr.iha // get the VHPT address that caused the TLB miss +#endif ;; // avoid RAW on p7 -#endif (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? 
dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address ;; @@ -198,16 +212,17 @@ ENTRY(vhpt_miss) ;; mov r8=r24 ;; -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - ;; - movl r24=XSI_IFA - ;; - st8 [r24]=r22 - ;; #else (p10) itc.i r18 // insert the instruction TLB entry (p11) itc.d r18 // insert the data TLB entry +#endif (p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) +#ifdef CONFIG_XEN + movl r24=XSI_IFA + ;; + st8 [r24]=r22 + ;; +#else mov cr.ifa=r22 #endif @@ -242,25 +257,41 @@ ENTRY(vhpt_miss) dv_serialize_data /* - * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g + * Re-check pagetable entry. If they changed, we may have received a ptc.g * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. - */ - ld8 r25=[r21] // read L3 PTE again - ld8 r26=[r17] // read L2 entry again - ;; - cmp.ne p6,p7=r26,r20 // did L2 entry change + * inserted and retry. 
At this point, we have: + * + * r28 = equivalent of pud_offset(pgd, ifa) + * r17 = equivalent of pmd_offset(pud, ifa) + * r21 = equivalent of pte_offset(pmd, ifa) + * + * r29 = *pud + * r20 = *pmd + * r18 = *pte + */ + ld8 r25=[r21] // read *pte again + ld8 r26=[r17] // read *pmd again +#ifdef CONFIG_PGTABLE_4 + ld8 r19=[r28] // read *pud again +#endif + cmp.ne p6,p7=r0,r0 + ;; + cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change +#ifdef CONFIG_PGTABLE_4 + cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change +#endif mov r27=PAGE_SHIFT<<2 ;; (p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change ;; (p6) ptc.l r16,r27 // purge translation #endif mov pr=r31,-1 // restore predicate registers #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -272,10 +303,10 @@ ENTRY(itlb_miss) ENTRY(itlb_miss) DBG_FAULT(1) /* - * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * The ITLB handler accesses the PTE via the virtually mapped linear * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. */ #ifdef CONFIG_XEN movl r16=XSI_IFA @@ -292,11 +323,11 @@ ENTRY(itlb_miss) ;; ld8 r17=[r17] // get virtual address of L3 PTE #else - mov r17=cr.iha // get virtual address of L3 PTE + mov r17=cr.iha // get virtual address of PTE #endif movl r30=1f // load nested fault continuation point ;; -1: ld8 r18=[r17] // read L3 PTE +1: ld8 r18=[r17] // read *pte ;; mov b0=r29 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
@@ -320,7 +351,7 @@ 1: ld8 r18=[r17] // read L3 PTE */ dv_serialize_data - ld8 r19=[r17] // read L3 PTE again and see if same + ld8 r19=[r17] // read *pte again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; cmp.ne p7,p0=r18,r19 @@ -329,7 +360,8 @@ 1: ld8 r18=[r17] // read L3 PTE #endif mov pr=r31,-1 #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -341,10 +373,10 @@ ENTRY(dtlb_miss) ENTRY(dtlb_miss) DBG_FAULT(2) /* - * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * The DTLB handler accesses the PTE via the virtually mapped linear * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. */ #ifdef CONFIG_XEN movl r16=XSI_IFA @@ -361,11 +393,11 @@ dtlb_fault: ;; ld8 r17=[r17] // get virtual address of L3 PTE #else - mov r17=cr.iha // get virtual address of L3 PTE + mov r17=cr.iha // get virtual address of PTE #endif movl r30=1f // load nested fault continuation point ;; -1: ld8 r18=[r17] // read L3 PTE +1: ld8 r18=[r17] // read *pte ;; mov b0=r29 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
@@ -390,7 +422,7 @@ 1: ld8 r18=[r17] // read L3 PTE */ dv_serialize_data - ld8 r19=[r17] // read L3 PTE again and see if same + ld8 r19=[r17] // read *pte again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; cmp.ne p7,p0=r18,r19 @@ -399,7 +431,8 @@ 1: ld8 r18=[r17] // read L3 PTE #endif mov pr=r31,-1 #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss) ld8 r21=[r31],XSI_IFA-XSI_IPSR // get ipsr, point to ifa movl r17=PAGE_KERNEL ;; - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; ld8 r16=[r31] // get ifa - mov r31=pr - ;; #else mov r16=cr.ifa // get address that caused the TLB miss movl r17=PAGE_KERNEL mov r21=cr.ipsr +#endif movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r31=pr ;; -#endif #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 ;; @@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) ;; ld8 r16=[r31] // get ifa - mov r31=pr - ;; #else mov r16=cr.ifa // get address that caused the TLB miss movl r17=PAGE_KERNEL mov r20=cr.isr movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r21=cr.ipsr +#endif mov r31=pr ;; -#endif #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 ;; @@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss) * r30: continuation address * r31: saved pr * - * Output: r17: physical address of L3 PTE of faulting address + * Output: r17: physical address of PTE of faulting address * r29: saved b0 * r30: continuation address * r31: saved pr * - * Clobbered: b0, r18, r19, r21, psr.dt (cleared) + * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) */ #ifdef CONFIG_XEN XEN_HYPER_RSM_PSR_DT; @@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss) #endif mov r19=IA64_KR(PT_BASE) // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit +#ifdef CONFIG_XEN + movl r18=XSI_ITIR + ;; + ld8 r18=[r18] +#else + mov r18=cr.itir +#endif ;; 
shr.u r17=r16,61 // get the region number into r17 + extr.u r18=r18,2,6 // get the faulting page size ;; cmp.eq p6,p7=5,r17 // is faulting address in region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address - ;; + add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address + add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 + ;; + shr.u r22=r16,r22 + shr.u r18=r16,r18 (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d @@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss) (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry +#ifdef CONFIG_PGTABLE_4 + shr.u r18=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) + ;; +#ifdef CONFIG_PGTABLE_4 +(p7) ld8 r17=[r17] // get *pud (may be 0) + shr.u r18=r22,PMD_SHIFT // shift pmd index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? 
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) + ;; +#endif +(p7) ld8 r17=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); (p6) br.cond.spnt page_fault mov b0=r30 br.sptk.many b0 // return to continuation point @@ -626,7 +676,7 @@ END(ikey_miss) // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) ENTRY(page_fault) #ifdef CONFIG_XEN - XEN_HYPER_SSM_PSR_DT; + XEN_HYPER_SSM_PSR_DT #else ssm psr.dt ;; @@ -742,11 +792,12 @@ 1: ld8 r18=[r17] ;; // avoid RAW on r18 mov ar.ccv=r18 // set compare value for cmpxchg or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present mov r24=PAGE_SHIFT<<2 ;; - cmp.eq p6,p7=r26,r18 +(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present ;; (p6) itc.d r25 // install updated PTE ;; @@ -775,7 +826,8 @@ 1: ld8 r18=[r17] #endif mov pr=r31,-1 // restore pr #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -826,11 +878,12 @@ 1: ld8 r18=[r17] ;; mov ar.ccv=r18 // set compare value for cmpxchg or r25=_PAGE_A,r18 // set the accessed bit - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present mov r24=PAGE_SHIFT<<2 ;; - cmp.eq p6,p7=r26,r18 +(p6) cmp.eq p6,p7=r26,r18 // Only if page present ;; #ifdef CONFIG_XEN mov r26=r8 @@ -869,7 +922,8 @@ 1: ld8 r18=[r17] #endif /* !CONFIG_SMP */ mov pr=r31,-1 #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -892,11 +946,13 @@ ENTRY(daccess_bit) movl r30=1f // load continuation point in case of nested fault ;; #ifdef 
CONFIG_XEN - mov r18=r8; - mov r8=r16; - XEN_HYPER_THASH;; - mov r17=r8; - mov r8=r18;; + mov r18=r8 + mov r8=r16 + XEN_HYPER_THASH + ;; + mov r17=r8 + mov r8=r18 + ;; #else thash r17=r16 // compute virtual address of L3 PTE #endif @@ -909,11 +965,12 @@ 1: ld8 r18=[r17] ;; // avoid RAW on r18 mov ar.ccv=r18 // set compare value for cmpxchg or r25=_PAGE_A,r18 // set the dirty bit - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present mov r24=PAGE_SHIFT<<2 ;; - cmp.eq p6,p7=r26,r18 +(p6) cmp.eq p6,p7=r26,r18 // Only if page is present ;; #ifdef CONFIG_XEN mov r26=r8 @@ -950,7 +1007,8 @@ 1: ld8 r18=[r17] mov b0=r29 // restore b0 mov pr=r31,-1 #ifdef CONFIG_XEN - XEN_HYPER_RFI; + XEN_HYPER_RFI + dv_serialize_data #else rfi #endif @@ -976,143 +1034,157 @@ ENTRY(break_fault) * to prevent leaking bits from kernel to user level. */ DBG_FAULT(11) - mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. 
-#ifdef CONFIG_XEN - movl r31=XSI_IPSR - ;; - ld8 r29=[r31],XSI_IIP-XSI_IPSR // get ipsr, point to iip - mov r18=__IA64_BREAK_SYSCALL - mov r21=ar.fpsr - ;; - ld8 r28=[r31],XSI_IIM-XSI_IIP // get iip, point to iim - mov r19=b6 - mov r25=ar.unat - ;; - ld8 r17=[r31] // get iim - mov r27=ar.rsc - mov r26=ar.pfs - ;; -#else - mov r17=cr.iim - mov r18=__IA64_BREAK_SYSCALL - mov r21=ar.fpsr - mov r29=cr.ipsr - mov r19=b6 - mov r25=ar.unat - mov r27=ar.rsc - mov r26=ar.pfs - mov r28=cr.iip -#endif - mov r31=pr // prepare to save predicates - mov r20=r1 - ;; + mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) +#ifdef CONFIG_XEN + movl r22=XSI_IPSR + ;; + ld8 r29=[r22],XSI_IIM-XSI_IPSR // get ipsr, point to iip +#else + mov r29=cr.ipsr // M2 (12 cyc) +#endif + mov r31=pr // I0 (2 cyc) + +#ifdef CONFIG_XEN + ;; + ld8 r17=[r22],XSI_IIP-XSI_IIM +#else + mov r17=cr.iim // M2 (2 cyc) +#endif + mov.m r27=ar.rsc // M2 (12 cyc) + mov r18=__IA64_BREAK_SYSCALL // A + + mov.m ar.rsc=0 // M2 + mov.m r21=ar.fpsr // M2 (12 cyc) + mov r19=b6 // I0 (2 cyc) + ;; + mov.m r23=ar.bspstore // M2 (12 cyc) + mov.m r24=ar.rnat // M2 (5 cyc) + mov.i r26=ar.pfs // I0 (2 cyc) + + invala // M0|1 + nop.m 0 // M + mov r20=r1 // A save r1 + + nop.m 0 + movl r30=sys_call_table // X + +#ifdef CONFIG_XEN + ld8 r28=[r22] +#else + mov r28=cr.iip // M2 (2 cyc) +#endif + cmp.eq p0,p7=r18,r17 // I0 is this a system call? +(p7) br.cond.spnt non_syscall // B no -> + // + // From this point on, we are definitely on the syscall-path + // and we can use (non-banked) scratch registers. + // +/////////////////////////////////////////////////////////////////////// + mov r1=r16 // A move task-pointer to "addl"-addressable reg + mov r2=r16 // A setup r2 for ia64_syscall_setup + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - cmp.eq p0,p7=r18,r17 // is this a system call? 
(p7 <- false, if so) -(p7) br.cond.spnt non_syscall - ;; - ld1 r17=[r16] // load current->thread.on_ustack flag - st1 [r16]=r0 // clear current->thread.on_ustack flag - add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT - ;; - invala - - /* adjust return address so we skip over the break instruction: */ - - extr.u r8=r29,41,2 // extract ei field from cr.ipsr - ;; - cmp.eq p6,p7=2,r8 // isr.ei==2? - mov r2=r1 // setup r2 for ia64_syscall_setup - ;; -(p6) mov r8=0 // clear ei to 0 -(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped -(p7) adds r8=1,r8 // increment ei to next slot - ;; - cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? - dep r29=r8,r29,41,2 // insert new ei into cr.ipsr - ;; - - // switch from user to kernel RBS: - MINSTATE_START_SAVE_MIN_VIRT - br.call.sptk.many b7=ia64_syscall_setup - ;; + adds r15=-1024,r15 // A subtract 1024 from syscall number + mov r3=NR_syscalls - 1 + ;; + ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag + ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags + extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr + + shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) + addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS + cmp.leu p6,p7=r15,r3 // A syscall number in range? + ;; + + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS +(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point + tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? + + mov.m ar.bspstore=r22 // M2 switch to kernel RBS + cmp.eq p8,p9=2,r8 // A isr.ei==2? 
+ ;; + +(p8) mov r8=0 // A clear ei to 0 +(p7) movl r30=sys_ni_syscall // X + +(p8) adds r28=16,r28 // A switch cr.iip to next bundle +(p9) adds r8=1,r8 // A increment ei to next slot + nop.i 0 + ;; + + mov.m r25=ar.unat // M2 (5 cyc) + dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr + adds r15=1024,r15 // A restore original syscall number + // + // If any of the above loads miss in L1D, we'll stall here until + // the data arrives. + // +/////////////////////////////////////////////////////////////////////// + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + mov b6=r30 // I0 setup syscall handler branch reg early + cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? + + and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit + mov r18=ar.bsp // M2 (12 cyc) +(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS + ;; +.back_from_break_fixup: +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack + cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 
+ br.call.sptk.many b7=ia64_syscall_setup // B +1: + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + nop 0 #ifdef CONFIG_XEN mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;; #else - MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 -#endif -#ifdef CONFIG_XEN - movl r3=XSI_PSR_IC - mov r16=1 - ;; -#if 1 - st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1 - ;; -(p15) ld8 r3=[r3] - ;; -(p15) st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1 - mov r16=r0 - ;; -(p15) ld4 r16=[r3] // if (pending_interrupts) - ;; - cmp.ne p6,p0=r16,r0 + bsw.1 // B (6 cyc) regs are saved, switch to bank 1 +#endif + ;; + +#ifdef CONFIG_XEN + movl r16=XSI_PSR_IC + mov r3=1 + ;; + st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1 +#else + ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection +#endif + movl r3=ia64_ret_from_syscall // X + ;; + + srlz.i // M0 ensure interruption collection is on + mov rp=r3 // I0 set the real return addr +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + +#ifdef CONFIG_XEN +(p15) ld8 r16=[r16] // vpsr.i + ;; +(p15) st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1 + mov r2=r0 + ;; +(p15) ld4 r2=[r16] // if (pending_interrupts) + ;; + cmp.ne p6,p0=r2,r0 ;; (p6) ssm psr.i // do a real ssm psr.i - ;; -#else -// st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1 - adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3 // SKIP vpsr.ic = 1 - ;; -(p15) ld8 r3=[r3] - ;; -(p15) st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1 - mov r16=r0 - ;; -(p15) ld4 r16=[r3] // if (pending_interrupts) - ;; - cmp.ne p6,p0=r16,r0 - ;; -//(p6) ssm psr.i // do a real ssm psr.i -//(p6) XEN_HYPER_SSM_I; -(p6) break 0x7; - ;; -#endif - mov r3=NR_syscalls - 1 - ;; -#else - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - mov r3=NR_syscalls - 1 - ;; -(p15) ssm psr.i // restore psr.i -#endif - // p10==true means out registers are more than 8 or 
r15's Nat is true -(p10) br.cond.spnt.many ia64_ret_from_syscall - ;; - movl r16=sys_call_table - - adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 - movl r2=ia64_ret_from_syscall - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ? - mov rp=r2 // set the real return addr - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - - add r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit - ;; - cmp.eq p8,p0=r2,r0 - mov b6=r20 - ;; -(p8) br.call.sptk.many b6=b6 // ignore this return addr - br.cond.sptk ia64_trace_syscall +#else +(p15) ssm psr.i // M2 restore psr.i +#endif +(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) + br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic // NOT REACHED +/////////////////////////////////////////////////////////////////////// + // On entry, we optimistically assumed that we're coming from user-space. 
+ // For the rare cases where a system-call is done from within the kernel, + // we fix things up at this point: +.break_fixup: + add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure + mov ar.rnat=r24 // M2 restore kernel's AR.RNAT + ;; + mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE + br.cond.sptk .back_from_break_fixup END(break_fault) .org ia64_ivt+0x3000 @@ -1201,8 +1273,6 @@ END(interrupt) * - r31: saved pr * - b0: original contents (to be saved) * On exit: - * - executing on bank 1 registers - * - psr.ic enabled, interrupts restored * - p10: TRUE if syscall is invoked with more than 8 out * registers or r15's Nat is true * - r1: kernel's gp @@ -1210,8 +1280,11 @@ END(interrupt) * - r8: -EINVAL if p10 is true * - r12: points to kernel stack * - r13: points to current task + * - r14: preserved (same as on entry) + * - p13: preserved * - p15: TRUE if interrupts need to be re-enabled * - ar.fpsr: set to kernel settings + * - b6: preserved (same as on entry) */ #ifndef CONFIG_XEN GLOBAL_ENTRY(ia64_syscall_setup) @@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup) (p13) mov in5=-1 ;; st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p14,p0=in6 + tnat.nz p13,p0=in6 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 ;; - stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) + mov r8=1 (p9) tnat.nz p10,p0=r15 adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) @@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup) mov r13=r2 // establish `current' movl r1=__gp // establish kernel global pointer ;; -(p14) mov in6=-1 + st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) +(p13) mov in6=-1 (p8) mov in7=-1 - nop.i 0 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 movl r17=FPSR_DEFAULT @@ -1323,6 +1396,8 @@ END(ia64_syscall_setup) * element, followed by the arguments. 
*/ ENTRY(dispatch_illegal_op_fault) + .prologue + .body SAVE_MIN_WITH_COVER ssm psr.ic | PSR_DEFAULT_BITS ;; @@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault) mov out0=ar.ec ;; SAVE_REST + PT_REGS_UNWIND_INFO(0) ;; br.call.sptk.many rp=ia64_illegal_op_fault .ret0: ;; @@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault) FAULT(17) ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; SAVE_MIN_WITH_COVER // There is no particular reason for this code to be here, other than that @@ -1540,7 +1618,7 @@ ENTRY(daccess_rights) ;; ld8 r16=[r16] ;; - XEN_HYPER_RSM_PSR_DT; + XEN_HYPER_RSM_PSR_DT #else mov r16=cr.ifa rsm psr.dt @@ -1584,6 +1662,25 @@ END(disabled_fp_reg) // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(nat_consumption) DBG_FAULT(26) + + mov r16=cr.ipsr + mov r17=cr.isr + mov r31=pr // save PR + ;; + and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} + tbit.z p6,p0=r17,IA64_ISR_NA_BIT + ;; + cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 + dep r16=-1,r16,IA64_PSR_ED_BIT,1 +(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) + ;; + mov cr.ipsr=r16 // set cr.ipsr.na + mov pr=r31,-1 + ;; + rfi + +1: mov pr=r31,-1 + ;; FAULT(26) END(nat_consumption) @@ -1624,7 +1721,7 @@ ENTRY(speculation_vector) #ifdef CONFIG_XEN XEN_HYPER_RFI; #else - rfi + rfi // and go back #endif END(speculation_vector) @@ -1647,7 +1744,6 @@ END(debug_vector) // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(unaligned_access) DBG_FAULT(30) - mov r16=cr.ipsr mov r31=pr // prepare to save predicates ;; br.sptk.many dispatch_unaligned_handler diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Tue May 30 14:30:34 2006 -0500 @@ -155,6 +155,8 @@ ;; \ ld4 r30=[r8]; \ ;; \ + /* set XSI_INCOMPL_REGFR 0 */ \ + st4 [r8]=r0; \ cmp.eq 
p6,p7=r30,r0; \ ;; /* not sure if this stop bit is necessary */ \ (p6) adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8; \ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Tue May 30 14:30:34 2006 -0500 @@ -8,28 +8,17 @@ #include <asm/processor.h> #include <asm/asmmacro.h> - .data - .align 8 - .globl running_on_xen -running_on_xen: - data4 0 - #define isBP p3 // are we the Bootstrap Processor? .text GLOBAL_ENTRY(early_xen_setup) - mov r8=cr.dcr + mov r8=ar.rsc // Initialized in head.S (isBP) movl r9=running_on_xen;; - extr.u r8=r8,63,1;; - cmp.ne p7,p0=r8,r0;; + extr.u r8=r8,2,2;; // Extract pl fields + cmp.ne p7,p0=r8,r0;; // p7: running on xen +(p7) mov r8=1 // booleanize. +(p7) movl r10=xen_ivt;; (isBP) st4 [r9]=r8 -(p7) movl r10=xen_ivt;; (p7) mov cr.iva=r10 br.ret.sptk.many rp;; END(early_xen_setup) - -GLOBAL_ENTRY(is_running_on_xen) - movl r9=running_on_xen;; - ld4 r8=[r9] - br.ret.sptk.many rp;; -END(is_running_on_xen) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Tue May 30 14:30:34 2006 -0500 @@ -26,53 +26,34 @@ #include <asm/sections.h> #include <xen/interface/memory.h> -unsigned long pci_mem_start = 0xaeedbabe; - /* * PFN of last memory page. */ unsigned long end_pfn; EXPORT_SYMBOL(end_pfn); -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; -unsigned long end_pfn_map; - -/* - * Add a memory region to the kernel e820 map. - */ -void __init add_memory_region(unsigned long start, unsigned long size, int type) -{ - int x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! 
Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} - -#ifndef CONFIG_XEN /* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. * The direct mapping extends to end_pfn_map, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. */ +unsigned long end_pfn_map; /* * Last pfn which the user wants to use. */ - +unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; + +#ifndef CONFIG_XEN extern struct resource code_resource, data_resource; +#endif /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) { unsigned long addr = *addrp, last = addr + size; +#ifndef CONFIG_XEN /* various gunk below that needed for SMP startup */ if (addr < 0x8000) { *addrp = 0x8000; @@ -100,9 +81,16 @@ static inline int bad_addr(unsigned long return 1; } /* XXX ramdisk image here? */ +#else + if (last < (table_end<<PAGE_SHIFT)) { + *addrp = table_end << PAGE_SHIFT; + return 1; + } +#endif return 0; } +#ifndef CONFIG_XEN int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) { int i; @@ -116,6 +104,7 @@ int __init e820_mapped(unsigned long sta } return 0; } +#endif /* * Find a free area in a specific range. @@ -246,22 +235,23 @@ e820_hole_size(unsigned long start_pfn, /* * Mark e820 reserved areas as busy for the resource manager. 
*/ -void __init e820_reserve_resources(void) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { +void __init e820_reserve_resources(struct e820entry *e820, int nr_map) +{ + int i; + for (i = 0; i < nr_map; i++) { struct resource *res; res = alloc_bootmem_low(sizeof(struct resource)); - switch (e820.map[i].type) { + switch (e820[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; default: res->name = "reserved"; } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; + res->start = e820[i].addr; + res->end = res->start + e820[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); +#ifndef CONFIG_XEN if (e820.map[i].type == E820_RAM) { /* * We don't know which RAM region contains kernel data, @@ -274,9 +264,27 @@ void __init e820_reserve_resources(void) request_resource(res, &crashk_res); #endif } - } -} -#endif /* CONFIG_XEN */ +#endif + } +} + +/* + * Add a memory region to the kernel e820 map. + */ +void __init add_memory_region(unsigned long start, unsigned long size, int type) +{ + int x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; +} void __init e820_print_map(char *who) { @@ -304,7 +312,6 @@ void __init e820_print_map(char *who) } } -#ifndef CONFIG_XEN /* * Sanitize the BIOS e820 map. * @@ -491,9 +498,13 @@ static int __init sanitize_e820_map(stru */ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) { +#ifndef CONFIG_XEN /* Only one memory region (or negative)? 
Ignore it */ if (nr_map < 2) return -1; +#else + BUG_ON(nr_map < 1); +#endif do { unsigned long start = biosmap->addr; @@ -505,6 +516,7 @@ static int __init copy_e820_map(struct e if (start > end) return -1; +#ifndef CONFIG_XEN /* * Some BIOSes claim RAM in the 640k - 1M region. * Not right. Fix it up. @@ -523,12 +535,14 @@ static int __init copy_e820_map(struct e size = end - start; } } +#endif add_memory_region(start, size, type); } while (biosmap++,--nr_map); return 0; } +#ifndef CONFIG_XEN void __init setup_memory_region(void) { char *who = "BIOS-e820"; @@ -562,104 +576,63 @@ void __init setup_memory_region(void) #else /* CONFIG_XEN */ -extern unsigned long xen_override_max_pfn; -extern union xen_start_info_union xen_start_info_union; - -unsigned long __init e820_end_of_ram(void) -{ - unsigned long max_end_pfn; - - if (xen_override_max_pfn == 0) { - max_end_pfn = xen_start_info->nr_pages; - /* Default 8MB slack (to balance backend allocations). */ - max_end_pfn += 8 << (20 - PAGE_SHIFT); - } else if (xen_override_max_pfn > xen_start_info->nr_pages) { - max_end_pfn = xen_override_max_pfn; - } else { - max_end_pfn = xen_start_info->nr_pages; - } - - return max_end_pfn; -} - -unsigned long __init -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn) -{ - return 0; -} - -void __init e820_reserve_resources(void) -{ - dom0_op_t op; - struct dom0_memory_map_entry *map; - unsigned long gapstart, gapsize, round, last; - int i, found = 0; - - if (!(xen_start_info->flags & SIF_INITDOMAIN)) - return; - - map = alloc_bootmem_low_pages(PAGE_SIZE); - op.cmd = DOM0_PHYSICAL_MEMORY_MAP; - set_xen_guest_handle(op.u.physical_memory_map.memory_map, map); - op.u.physical_memory_map.max_map_entries = - PAGE_SIZE / sizeof(struct dom0_memory_map_entry); - BUG_ON(HYPERVISOR_dom0_op(&op)); - - last = 0x100000000ULL; - gapstart = 0x10000000; - gapsize = 0x400000; - - for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) { - struct resource *res; - - if ((last > 
map[i].end) && ((last - map[i].end) > gapsize)) { - gapsize = last - map[i].end; - gapstart = map[i].end; - found = 1; - } - if (map[i].start < last) - last = map[i].start; - - if (map[i].end > 0x100000000ULL) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); - res->name = map[i].is_ram ? "System RAM" : "reserved"; - res->start = map[i].start; - res->end = map[i].end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - request_resource(&iomem_resource, res); - } - - free_bootmem(__pa(map), PAGE_SIZE); - - if (!found) { - gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); - gapstart = (gapstart << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); - } - +void __init setup_memory_region(void) +{ + int rc; + struct xen_memory_map memmap; /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. + * This is rather large for a stack variable but this early in + * the boot process we know we have plenty slack space. */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} - + struct e820entry map[E820MAX]; + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, map); + + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if ( rc == -ENOSYS ) { + memmap.nr_entries = 1; + map[0].addr = 0ULL; + map[0].size = xen_start_info->nr_pages << PAGE_SHIFT; + /* 8MB slack (to balance backend allocations). 
*/ + map[0].size += 8 << 20; + map[0].type = E820_RAM; + rc = 0; + } + BUG_ON(rc); + + sanitize_e820_map(map, (char *)&memmap.nr_entries); + + BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); + + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + e820_print_map("Xen"); +} #endif void __init parse_memopt(char *p, char **from) { + int i; + unsigned long current_end; + unsigned long end; + end_user_pfn = memparse(p, from); end_user_pfn >>= PAGE_SHIFT; - xen_override_max_pfn = (unsigned long) end_user_pfn; + + end = end_user_pfn<<PAGE_SHIFT; + i = e820.nr_map-1; + current_end = e820.map[i].addr + e820.map[i].size; + + if (current_end < end) { + /* + * The e820 map ends before our requested size so + * extend the final entry to the requested address. + */ + if (e820.map[i].type == E820_RAM) + e820.map[i].size = end - e820.map[i].addr; + else + add_memory_region(current_end, end - current_end, E820_RAM); + } } void __init parse_memmapopt(char *p, char **from) @@ -683,16 +656,17 @@ void __init parse_memmapopt(char *p, cha p = *from; } +unsigned long pci_mem_start = 0xaeedbabe; + /* * Search for the biggest gap in the low 32 bits of the e820 * memory space. We pass this space to PCI to assign MMIO resources * for hotplug or unconfigured devices in. * Hopefully the BIOS let enough space left. 
*/ -__init void e820_setup_gap(void) -{ -#ifndef CONFIG_XEN - unsigned long gapstart, gapsize; +__init void e820_setup_gap(struct e820entry *e820, int nr_map) +{ + unsigned long gapstart, gapsize, round; unsigned long last; int i; int found = 0; @@ -700,10 +674,10 @@ __init void e820_setup_gap(void) last = 0x100000000ull; gapstart = 0x10000000; gapsize = 0x400000; - i = e820.nr_map; + i = nr_map; while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; + unsigned long long start = e820[i].addr; + unsigned long long end = start + e820[i].size; /* * Since "last" is at most 4GB, we know we'll @@ -729,16 +703,15 @@ __init void e820_setup_gap(void) } /* - * Start allocating dynamic PCI memory a bit into the gap, - * aligned up to the nearest megabyte. - * - * Question: should we try to pad it up a bit (do something - * like " + (gapsize >> 3)" in there too?). We now have the - * technology. + * See how much we want to round up: start off with + * rounding to the next 1MB area. 
*/ - pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); -#endif -} +} diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue May 30 14:30:34 2006 -0500 @@ -76,8 +76,8 @@ #include <xen/features.h> #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_PHYS(x) ((x) << PAGE_SHIFT) -#define end_pfn_map end_pfn #include <asm/mach-xen/setup_arch_post.h> +#include <xen/interface/memory.h> extern unsigned long start_pfn; extern struct edid_info edid_info; @@ -490,19 +490,6 @@ static __init void parse_cmdline_early ( } #ifndef CONFIG_NUMA -#ifdef CONFIG_XEN -static void __init -contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long bootmap_size; - - bootmap_size = init_bootmem(start_pfn, end_pfn); - free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT); - reserve_bootmem(HIGH_MEMORY, - (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1) - - HIGH_MEMORY); -} -#else static void __init contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { @@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_ if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n",bootmap_size); bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); +#ifdef CONFIG_XEN + e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT); +#else e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); +#endif reserve_bootmem(bootmap, bootmap_size); } -#endif /* !CONFIG_XEN */ #endif /* Use inline assembly to define this because the nops are defined @@ -636,6 +626,11 @@ void __init 
setup_arch(char **cmdline_p) { unsigned long kernel_end; +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + struct e820entry *machine_e820; + struct xen_memory_map memmap; +#endif + #ifdef CONFIG_XEN /* Register a call for panic conditions. */ notifier_chain_register(&panic_notifier_list, &xen_panic_block); @@ -693,20 +688,18 @@ void __init setup_arch(char **cmdline_p) rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); #endif +#endif /* !CONFIG_XEN */ setup_memory_region(); copy_edd(); -#endif /* !CONFIG_XEN */ if (!MOUNT_ROOT_RDONLY) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; -#ifdef CONFIG_XEN - init_mm.brk = start_pfn << PAGE_SHIFT; -#else - init_mm.brk = (unsigned long) &_end; - + init_mm.brk = (unsigned long) &_end; + +#ifndef CONFIG_XEN code_resource.start = virt_to_phys(&_text); code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); @@ -735,12 +728,11 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_NUMA - numa_initmem_init(start_pfn, end_pfn); + numa_initmem_init(0, end_pfn); #else - contig_initmem_init(start_pfn, end_pfn); -#endif - -#ifndef CONFIG_XEN + contig_initmem_init(0, end_pfn); +#endif + /* Reserve direct mapping */ reserve_bootmem_generic(table_start << PAGE_SHIFT, (table_end - table_start) << PAGE_SHIFT); @@ -749,6 +741,10 @@ void __init setup_arch(char **cmdline_p) kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE); reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY); +#ifdef CONFIG_XEN + /* reserve physmap, start info and initial page tables */ + reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT)-kernel_end); +#else /* * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. 
@@ -933,13 +929,25 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); #endif -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) /* * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. */ +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) probe_roms(); - e820_reserve_resources(); + if (xen_start_info->flags & SIF_INITDOMAIN) { + machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, machine_e820); + + BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); + + e820_reserve_resources(machine_e820, memmap.nr_entries); + } +#elif !defined(CONFIG_XEN) + probe_roms(); + e820_reserve_resources(e820.map, e820.nr_map); #endif request_resource(&iomem_resource, &video_ram_resource); @@ -951,7 +959,14 @@ void __init setup_arch(char **cmdline_p) request_resource(&ioport_resource, &standard_io_resources[i]); } - e820_setup_gap(); +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + if (xen_start_info->flags & SIF_INITDOMAIN) { + e820_setup_gap(machine_e820, memmap.nr_entries); + free_bootmem(__pa(machine_e820), PAGE_SIZE); + } +#elif !defined(CONFIG_XEN) + e820_setup_gap(e820.map, e820.nr_map); +#endif #ifdef CONFIG_GART_IOMMU iommu_hole_init(); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue May 30 14:30:34 2006 -0500 @@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addre set_pte_phys(address, phys, prot, SET_FIXMAP_USER); } -unsigned long __initdata table_start, tables_space; +unsigned long __initdata table_start, table_end; unsigned long get_machine_pfn(unsigned long addr) { @@ -409,11 +409,17 @@ static inline int make_readonly(unsigned { int readonly = 0; - /* Make old and new page tables read-only. */ + /* Make new page tables read-only. 
*/ + if (!xen_feature(XENFEAT_writable_page_tables) + && (paddr >= (table_start << PAGE_SHIFT)) + && (paddr < (table_end << PAGE_SHIFT))) + readonly = 1; + /* Make old page tables read-only. */ if (!xen_feature(XENFEAT_writable_page_tables) && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map)) - && (paddr < ((table_start << PAGE_SHIFT) + tables_space))) + && (paddr < (start_pfn << PAGE_SHIFT))) readonly = 1; + /* * No need for writable mapping of kernel image. This also ensures that * page and descriptor tables embedded inside don't have writable @@ -544,7 +550,7 @@ void __init xen_init_pt(void) mk_kernel_pgd(__pa_symbol(level3_user_pgt))); } -void __init extend_init_mapping(void) +void __init extend_init_mapping(unsigned long tables_space) { unsigned long va = __START_KERNEL_map; unsigned long phys, addr, *pte_page; @@ -599,23 +605,23 @@ void __init extend_init_mapping(void) static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, ptes; + unsigned long puds, pmds, ptes, tables; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT; - tables_space = - round_up(puds * 8, PAGE_SIZE) + + tables = round_up(puds * 8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE) + round_up(ptes * 8, PAGE_SIZE); - extend_init_mapping(); + extend_init_mapping(tables); table_start = start_pfn; + table_end = table_start + (tables>>PAGE_SHIFT); early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT); + end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); } /* Setup the direct mapping of the physical memory at PAGE_OFFSET. 
@@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsig set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); } - BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> PAGE_SHIFT)); + BUG_ON(!after_bootmem && start_pfn != table_end); __flush_tlb_all(); } diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Tue May 30 14:30:34 2006 -0500 @@ -329,7 +329,7 @@ out: * Callback received when the backend's state changes. */ static void backend_changed(struct xenbus_device *dev, - XenbusState backend_state) + enum xenbus_state backend_state) { struct tpm_private *tp = dev->data; DPRINTK("\n"); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 14:30:34 2006 -0500 @@ -1,5 +1,4 @@ -obj-y += net_driver_util.o obj-y += util.o obj-y += core/ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue May 30 14:30:34 2006 -0500 @@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex); * Also protects non-atomic updates of current_pages and driver_pages, and * balloon lists. */ -spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(balloon_lock); /* We aim for 'current allocation' == 'target allocation'. */ static unsigned long current_pages; @@ -360,6 +360,12 @@ static void balloon_process(void *unused /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { + unsigned long min_target; + + /* Do not allow target to reduce below 2% of maximum memory size. 
*/ + min_target = max_pfn / 50; + target = max(target, min_target); + /* No need for lock. Not read-modify-write updates. */ hard_limit = ~0UL; target_pages = target; @@ -468,8 +474,8 @@ static int __init balloon_init(void) IPRINTK("Initialising balloon driver.\n"); - if (xen_init() < 0) - return -1; + if (!is_running_on_xen()) + return -ENODEV; current_pages = min(xen_start_info->nr_pages, max_pfn); totalram_pages = current_pages; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue May 30 14:30:34 2006 -0500 @@ -82,7 +82,7 @@ typedef struct { static pending_req_t *pending_reqs; static struct list_head pending_free; -static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pending_free_lock); static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); #define BLKBACK_INVALID_HANDLE (~0) @@ -526,7 +526,7 @@ static int __init blkif_init(void) struct page *page; int i; - if (xen_init() < 0) + if (!is_running_on_xen()) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 14:30:34 2006 -0500 @@ -247,7 +247,7 @@ static void backend_changed(struct xenbu * Callback received when the frontend's state changes. 
*/ static void frontend_changed(struct xenbus_device *dev, - XenbusState frontend_state) + enum xenbus_state frontend_state) { struct backend_info *be = dev->data; int err; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue May 30 14:30:34 2006 -0500 @@ -247,7 +247,7 @@ fail: * Callback received when the backend's state changes. */ static void backend_changed(struct xenbus_device *dev, - XenbusState backend_state) + enum xenbus_state backend_state) { struct blkfront_info *info = dev->data; struct block_device *bd; @@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s have ignored this request initially, as the device was still mounted. */ struct xenbus_device * dev = info->xbdev; - XenbusState state = xenbus_read_driver_state(dev->otherend); + enum xenbus_state state = xenbus_read_driver_state(dev->otherend); if (state == XenbusStateClosing) blkfront_closing(dev); @@ -792,7 +792,7 @@ static struct xenbus_driver blkfront = { static int __init xlblk_init(void) { - if (xen_init() < 0) + if (!is_running_on_xen()) return -ENODEV; return xenbus_register_frontend(&blkfront); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue May 30 14:30:34 2006 -0500 @@ -93,7 +93,7 @@ static struct block_device_operations xl .ioctl = blkif_ioctl, }; -spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(blkif_io_lock); static struct xlbd_major_info * xlbd_alloc_major_info(int major, int minor, int index) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue May 30 12:52:02 2006 -0500 +++ 
b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue May 30 14:30:34 2006 -0500 @@ -138,7 +138,7 @@ typedef struct { */ static pending_req_t pending_reqs[MAX_PENDING_REQS]; static unsigned char pending_ring[MAX_PENDING_REQS]; -static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(pend_prod_lock); /* NB. We use a different index type to differentiate from shared blk rings. */ typedef unsigned int PEND_RING_IDX; #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue May 30 14:30:34 2006 -0500 @@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch { unsigned int goal; goal = simple_strtoul(str, NULL, 0); - while (wbuf_size < goal) - wbuf_size <<= 1; + if (goal) { + goal = roundup_pow_of_two(goal); + if (wbuf_size < goal) + wbuf_size = goal; + } return 1; } __setup("xencons_bufsz=", xencons_bufsz_setup); /* This lock protects accesses to the common transmit buffer. */ -static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(xencons_lock); /* Common transmit-kick routine. 
*/ static void __xencons_tx_flush(void); @@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver /******************** Kernel console driver ********************************/ -static void kcons_write( - struct console *c, const char *s, unsigned int count) +static void kcons_write(struct console *c, const char *s, unsigned int count) { int i = 0; unsigned long flags; @@ -155,14 +157,14 @@ static void kcons_write( spin_unlock_irqrestore(&xencons_lock, flags); } -static void kcons_write_dom0( - struct console *c, const char *s, unsigned int count) -{ - int rc; - - while ((count > 0) && - ((rc = HYPERVISOR_console_io( - CONSOLEIO_write, count, (char *)s)) > 0)) { +static void kcons_write_dom0(struct console *c, const char *s, unsigned int count) +{ + + while (count > 0) { + int rc; + rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s); + if (rc <= 0) + break; count -= rc; s += rc; } @@ -183,7 +185,7 @@ static struct console kcons_info = { #define __RETCODE 0 static int __init xen_console_init(void) { - if (xen_init() < 0) + if (!is_running_on_xen()) return __RETCODE; if (xen_start_info->flags & SIF_INITDOMAIN) { @@ -566,7 +568,7 @@ static int __init xencons_init(void) { int rc; - if (xen_init() < 0) + if (!is_running_on_xen()) return -ENODEV; if (xc_mode == XC_OFF) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Tue May 30 14:30:34 2006 -0500 @@ -4,8 +4,9 @@ obj-y := evtchn.o reboot.o gnttab.o features.o -obj-$(CONFIG_PROC_FS) += xen_proc.o -obj-$(CONFIG_NET) += skbuff.o -obj-$(CONFIG_SMP) += smpboot.o -obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o -obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_PROC_FS) += xen_proc.o +obj-$(CONFIG_NET) += skbuff.o +obj-$(CONFIG_SMP) += smpboot.o +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o +obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o 
+obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue May 30 14:30:34 2006 -0500 @@ -51,10 +51,10 @@ * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. */ -static spinlock_t irq_mapping_update_lock; +static DEFINE_SPINLOCK(irq_mapping_update_lock); /* IRQ <-> event-channel mappings. */ -static int evtchn_to_irq[NR_EVENT_CHANNELS]; +static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ... NR_EVENT_CHANNELS-1] = -1}; /* Packed IRQ information: binding type, sub-type index, and event channel. */ static u32 irq_info[NR_IRQS]; @@ -91,13 +91,13 @@ static inline unsigned int type_from_irq } /* IRQ <-> VIRQ mapping. */ -DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]); +DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping. */ #ifndef NR_IPIS #define NR_IPIS 1 #endif -DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]); +DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1}; /* Reference counts for bindings to IRQs. */ static int irq_bindcount[NR_IRQS]; @@ -751,7 +751,9 @@ void irq_resume(void) BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND); /* Secondary CPUs must have no VIRQ or IPI bindings. */ - for (cpu = 1; cpu < NR_CPUS; cpu++) { + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; for (virq = 0; virq < NR_VIRQS; virq++) BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1); for (ipi = 0; ipi < NR_IPIS; ipi++) @@ -813,25 +815,12 @@ void __init xen_init_IRQ(void) void __init xen_init_IRQ(void) { int i; - int cpu; - - spin_lock_init(&irq_mapping_update_lock); init_evtchn_cpu_bindings(); - /* No VIRQ or IPI bindings. 
*/ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - for (i = 0; i < NR_VIRQS; i++) - per_cpu(virq_to_irq, cpu)[i] = -1; - for (i = 0; i < NR_IPIS; i++) - per_cpu(ipi_to_irq, cpu)[i] = -1; - } - - /* No event-channel -> IRQ mappings. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - evtchn_to_irq[i] = -1; - mask_evtchn(i); /* No event channels are 'live' right now. */ - } + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); /* No IRQ -> event-channel mappings. */ for (i = 0; i < NR_IRQS; i++) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue May 30 14:30:34 2006 -0500 @@ -38,7 +38,6 @@ #include <linux/vmalloc.h> #include <asm/pgtable.h> #include <xen/interface/xen.h> -#include <asm/fixmap.h> #include <asm/uaccess.h> #include <xen/gnttab.h> #include <asm/synch_bitops.h> @@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_ static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; static int gnttab_free_count; static grant_ref_t gnttab_free_head; -static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(gnttab_list_lock); static grant_entry_t *shared = NULL; @@ -443,7 +442,7 @@ gnttab_init(void) { int i; - if (xen_init() < 0) + if (!is_running_on_xen()) return -ENODEV; if (gnttab_resume() < 0) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c --- a/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c Tue May 30 14:30:34 2006 -0500 @@ -49,6 +49,9 @@ static struct kobj_type hyp_sysfs_kobj_t static int __init hypervisor_subsys_init(void) { + if (!is_running_on_xen()) + return -ENODEV; + hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type; return subsystem_register(&hypervisor_subsys); } 
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue May 30 14:30:34 2006 -0500 @@ -17,6 +17,7 @@ #include <linux/kthread.h> #include <xen/gnttab.h> #include <xen/xencons.h> +#include <xen/cpu_hotplug.h> #if defined(__i386__) || defined(__x86_64__) /* @@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA static int shutting_down = SHUTDOWN_INVALID; static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); - -#ifdef CONFIG_SMP -int smp_suspend(void); -void smp_resume(void); -#else -#define smp_suspend() (0) -#define smp_resume() ((void)0) -#endif /* Ensure we run on the idle task page tables so that we will switch page tables before running user space. This is needed diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue May 30 14:30:34 2006 -0500 @@ -23,6 +23,7 @@ #include <asm/pgalloc.h> #include <xen/evtchn.h> #include <xen/interface/vcpu.h> +#include <xen/cpu_hotplug.h> #include <xen/xenbus.h> #ifdef CONFIG_SMP_ALTERNATIVES @@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); #elif !defined(CONFIG_X86_IO_APIC) unsigned int maxcpus = NR_CPUS; #endif - -/* - * Set of CPUs that remote admin software will allow us to bring online. - * Notified to us via xenbus. - */ -static cpumask_t xenbus_allowed_cpumask; - -/* Set of CPUs that local admin will allow us to bring online. 
*/ -static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; void __init prefill_possible_map(void) { @@ -167,17 +159,17 @@ static void cpu_bringup(void) cpu_idle(); } -static void vcpu_prepare(int vcpu) +void cpu_initialize_context(unsigned int cpu) { vcpu_guest_context_t ctxt; - struct task_struct *idle = idle_task(vcpu); + struct task_struct *idle = idle_task(cpu); #ifdef __x86_64__ - struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu]; + struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; #else - struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu); -#endif - - if (vcpu == 0) + struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu); +#endif + + if (cpu == 0) return; memset(&ctxt, 0, sizeof(ctxt)); @@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu) ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT; - ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu)); -#endif - - BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt)); + ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); +#endif + + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); } void __init smp_prepare_cpus(unsigned int max_cpus) @@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in cpu_set(cpu, cpu_present_map); #endif - vcpu_prepare(cpu); - } - - xenbus_allowed_cpumask = cpu_present_map; + cpu_initialize_context(cpu); + } + + init_xenbus_allowed_cpumask(); /* Currently, Xen gives no dynamic NUMA/HT info. */ for (cpu = 1; cpu < NR_CPUS; cpu++) { @@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void cpu_online_map = cpumask_of_cpu(0); } -static int local_cpu_hotplug_request(void) -{ - /* - * We assume a CPU hotplug request comes from local admin if it is made - * via a userspace process (i.e., one with a real mm_struct). 
- */ - return (current->mm != NULL); -} - #ifdef CONFIG_HOTPLUG_CPU /* @@ -355,141 +338,6 @@ static int __init initialize_cpu_present } core_initcall(initialize_cpu_present_map); -static void vcpu_hotplug(unsigned int cpu) -{ - int err; - char dir[32], state[32]; - - if ((cpu >= NR_CPUS) || !cpu_possible(cpu)) - return; - - sprintf(dir, "cpu/%d", cpu); - err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state); - if (err != 1) { - printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return; - } - - if (strcmp(state, "online") == 0) { - cpu_set(cpu, xenbus_allowed_cpumask); - (void)cpu_up(cpu); - } else if (strcmp(state, "offline") == 0) { - cpu_clear(cpu, xenbus_allowed_cpumask); - (void)cpu_down(cpu); - } else { - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", - state, cpu); - } -} - -static void handle_vcpu_hotplug_event( - struct xenbus_watch *watch, const char **vec, unsigned int len) -{ - int cpu; - char *cpustr; - const char *node = vec[XS_WATCH_PATH]; - - if ((cpustr = strstr(node, "cpu/")) != NULL) { - sscanf(cpustr, "cpu/%d", &cpu); - vcpu_hotplug(cpu); - } -} - -static int smpboot_cpu_notify(struct notifier_block *notifier, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - - /* - * We do this in a callback notifier rather than __cpu_disable() - * because local_cpu_hotplug_request() does not work in the latter - * as it's always executed from within a stopmachine kthread. 
- */ - if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) - cpu_clear(cpu, local_allowed_cpumask); - - return NOTIFY_OK; -} - -static int setup_cpu_watcher(struct notifier_block *notifier, - unsigned long event, void *data) -{ - int i; - - static struct xenbus_watch cpu_watch = { - .node = "cpu", - .callback = handle_vcpu_hotplug_event, - .flags = XBWF_new_thread }; - (void)register_xenbus_watch(&cpu_watch); - - if (!(xen_start_info->flags & SIF_INITDOMAIN)) { - for_each_cpu(i) - vcpu_hotplug(i); - printk(KERN_INFO "Brought up %ld CPUs\n", - (long)num_online_cpus()); - } - - return NOTIFY_DONE; -} - -static int __init setup_vcpu_hotplug_event(void) -{ - static struct notifier_block hotplug_cpu = { - .notifier_call = smpboot_cpu_notify }; - static struct notifier_block xsn_cpu = { - .notifier_call = setup_cpu_watcher }; - - register_cpu_notifier(&hotplug_cpu); - register_xenstore_notifier(&xsn_cpu); - - return 0; -} - -arch_initcall(setup_vcpu_hotplug_event); - -int smp_suspend(void) -{ - int i, err; - - lock_cpu_hotplug(); - - /* - * Take all other CPUs offline. We hold the hotplug mutex to - * avoid other processes bringing up CPUs under our feet. 
- */ - while (num_online_cpus() > 1) { - unlock_cpu_hotplug(); - for_each_online_cpu(i) { - if (i == 0) - continue; - err = cpu_down(i); - if (err) { - printk(KERN_CRIT "Failed to take all CPUs " - "down: %d.\n", err); - for_each_cpu(i) - vcpu_hotplug(i); - return err; - } - } - lock_cpu_hotplug(); - } - - return 0; -} - -void smp_resume(void) -{ - int i; - - for_each_cpu(i) - vcpu_prepare(i); - - unlock_cpu_hotplug(); - - for_each_cpu(i) - vcpu_hotplug(i); -} - static void remove_siblinginfo(int cpu) { @@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu) #else /* !CONFIG_HOTPLUG_CPU */ -int smp_suspend(void) -{ - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests " - "without CONFIG_HOTPLUG_CPU\n"); - return -EOPNOTSUPP; - } - return 0; -} - -void smp_resume(void) -{ -} - int __cpu_disable(void) { return -ENOSYS; @@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu) { int rc; - if (local_cpu_hotplug_request()) { - cpu_set(cpu, local_allowed_cpumask); - if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { - printk("%s: attempt to bring up CPU %u disallowed by " - "remote admin.\n", __FUNCTION__, cpu); - return -EBUSY; - } - } else if (!cpu_isset(cpu, local_allowed_cpumask) || - !cpu_isset(cpu, xenbus_allowed_cpumask)) { - return -EBUSY; - } + rc = cpu_up_check(cpu); + if (rc) + return rc; #ifdef CONFIG_SMP_ALTERNATIVES if (num_online_cpus() == 1) @@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu) cpu_set(cpu, cpu_online_map); rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - if (rc != 0) - BUG(); + BUG_ON(rc); return 0; } diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Tue May 30 14:30:34 2006 -0500 @@ -429,6 +429,9 @@ static int __init evtchn_init(void) { int err; + if (!is_running_on_xen()) + return -ENODEV; + spin_lock_init(&port_user_lock); 
memset(port_user, 0, sizeof(port_user)); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue May 30 14:30:34 2006 -0500 @@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock #define MAX_MFN_ALLOC 64 static unsigned long mfn_list[MAX_MFN_ALLOC]; static unsigned int alloc_index = 0; -static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(mfn_lock); static unsigned long alloc_mfn(void) { @@ -691,7 +691,7 @@ static void net_tx_action(unsigned long static void netif_idx_release(u16 pending_idx) { - static spinlock_t _lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(_lock); unsigned long flags; spin_lock_irqsave(&_lock, flags); @@ -810,6 +810,9 @@ static int __init netback_init(void) int i; struct page *page; + if (!is_running_on_xen()) + return -ENODEV; + /* We can increase reservation by this much in net_rx_action(). 
*/ balloon_update_driver_allowance(NET_RX_RING_SIZE); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 14:30:34 2006 -0500 @@ -17,13 +17,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - #include <stdarg.h> #include <linux/module.h> #include <xen/xenbus.h> -#include <xen/net_driver_util.h> #include "common.h" - #if 0 #undef DPRINTK @@ -31,22 +28,19 @@ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) #endif - struct backend_info { struct xenbus_device *dev; netif_t *netif; struct xenbus_watch backend_watch; - XenbusState frontend_state; + enum xenbus_state frontend_state; }; - static int connect_rings(struct backend_info *); static void connect(struct backend_info *); static void maybe_connect(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); - static int netback_remove(struct xenbus_device *dev) { @@ -191,7 +185,7 @@ static void backend_changed(struct xenbu * Callback received when the frontend's state changes. */ static void frontend_changed(struct xenbus_device *dev, - XenbusState frontend_state) + enum xenbus_state frontend_state) { struct backend_info *be = dev->data; @@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen kfree(ratestr); } +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +{ + char *s, *e, *macstr; + int i; + + macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) + return PTR_ERR(macstr); + + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = simple_strtoul(s, &e, 16); + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? 
'\0' : ':'))) { + kfree(macstr); + return -ENOENT; + } + s = e+1; + } + + kfree(macstr); + return 0; +} static void connect(struct backend_info *be) { diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue May 30 14:30:34 2006 -0500 @@ -60,12 +60,11 @@ #include <asm/uaccess.h> #include <xen/interface/grant_table.h> #include <xen/gnttab.h> -#include <xen/net_driver_util.h> #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) static inline void init_skb_shinfo(struct sk_buff *skb) { @@ -80,20 +79,14 @@ struct netfront_info { struct net_device_stats stats; - netif_tx_front_ring_t tx; - netif_rx_front_ring_t rx; + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; spinlock_t tx_lock; spinlock_t rx_lock; unsigned int handle; unsigned int evtchn, irq; - - /* What is the status of our connection to the remote backend? */ -#define BEST_CLOSED 0 -#define BEST_DISCONNECTED 1 -#define BEST_CONNECTED 2 - unsigned int backend_state; /* Receive-ring batched refills. 
*/ #define RX_MIN_TARGET 8 @@ -123,8 +116,8 @@ struct netfront_info { u8 mac[ETH_ALEN]; unsigned long rx_pfn_array[NET_RX_RING_SIZE]; - multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; - mmu_update_t rx_mmu[NET_RX_RING_SIZE]; + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; }; /* @@ -143,14 +136,6 @@ static inline unsigned short get_id_from list[0] = list[id]; return id; } - -#ifdef DEBUG -static const char *be_state_name[] = { - [BEST_CLOSED] = "closed", - [BEST_DISCONNECTED] = "disconnected", - [BEST_CONNECTED] = "connected", -}; -#endif #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \ __FUNCTION__, __LINE__, ##args) @@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus return talk_to_backend(dev, info); } +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +{ + char *s, *e, *macstr; + int i; + + macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) + return PTR_ERR(macstr); + + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = simple_strtoul(s, &e, 16); + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { + kfree(macstr); + return -ENOENT; + } + s = e+1; + } + + kfree(macstr); + return 0; +} /* Common code used when first setting up, and when resuming. 
*/ static int talk_to_backend(struct xenbus_device *dev, @@ -317,8 +323,8 @@ again: static int setup_device(struct xenbus_device *dev, struct netfront_info *info) { - netif_tx_sring_t *txs; - netif_rx_sring_t *rxs; + struct netif_tx_sring *txs; + struct netif_rx_sring *rxs; int err; struct net_device *netdev = info->netdev; @@ -328,13 +334,13 @@ static int setup_device(struct xenbus_de info->tx.sring = NULL; info->irq = 0; - txs = (netif_tx_sring_t *)__get_free_page(GFP_KERNEL); + txs = (struct netif_tx_sring *)__get_free_page(GFP_KERNEL); if (!txs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating tx ring page"); goto fail; } - rxs = (netif_rx_sring_t *)__get_free_page(GFP_KERNEL); + rxs = (struct netif_rx_sring *)__get_free_page(GFP_KERNEL); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); @@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de } memset(txs, 0, PAGE_SIZE); memset(rxs, 0, PAGE_SIZE); - info->backend_state = BEST_DISCONNECTED; SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); @@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de * Callback received when the backend's state changes. 
*/ static void backend_changed(struct xenbus_device *dev, - XenbusState backend_state) + enum xenbus_state backend_state) { DPRINTK("\n"); @@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; - if (np->backend_state != BEST_CONNECTED) + if (unlikely(!netif_carrier_ok(dev))) return; do { @@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str struct xen_memory_reservation reservation; grant_ref_t ref; - if (unlikely(np->backend_state != BEST_CONNECTED)) + if (unlikely(!netif_carrier_ok(dev))) return; /* @@ -638,7 +643,7 @@ static int network_start_xmit(struct sk_ { unsigned short id; struct netfront_info *np = netdev_priv(dev); - netif_tx_request_t *tx; + struct netif_tx_request *tx; RING_IDX i; grant_ref_t ref; unsigned long mfn; @@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_ spin_lock_irq(&np->tx_lock); - if (np->backend_state != BEST_CONNECTED) { + if (unlikely(!netif_carrier_ok(dev))) { spin_unlock_irq(&np->tx_lock); goto drop; } @@ -736,10 +741,10 @@ static int netif_poll(struct net_device { struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb, *nskb; - netif_rx_response_t *rx; + struct netif_rx_response *rx; RING_IDX i, rp; - mmu_update_t *mmu = np->rx_mmu; - multicall_entry_t *mcl = np->rx_mcl; + struct mmu_update *mmu = np->rx_mmu; + struct multicall_entry *mcl = np->rx_mcl; int work_done, budget, more_to_do = 1; struct sk_buff_head rxq; unsigned long flags; @@ -748,7 +753,7 @@ static int netif_poll(struct net_device spin_lock(&np->rx_lock); - if (np->backend_state != BEST_CONNECTED) { + if (unlikely(!netif_carrier_ok(dev))) { spin_unlock(&np->rx_lock); return 0; } @@ -962,7 +967,7 @@ static void network_connect(struct net_d { struct netfront_info *np; int i, requeue_idx; - netif_tx_request_t *tx; + struct netif_tx_request *tx; struct sk_buff *skb; np = netdev_priv(dev); @@ -1041,11 +1046,9 @@ static void network_connect(struct net_d * domain a kick 
because we've probably just requeued some * packets. */ - np->backend_state = BEST_CONNECTED; + netif_carrier_on(dev); notify_remote_via_irq(np->irq); network_tx_buf_gc(dev); - - network_maybe_wake_tx(dev); spin_unlock(&np->rx_lock); spin_unlock_irq(&np->tx_lock); @@ -1057,7 +1060,7 @@ static void show_device(struct netfront_ if (np) { IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n", np->handle, - be_state_name[np->backend_state], + netif_carrier_ok(np->netdev) ? "on" : "off", netif_running(np->netdev) ? "open" : "closed", np->evtchn, np->tx, @@ -1243,9 +1246,10 @@ static struct net_device * __devinit cre } np = netdev_priv(netdev); - np->backend_state = BEST_CLOSED; np->handle = handle; np->xbdev = dev; + + netif_carrier_off(netdev); spin_lock_init(&np->tx_lock); spin_lock_init(&np->rx_lock); @@ -1394,7 +1398,7 @@ static void netif_disconnect_backend(str /* Stop old i/f to prevent errors whilst we rebuild the state. */ spin_lock_irq(&info->tx_lock); spin_lock(&info->rx_lock); - info->backend_state = BEST_DISCONNECTED; + netif_carrier_off(info->netdev); spin_unlock(&info->rx_lock); spin_unlock_irq(&info->tx_lock); @@ -1454,6 +1458,9 @@ static struct notifier_block notifier_in static int __init netif_init(void) { + if (!is_running_on_xen()) + return -ENODEV; + if (xen_start_info->flags & SIF_INITDOMAIN) return 0; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 14:30:34 2006 -0500 @@ -166,7 +166,7 @@ static int pciback_attach(struct pciback } static void pciback_frontend_changed(struct xenbus_device *xdev, - XenbusState fe_state) + enum xenbus_state fe_state) { struct pciback_device *pdev = xdev->data; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue May 30 12:52:02 2006 
-0500 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue May 30 14:30:34 2006 -0500 @@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc static int pcifront_try_disconnect(struct pcifront_device *pdev) { int err = 0; - XenbusState prev_state; + enum xenbus_state prev_state; spin_lock(&pdev->dev_lock); @@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc } static void pcifront_backend_changed(struct xenbus_device *xdev, - XenbusState be_state) + enum xenbus_state be_state) { struct pcifront_device *pdev = xdev->data; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 30 14:30:34 2006 -0500 @@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i break; case IOCTL_PRIVCMD_MMAPBATCH: { -#ifndef __ia64__ - mmu_update_t u; - uint64_t ptep; -#endif privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; unsigned long __user *p; @@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) { if (get_user(mfn, p)) return -EFAULT; -#ifdef __ia64__ + ret = direct_remap_pfn_range(vma, addr & PAGE_MASK, - mfn, 1 << PAGE_SHIFT, + mfn, PAGE_SIZE, vma->vm_page_prot, m.dom); if (ret < 0) - goto batch_err; -#else - - ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep); - if (ret) - goto batch_err; - - u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot)); - u.ptr = ptep; - - if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) put_user(0xF0000000 | mfn, p); -#endif } ret = 0; @@ -271,6 +255,9 @@ static int capabilities_read(char *page, static int __init privcmd_init(void) { + if (!is_running_on_xen()) + return -ENODEV; + /* Set of hypercalls that privileged applications may execute. 
*/ set_bit(__HYPERVISOR_acm_op, hypercall_permission_map); set_bit(__HYPERVISOR_dom0_op, hypercall_permission_map); @@ -280,6 +267,9 @@ static int __init privcmd_init(void) set_bit(__HYPERVISOR_mmuext_op, hypercall_permission_map); set_bit(__HYPERVISOR_xen_version, hypercall_permission_map); set_bit(__HYPERVISOR_sched_op, hypercall_permission_map); + set_bit(__HYPERVISOR_sched_op_compat, hypercall_permission_map); + set_bit(__HYPERVISOR_event_channel_op_compat, + hypercall_permission_map); privcmd_intf = create_xen_proc_entry("privcmd", 0400); if (privcmd_intf != NULL) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 14:30:34 2006 -0500 @@ -34,7 +34,7 @@ struct backend_info /* watch front end for changes */ struct xenbus_watch backend_watch; - XenbusState frontend_state; + enum xenbus_state frontend_state; }; static void maybe_connect(struct backend_info *be); @@ -43,7 +43,7 @@ static void backend_changed(struct xenbu static void backend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len); static void frontend_changed(struct xenbus_device *dev, - XenbusState frontend_state); + enum xenbus_state frontend_state); static int tpmback_remove(struct xenbus_device *dev) { @@ -129,7 +129,7 @@ static void backend_changed(struct xenbu static void frontend_changed(struct xenbus_device *dev, - XenbusState frontend_state) + enum xenbus_state frontend_state) { struct backend_info *be = dev->data; int err; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue May 30 14:30:34 2006 -0500 @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2); 
EXPORT_SYMBOL_GPL(xenbus_watch_path2); -int xenbus_switch_state(struct xenbus_device *dev, XenbusState state) +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) { /* We check whether the state is currently set to the given value, and if not, then the state is set. We don't want to unconditionally @@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev } -XenbusState xenbus_read_driver_state(const char *path) -{ - XenbusState result; +enum xenbus_state xenbus_read_driver_state(const char *path) +{ + enum xenbus_state result; int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL); if (err) result = XenbusStateClosed; diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue May 30 14:30:34 2006 -0500 @@ -284,7 +284,7 @@ static void otherend_changed(struct xenb struct xenbus_device *dev = container_of(watch, struct xenbus_device, otherend_watch); struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); - XenbusState state; + enum xenbus_state state; /* Protect us against watches firing on old details when the otherend details change, say immediately after a resume. */ @@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_ size_t stringlen; char *tmpstring; - XenbusState state = xenbus_read_driver_state(nodename); + enum xenbus_state state = xenbus_read_driver_state(nodename); if (state != XenbusStateInitialising) { /* Device is not new, so ignore it. 
This can happen if a @@ -966,10 +966,8 @@ static int __init xenbus_probe_init(void DPRINTK(""); - if (xen_init() < 0) { - DPRINTK("failed"); + if (!is_running_on_xen()) return -ENODEV; - } /* Register ourselves with the kernel bus subsystem */ bus_register(&xenbus_frontend.bus); @@ -1069,10 +1067,8 @@ static int __init wait_for_devices(void) { unsigned long timeout = jiffies + 10*HZ; - if (xen_init() < 0) { - DPRINTK("failed"); + if (!is_running_on_xen()) return -ENODEV; - } while (time_before(jiffies, timeout)) { if (all_devices_ready()) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue May 30 14:30:34 2006 -0500 @@ -118,7 +118,7 @@ u64 jiffies_to_st(unsigned long jiffies) #define MULTI_UVMDOMID_INDEX 4 #endif -#define xen_init() (0) +#define is_running_on_xen() 1 static inline int HYPERVISOR_yield( diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue May 30 14:30:34 2006 -0500 @@ -10,10 +10,32 @@ static char * __init machine_specific_memory_setup(void) { - unsigned long max_pfn = xen_start_info->nr_pages; + int rc; + struct xen_memory_map memmap; + /* + * This is rather large for a stack variable but this early in + * the boot process we know we have plenty slack space. 
+ */ + struct e820entry map[E820MAX]; - e820.nr_map = 0; - add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, map); + + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if ( rc == -ENOSYS ) { + memmap.nr_entries = 1; + map[0].addr = 0ULL; + map[0].size = xen_start_info->nr_pages << PAGE_SHIFT; + /* 8MB slack (to balance backend allocations). */ + map[0].size += 8 << 20; + map[0].type = E820_RAM; + rc = 0; + } + BUG_ON(rc); + + sanitize_e820_map(map, (char *)&memmap.nr_entries); + + BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); return "Xen"; } diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 14:30:34 2006 -0500 @@ -195,12 +195,42 @@ HYPERVISOR_multicall( return _hypercall2(int, multicall, call_list, nr_calls); } +#ifndef CONFIG_XEN_IA64_DOM0_VP static inline int HYPERVISOR_memory_op( unsigned int cmd, void *arg) { return _hypercall2(int, memory_op, cmd, arg); } +#else +//XXX xen/ia64 copy_from_guest() is broken. +// This is a temporal work around until it is fixed. 
+static inline int +____HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +#include <xen/interface/memory.h> +int ia64_xenmem_reservation_op(unsigned long op, + struct xen_memory_reservation* reservation__); +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + return ia64_xenmem_reservation_op(cmd, + (struct xen_memory_reservation*)arg); + default: + return ____HYPERVISOR_memory_op(cmd, arg); + } + /* NOTREACHED */ +} +#endif static inline int HYPERVISOR_event_channel_op( @@ -244,12 +274,19 @@ HYPERVISOR_physdev_op( return rc; } -static inline int -HYPERVISOR_grant_table_op( +//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant. +static inline int +____HYPERVISOR_grant_table_op( unsigned int cmd, void *uop, unsigned int count) { return _hypercall3(int, grant_table_op, cmd, uop, count); } +#ifndef CONFIG_XEN_IA64_DOM0_VP +#define HYPERVISOR_grant_table_op(cmd, uop, count) \ + ____HYPERVISOR_grant_table_op((cmd), (uop), (count)) +#else +int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); +#endif static inline int HYPERVISOR_vcpu_op( @@ -281,6 +318,7 @@ static inline void exit_idle(void) {} #define do_IRQ(irq, regs) __do_IRQ((irq), (regs)) #ifdef CONFIG_XEN_IA64_DOM0_VP +#include <linux/err.h> #include <asm/xen/privop.h> #define _hypercall_imm1(type, name, imm, a1) \ @@ -382,6 +420,10 @@ HYPERVISOR_ioremap(unsigned long ioaddr, unsigned long ret = ioaddr; if (running_on_xen) { ret = __HYPERVISOR_ioremap(ioaddr, size); + if (unlikely(IS_ERR_VALUE(ret))) + panic("hypercall %s failed with %ld. 
" + "Please check Xen and Linux config mismatch\n", + __func__, -ret); } return ret; } @@ -421,27 +463,6 @@ HYPERVISOR_machtophys(unsigned long mfn) } static inline unsigned long -__HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order, - unsigned int address_bits) -{ - return _hypercall_imm3(unsigned long, ia64_dom0vp_op, - IA64_DOM0VP_populate_physmap, gpfn, - extent_order, address_bits); -} - -static inline unsigned long -HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order, - unsigned int address_bits) -{ - unsigned long ret = 0; - if (running_on_xen) { - ret = __HYPERVISOR_populate_physmap(gpfn, extent_order, - address_bits); - } - return ret; -} - -static inline unsigned long __HYPERVISOR_zap_physmap(unsigned long gpfn, unsigned int extent_order) { return _hypercall_imm2(unsigned long, ia64_dom0vp_op, @@ -466,6 +487,7 @@ __HYPERVISOR_add_physmap(unsigned long g IA64_DOM0VP_add_physmap, gpfn, mfn, flags, domid); } + static inline unsigned long HYPERVISOR_add_physmap(unsigned long gpfn, unsigned long mfn, unsigned int flags, domid_t domid) @@ -477,13 +499,15 @@ HYPERVISOR_add_physmap(unsigned long gpf } return ret; } + +// for balloon driver +#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0) + #else -#define HYPERVISOR_ioremap(ioaddr, size) ({ioaddr;}) -#define HYPERVISOR_phystomach(gpfn) ({gpfn;}) -#define HYPERVISOR_machtophys(mfn) ({mfn;}) -#define HYPERVISOR_populate_physmap(gpfn, extent_order, address_bits) \ - ({0;}) -#define HYPERVISOR_zap_physmap(gpfn, extent_order) ({0;}) -#define HYPERVISOR_add_physmap(gpfn, mfn, flags) ({0;}) +#define HYPERVISOR_ioremap(ioaddr, size) (ioaddr) +#define HYPERVISOR_phystomach(gpfn) (gpfn) +#define HYPERVISOR_machtophys(mfn) (mfn) +#define HYPERVISOR_zap_physmap(gpfn, extent_order) (0) +#define HYPERVISOR_add_physmap(gpfn, mfn, flags) (0) #endif #endif /* __HYPERCALL_H__ */ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- 
a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue May 30 14:30:34 2006 -0500 @@ -53,7 +53,7 @@ extern start_info_t *xen_start_info; void force_evtchn_callback(void); -int xen_init(void); +#define is_running_on_xen() running_on_xen /* Turn jiffies into Xen system time. XXX Implement me. */ #define jiffies_to_st(j) 0 @@ -118,11 +118,22 @@ HYPERVISOR_poll( } // for drivers/xen/privcmd/privcmd.c -#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e) #define machine_to_phys_mapping 0 #ifndef CONFIG_XEN_IA64_DOM0_VP +#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e) #define pfn_to_mfn(x) (x) #define mfn_to_pfn(x) (x) +#else +struct vm_area_struct; +int direct_remap_pfn_range(struct vm_area_struct *vma, + unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); +struct file; +int privcmd_mmap(struct file * file, struct vm_area_struct * vma); +#define HAVE_ARCH_PRIVCMD_MMAP #endif // for drivers/xen/balloon/balloon.c @@ -147,7 +158,7 @@ xen_create_contiguous_region(unsigned lo unsigned int order, unsigned int address_bits) { int ret = 0; - if (running_on_xen) { + if (is_running_on_xen()) { ret = __xen_create_contiguous_region(vstart, order, address_bits); } @@ -158,11 +169,24 @@ static inline void static inline void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) { - if (running_on_xen) + if (is_running_on_xen()) __xen_destroy_contiguous_region(vstart, order); } + +// for netfront.c, netback.c +#define MULTI_UVMFLAGS_INDEX 0 //XXX any value + +static inline void +MULTI_update_va_mapping( + multicall_entry_t *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->result = 0; +} + #else -#define xen_create_contiguous_region(vstart, order, address_bits) ({0;}) +#define xen_create_contiguous_region(vstart, order, 
address_bits) (0) #define xen_destroy_contiguous_region(vstart, order) do {} while (0) #endif diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/page.h --- a/linux-2.6-xen-sparse/include/asm-ia64/page.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h Tue May 30 14:30:34 2006 -0500 @@ -234,6 +234,43 @@ get_order (unsigned long size) #include <linux/kernel.h> #include <asm/hypervisor.h> +#include <xen/features.h> // to compile netback, netfront +typedef unsigned long maddr_t; // to compile netback, netfront + +// XXX hack! +// Linux/IA64 uses PG_arch_1. +// This hack will be removed once PG_foreign bit is taken. +//#include <xen/foreign_page.h> +#ifdef __ASM_XEN_FOREIGN_PAGE_H__ +# error "don't include include/xen/foreign_page.h!" +#endif + +extern struct address_space xen_ia64_foreign_dummy_mapping; +#define PageForeign(page) \ + ((page)->mapping == &xen_ia64_foreign_dummy_mapping) + +#define SetPageForeign(page, dtor) do { \ + set_page_private((page), (unsigned long)(dtor)); \ + (page)->mapping = &xen_ia64_foreign_dummy_mapping; \ + smp_rmb(); \ +} while (0) + +#define ClearPageForeign(page) do { \ + (page)->mapping = NULL; \ + smp_rmb(); \ + set_page_private((page), 0); \ +} while (0) + +#define PageForeignDestructor(page) \ + ( (void (*) (struct page *)) page_private(page) ) + +#define arch_free_page(_page,_order) \ +({ int foreign = PageForeign(_page); \ + if (foreign) \ + (PageForeignDestructor(_page))(_page); \ + foreign; \ +}) +#define HAVE_ARCH_FREE_PAGE //XXX xen page size != page size @@ -279,11 +316,14 @@ machine_to_phys_for_dma(unsigned long ma #define set_phys_to_machine(pfn, mfn) do { } while (0) #define xen_machphys_update(mfn, pfn) do { } while (0) -#define mfn_to_pfn(mfn) ({(mfn);}) -#define mfn_to_virt(mfn) ({__va((mfn) << PAGE_SHIFT);}) -#define pfn_to_mfn(pfn) ({(pfn);}) -#define virt_to_mfn(virt) ({__pa(virt) >> PAGE_SHIFT;}) -#define virt_to_machine(virt) ({__pa(virt);}) // for 
tpmfront.c +//XXX to compile set_phys_to_machine(vaddr, FOREIGN_FRAME(m)) +#define FOREIGN_FRAME(m) (INVALID_P2M_ENTRY) + +#define mfn_to_pfn(mfn) (mfn) +#define mfn_to_virt(mfn) (__va((mfn) << PAGE_SHIFT)) +#define pfn_to_mfn(pfn) (pfn) +#define virt_to_mfn(virt) (__pa(virt) >> PAGE_SHIFT) +#define virt_to_machine(virt) __pa(virt) // for tpmfront.c #endif /* CONFIG_XEN_IA64_DOM0_VP */ #endif /* CONFIG_XEN */ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/privop.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/privop.h Tue May 30 14:30:34 2006 -0500 @@ -44,13 +44,14 @@ #define ia64_ptcl __ia64_ptcl #define ia64_ptri __ia64_ptri #define ia64_ptrd __ia64_ptrd -#define ia64_get_psr_i __ia64_get_psr_i +#define ia64_get_psr_i __ia64_get_psr_i #define ia64_intrin_local_irq_restore __ia64_intrin_local_irq_restore #define ia64_pal_halt_light __ia64_pal_halt_light -#define ia64_leave_kernel __ia64_leave_kernel -#define ia64_leave_syscall __ia64_leave_syscall -#define ia64_switch_to __ia64_switch_to -#define ia64_pal_call_static __ia64_pal_call_static +#define ia64_leave_kernel __ia64_leave_kernel +#define ia64_leave_syscall __ia64_leave_syscall +#define ia64_trace_syscall __ia64_trace_syscall +#define ia64_switch_to __ia64_switch_to +#define ia64_pal_call_static __ia64_pal_call_static #endif /* !IA64_PARAVIRTUALIZED */ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Tue May 30 14:30:34 2006 -0500 @@ -11,45 +11,41 @@ #include <asm/xen/asm-xsi-offsets.h> +#include <xen/interface/arch-ia64.h> #define IA64_PARAVIRTUALIZED #ifdef __ASSEMBLY__ -#define XEN_HYPER_RFI break 0x1 -#define XEN_HYPER_RSM_PSR_DT break 0x2 -#define XEN_HYPER_SSM_PSR_DT break 0x3 -#define XEN_HYPER_COVER break 0x4 
-#define XEN_HYPER_ITC_D break 0x5 -#define XEN_HYPER_ITC_I break 0x6 -#define XEN_HYPER_SSM_I break 0x7 -#define XEN_HYPER_GET_IVR break 0x8 -#define XEN_HYPER_GET_TPR break 0x9 -#define XEN_HYPER_SET_TPR break 0xa -#define XEN_HYPER_EOI break 0xb -#define XEN_HYPER_SET_ITM break 0xc -#define XEN_HYPER_THASH break 0xd -#define XEN_HYPER_PTC_GA break 0xe -#define XEN_HYPER_ITR_D break 0xf -#define XEN_HYPER_GET_RR break 0x10 -#define XEN_HYPER_SET_RR break 0x11 -#define XEN_HYPER_SET_KR break 0x12 -#define XEN_HYPER_FC break 0x13 -#define XEN_HYPER_GET_CPUID break 0x14 -#define XEN_HYPER_GET_PMD break 0x15 -#define XEN_HYPER_GET_EFLAG break 0x16 -#define XEN_HYPER_SET_EFLAG break 0x17 +#define XEN_HYPER_RFI break HYPERPRIVOP_RFI +#define XEN_HYPER_RSM_PSR_DT break HYPERPRIVOP_RSM_DT +#define XEN_HYPER_SSM_PSR_DT break HYPERPRIVOP_SSM_DT +#define XEN_HYPER_COVER break HYPERPRIVOP_COVER +#define XEN_HYPER_ITC_D break HYPERPRIVOP_ITC_D +#define XEN_HYPER_ITC_I break HYPERPRIVOP_ITC_I +#define XEN_HYPER_SSM_I break HYPERPRIVOP_SSM_I +#define XEN_HYPER_GET_IVR break HYPERPRIVOP_GET_IVR +#define XEN_HYPER_GET_TPR break HYPERPRIVOP_GET_TPR +#define XEN_HYPER_SET_TPR break HYPERPRIVOP_SET_TPR +#define XEN_HYPER_EOI break HYPERPRIVOP_EOI +#define XEN_HYPER_SET_ITM break HYPERPRIVOP_SET_ITM +#define XEN_HYPER_THASH break HYPERPRIVOP_THASH +#define XEN_HYPER_PTC_GA break HYPERPRIVOP_PTC_GA +#define XEN_HYPER_ITR_D break HYPERPRIVOP_ITR_D +#define XEN_HYPER_GET_RR break HYPERPRIVOP_GET_RR +#define XEN_HYPER_SET_RR break HYPERPRIVOP_SET_RR +#define XEN_HYPER_SET_KR break HYPERPRIVOP_SET_KR +#define XEN_HYPER_FC break HYPERPRIVOP_FC +#define XEN_HYPER_GET_CPUID break HYPERPRIVOP_GET_CPUID +#define XEN_HYPER_GET_PMD break HYPERPRIVOP_GET_PMD +#define XEN_HYPER_GET_EFLAG break HYPERPRIVOP_GET_EFLAG +#define XEN_HYPER_SET_EFLAG break HYPERPRIVOP_SET_EFLAG #endif #ifndef __ASSEMBLY__ -#ifdef MODULE -extern int is_running_on_xen(void); -#define running_on_xen (is_running_on_xen()) 
-#else extern int running_on_xen; -#endif - -#define XEN_HYPER_SSM_I asm("break 0x7"); -#define XEN_HYPER_GET_IVR asm("break 0x8"); + +#define XEN_HYPER_SSM_I asm("break %0" : : "i" (HYPERPRIVOP_SSM_I)) +#define XEN_HYPER_GET_IVR asm("break %0" : : "i" (HYPERPRIVOP_GET_IVR)) /************************************************/ /* Instructions paravirtualized for correctness */ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 12:52:02 2006 -0500 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 14:30:34 2006 -0500 @@ -75,7 +75,7 @@ struct xenbus_device { int otherend_id; struct xenbus_watch otherend_watch; struct device dev; - XenbusState state; + enum xenbus_state state; void *data; }; @@ -98,7 +98,7 @@ struct xenbus_driver { int (*probe)(struct xenbus_device *dev, const struct xenbus_device_id *id); void (*otherend_changed)(struct xenbus_device *dev, - XenbusState backend_state); + enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); @@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev * Return 0 on success, or -errno on error. On error, the device will switch * to XenbusStateClosing, and the error will be saved in the store. */ -int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state); +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); /** @@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev * Return the state of the driver rooted at the given store path, or * XenbusStateClosed if no state can be read. 
*/ -XenbusState xenbus_read_driver_state(const char *path); +enum xenbus_state xenbus_read_driver_state(const char *path); /*** diff -r e74246451527 -r f54d38cea8ac tools/examples/network-bridge --- a/tools/examples/network-bridge Tue May 30 12:52:02 2006 -0500 +++ b/tools/examples/network-bridge Tue May 30 14:30:34 2006 -0500 @@ -59,7 +59,7 @@ findCommand "$@" findCommand "$@" evalVariables "$@" -vifnum=${vifnum:-$(ip route list | awk '/^default / { sub(/^(eth|xenbr)/,"",$NF); print $NF }')} +vifnum=${vifnum:-$(ip route list | awk '/^default / { print $NF }' | sed 's/^[^0-9]*//')} bridge=${bridge:-xenbr${vifnum}} netdev=${netdev:-eth${vifnum}} antispoof=${antispoof:-no} diff -r e74246451527 -r f54d38cea8ac tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Tue May 30 12:52:02 2006 -0500 +++ b/tools/examples/xmexample.vti Tue May 30 14:30:34 2006 -0500 @@ -36,7 +36,7 @@ name = "ExampleVTIDomain" # Random MACs are assigned if not given. #vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ] # type=ioemu specify the NIC is an ioemu device not netfront -#vif = [ 'type=ioemu, bridge=xenbr0' ] +vif = [ 'type=ioemu, bridge=xenbr0' ] # for multiple NICs in device model, 3 in this example #vif = [ 'type=ioemu, bridge=xenbr0', 'type=ioemu', 'type=ioemu'] diff -r e74246451527 -r f54d38cea8ac tools/libxc/Makefile --- a/tools/libxc/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/Makefile Tue May 30 14:30:34 2006 -0500 @@ -20,6 +20,7 @@ SRCS += xc_physdev.c SRCS += xc_physdev.c SRCS += xc_private.c SRCS += xc_sedf.c +SRCS += xc_csched.c SRCS += xc_tbuf.c ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86) diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_acm.c --- a/tools/libxc/xc_acm.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_acm.c Tue May 30 14:30:34 2006 -0500 @@ -1,13 +1,10 @@ /****************************************************************************** + * xc_acm.c * - * Copyright (C) 2005 IBM Corporation + * Copyright (C) 2005, 
2006 IBM Corporation, R Sailer * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * - * Authors: - * Reiner Sailer <sailer@xxxxxxxxxxxxxx> - * Stefan Berger <stefanb@xxxxxxxxxxxxxx> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -17,29 +14,23 @@ #include "xc_private.h" -int xc_acm_op(int xc_handle, struct acm_op *op) + +int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size) { int ret = -1; DECLARE_HYPERCALL; - op->interface_version = ACM_INTERFACE_VERSION; + hypercall.op = __HYPERVISOR_acm_op; + hypercall.arg[0] = cmd; + hypercall.arg[1] = (unsigned long) arg; - hypercall.op = __HYPERVISOR_acm_op; - hypercall.arg[0] = (unsigned long) op; - - if (mlock(op, sizeof(*op)) != 0) { - PERROR("Could not lock memory for Xen policy hypercall"); - goto out1; + if (mlock(arg, arg_size) != 0) { + PERROR("xc_acm_op: arg mlock failed"); + goto out; } - ret = do_xen_hypercall(xc_handle, &hypercall); - ret = ioctl(xc_handle, IOCTL_PRIVCMD_HYPERCALL, &hypercall); - if (ret < 0) { - goto out2; - } - out2: - safe_munlock(op, sizeof(*op)); - out1: + safe_munlock(arg, arg_size); + out: return ret; } diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_ia64_stubs.c Tue May 30 14:30:34 2006 -0500 @@ -50,7 +50,7 @@ xc_plan9_build(int xc_handle, } /* VMM uses put_user to copy pfn_list to guest buffer, this maybe fail, - VMM don't handle this now. + VMM doesn't handle this now. 
This method will touch guest buffer to make sure the buffer's mapping is tracked by VMM, */ @@ -66,6 +66,7 @@ int xc_ia64_get_pfn_list(int xc_handle, unsigned int __start_page, __nr_pages; unsigned long max_pfns; unsigned long *__pfn_buf; + __start_page = start_page; __nr_pages = nr_pages; __pfn_buf = pfn_buf; @@ -75,6 +76,7 @@ int xc_ia64_get_pfn_list(int xc_handle, op.cmd = DOM0_GETMEMLIST; op.u.getmemlist.domain = (domid_t)domid; op.u.getmemlist.max_pfns = max_pfns; + op.u.getmemlist.num_pfns = 0; set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf); if ( (max_pfns != -1UL) @@ -723,7 +725,7 @@ int xc_hvm_build(int xc_handle, ctxt->flags = VGCF_VMX_GUEST; ctxt->regs.cr_iip = 0x80000000ffffffb0UL; - ctxt->vcpu.privregs = 0; + ctxt->privregs = 0; memset( &launch_op, 0, sizeof(launch_op) ); diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_linux_build.c Tue May 30 14:30:34 2006 -0500 @@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL; l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL; uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab; - unsigned long ppt_alloc, count, nmfn; + unsigned long ppt_alloc, count; /* First allocate page for page dir. */ ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT; - - if ( page_array[ppt_alloc] > 0xfffff ) - { - nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]); - if ( nmfn == 0 ) - { - fprintf(stderr, "Couldn't get a page below 4GB :-(\n"); - goto error_out; - } - page_array[ppt_alloc] = nmfn; - } alloc_pt(l3tab, vl3tab, pl3tab); vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)]; @@ -327,6 +316,13 @@ static int setup_pg_tables_pae(int xc_ha *vl1e &= ~_PAGE_RW; } vl1e++; + } + + /* Xen requires a mid-level pgdir mapping 0xC0000000 region. 
*/ + if ( (vl3tab[3] & _PAGE_PRESENT) == 0 ) + { + alloc_pt(l2tab, vl2tab, pl2tab); + vl3tab[3] = l2tab | L3_PROT; } munmap(vl1tab, PAGE_SIZE); @@ -727,25 +723,28 @@ static int setup_guest(int xc_handle, v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */ -#if defined(__i386__) - if ( dsi.pae_kernel ) - { - /* FIXME: assumes one L2 pgtable @ 0xc0000000 */ - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> - L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages ) - break; - } - else - { - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> - L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) - break; - } -#endif -#if defined(__x86_64__) #define NR(_l,_h,_s) \ (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ ((_l) & ~((1UL<<(_s))-1))) >> (_s)) +#if defined(__i386__) + if ( dsi.pae_kernel ) + { + if ( (1 + /* # L3 */ + NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */ + NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT_PAE) + /* # L1 */ + /* Include a fourth mid-level page directory for Xen. 
*/ + (v_end <= (3 << L3_PAGETABLE_SHIFT_PAE))) + <= nr_pt_pages ) + break; + } + else + { + if ( (1 + /* # L2 */ + NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */ + <= nr_pt_pages ) + break; + } +#elif defined(__x86_64__) if ( (1 + /* # L4 */ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */ @@ -1116,7 +1115,7 @@ static int xc_linux_build_internal(int x ctxt->regs.ar_fpsr = xc_ia64_fpsr_default(); /* currently done by hypervisor, should move here */ /* ctxt->regs.r28 = dom_fw_setup(); */ - ctxt->vcpu.privregs = 0; + ctxt->privregs = 0; ctxt->sys_pgnr = 3; i = 0; /* silence unused variable warning */ #else /* x86 */ diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_linux_restore.c Tue May 30 14:30:34 2006 -0500 @@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int ** A page table page - need to 'uncanonicalize' it, i.e. ** replace all the references to pfns with the corresponding ** mfns for the new domain. - ** - ** On PAE we need to ensure that PGDs are in MFNs < 4G, and - ** so we may need to update the p2m after the main loop. - ** Hence we defer canonicalization of L1s until then. */ - if(pt_levels != 3 || pagetype != L1TAB) { - - if(!uncanonicalize_pagetable(pagetype, page)) { - /* - ** Failing to uncanonicalize a page table can be ok - ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). - */ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; - } - + if(!uncanonicalize_pagetable(pagetype, page)) { + /* + ** Failing to uncanonicalize a page table can be ok + ** under live migration since the pages type may have + ** changed by now (and we'll get an update later). 
+ */ + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; } } else if(pagetype != NOTAB) { @@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int } DPRINTF("Received all pages (%d races)\n", nraces); - - if(pt_levels == 3) { - - /* - ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This - ** is a little awkward and involves (a) finding all such PGDs and - ** replacing them with 'lowmem' versions; (b) upating the p2m[] - ** with the new info; and (c) canonicalizing all the L1s using the - ** (potentially updated) p2m[]. - ** - ** This is relatively slow (and currently involves two passes through - ** the pfn_type[] array), but at least seems to be correct. May wish - ** to consider more complex approaches to optimize this later. - */ - - int j, k; - - /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ - for (i = 0; i < max_pfn; i++) { - - if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { - - unsigned long new_mfn; - uint64_t l3ptes[4]; - uint64_t *l3tab; - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, p2m[i]); - - for(j = 0; j < 4; j++) - l3ptes[j] = l3tab[j]; - - munmap(l3tab, PAGE_SIZE); - - if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { - ERR("Couldn't get a page below 4GB :-("); - goto out; - } - - p2m[i] = new_mfn; - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)new_mfn) - << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE, i)) { - ERR("Couldn't m2p on PAE root pgdir"); - goto out; - } - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, p2m[i]); - - for(j = 0; j < 4; j++) - l3tab[j] = l3ptes[j]; - - munmap(l3tab, PAGE_SIZE); - - } - } - - /* Second pass: find all L1TABs and uncanonicalize them */ - j = 0; - - for(i = 0; i < max_pfn; i++) { - - if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { - region_mfn[j] = p2m[i]; - j++; - } - - if(i == (max_pfn-1) 
|| j == MAX_BATCH_SIZE) { - - if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ | PROT_WRITE, - region_mfn, j))) { - ERR("map batch failed"); - goto out; - } - - for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(L1TAB, - region_base + k*PAGE_SIZE)) { - ERR("failed uncanonicalize pt!"); - goto out; - } - } - - munmap(region_base, j*PAGE_SIZE); - j = 0; - } - } - - } if (xc_finish_mmu_updates(xc_handle, mmu)) { diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_load_elf.c Tue May 30 14:30:34 2006 -0500 @@ -59,10 +59,10 @@ static int parseelfimage(const char *ima Elf_Ehdr *ehdr = (Elf_Ehdr *)image; Elf_Phdr *phdr; Elf_Shdr *shdr; - unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base; + unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off; const char *shstrtab; char *guestinfo=NULL, *p; - int h; + int h, virt_base_defined, elf_pa_off_defined; if ( !IS_ELF(*ehdr) ) { @@ -164,34 +164,40 @@ static int parseelfimage(const char *ima dsi->xen_guest_string = guestinfo; - if ( (p = strstr(guestinfo, "VIRT_BASE=")) == NULL ) - { - ERROR("Malformed ELF image. No VIRT_BASE specified"); - return -EINVAL; - } - - virt_base = strtoul(p+10, &p, 0); - - dsi->elf_paddr_offset = virt_base; - if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL ) - dsi->elf_paddr_offset = strtoul(p+17, &p, 0); + /* Initial guess for virt_base is 0 if it is not explicitly defined. */ + p = strstr(guestinfo, "VIRT_BASE="); + virt_base_defined = (p != NULL); + virt_base = virt_base_defined ? strtoul(p+10, &p, 0) : 0; + + /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */ + p = strstr(guestinfo, "ELF_PADDR_OFFSET="); + elf_pa_off_defined = (p != NULL); + elf_pa_off = elf_pa_off_defined ? 
strtoul(p+17, &p, 0) : virt_base; + + if ( elf_pa_off_defined && !virt_base_defined ) + goto bad_image; for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; - vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base; + vaddr = phdr->p_paddr - elf_pa_off + virt_base; + if ( (vaddr + phdr->p_memsz) < vaddr ) + goto bad_image; if ( vaddr < kernstart ) kernstart = vaddr; if ( (vaddr + phdr->p_memsz) > kernend ) kernend = vaddr + phdr->p_memsz; } - if ( virt_base ) - dsi->v_start = virt_base; - else - dsi->v_start = kernstart; + /* + * Legacy compatibility and images with no __xen_guest section: assume + * header addresses are virtual addresses, and that guest memory should be + * mapped starting at kernel load address. + */ + dsi->v_start = virt_base_defined ? virt_base : kernstart; + dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start; dsi->v_kernentry = ehdr->e_entry; if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL ) @@ -199,11 +205,9 @@ static int parseelfimage(const char *ima if ( (kernstart > kernend) || (dsi->v_kernentry < kernstart) || - (dsi->v_kernentry > kernend) ) - { - ERROR("Malformed ELF image."); - return -EINVAL; - } + (dsi->v_kernentry > kernend) || + (dsi->v_start > kernstart) ) + goto bad_image; if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL ) dsi->load_symtab = 1; @@ -215,6 +219,10 @@ static int parseelfimage(const char *ima loadelfsymtab(image, 0, 0, NULL, dsi); return 0; + + bad_image: + ERROR("Malformed ELF image."); + return -EINVAL; } static int diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_private.c Tue May 30 14:30:34 2006 -0500 @@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v return rc; } -unsigned long xc_make_page_below_4G( - int xc_handle, uint32_t domid, unsigned long mfn) -{ - unsigned long new_mfn; - - 
if ( xc_domain_memory_decrease_reservation( - xc_handle, domid, 1, 0, &mfn) != 0 ) - { - fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn); - return 0; - } - - if ( xc_domain_memory_increase_reservation( - xc_handle, domid, 1, 0, 32, &new_mfn) != 0 ) - { - fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn); - return 0; - } - - return new_mfn; -} - /* * Local variables: * mode: C diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_ptrace.c Tue May 30 14:30:34 2006 -0500 @@ -185,61 +185,36 @@ map_domain_va_32( void *guest_va, int perm) { - unsigned long pde, page; - unsigned long va = (unsigned long)guest_va; - - static unsigned long cr3_phys[MAX_VIRT_CPUS]; - static uint32_t *cr3_virt[MAX_VIRT_CPUS]; - static unsigned long pde_phys[MAX_VIRT_CPUS]; - static uint32_t *pde_virt[MAX_VIRT_CPUS]; - static unsigned long page_phys[MAX_VIRT_CPUS]; - static uint32_t *page_virt[MAX_VIRT_CPUS]; - static int prev_perm[MAX_VIRT_CPUS]; - - if (ctxt[cpu].ctrlreg[3] == 0) - return NULL; - if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] ) - { - cr3_phys[cpu] = ctxt[cpu].ctrlreg[3]; - if ( cr3_virt[cpu] ) - munmap(cr3_virt[cpu], PAGE_SIZE); - cr3_virt[cpu] = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - cr3_phys[cpu] >> PAGE_SHIFT); - if ( cr3_virt[cpu] == NULL ) - return NULL; - } - pde = to_ma(cpu, cr3_virt[cpu][vtopdi(va)]); - if ( pde != pde_phys[cpu] ) - { - pde_phys[cpu] = pde; - if ( pde_virt[cpu] ) - munmap(pde_virt[cpu], PAGE_SIZE); - pde_virt[cpu] = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - pde_phys[cpu] >> PAGE_SHIFT); - if ( pde_virt[cpu] == NULL ) - return NULL; - } - page = to_ma(cpu, pde_virt[cpu][vtopti(va)]); - - if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) ) - { - page_phys[cpu] = page; - if ( page_virt[cpu] ) - munmap(page_virt[cpu], PAGE_SIZE); - page_virt[cpu] = 
xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, perm, - page_phys[cpu] >> PAGE_SHIFT); - if ( page_virt[cpu] == NULL ) - { - page_phys[cpu] = 0; - return NULL; - } - prev_perm[cpu] = perm; - } - - return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); + unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va; + uint32_t *l2, *l1; + static void *v[MAX_VIRT_CPUS]; + + l2 = xc_map_foreign_range( + xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + if ( l2 == NULL ) + return NULL; + + l2e = l2[l2_table_offset_i386(va)]; + munmap(l2, PAGE_SIZE); + if ( !(l2e & _PAGE_PRESENT) ) + return NULL; + l1p = to_ma(cpu, l2e); + l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT); + if ( l1 == NULL ) + return NULL; + + l1e = l1[l1_table_offset_i386(va)]; + munmap(l1, PAGE_SIZE); + if ( !(l1e & _PAGE_PRESENT) ) + return NULL; + p = to_ma(cpu, l1e); + if ( v[cpu] != NULL ) + munmap(v[cpu], PAGE_SIZE); + v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); + if ( v[cpu] == NULL ) + return NULL; + + return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); } @@ -250,36 +225,45 @@ map_domain_va_pae( void *guest_va, int perm) { - unsigned long l2p, l1p, p, va = (unsigned long)guest_va; + unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va; uint64_t *l3, *l2, *l1; - static void *v; + static void *v[MAX_VIRT_CPUS]; l3 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); if ( l3 == NULL ) return NULL; - l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]); + l3e = l3[l3_table_offset_pae(va)]; + munmap(l3, PAGE_SIZE); + if ( !(l3e & _PAGE_PRESENT) ) + return NULL; + l2p = to_ma(cpu, l3e); l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT); - munmap(l3, PAGE_SIZE); if ( l2 == NULL ) return NULL; - l1p = to_ma(cpu, 
l2[l2_table_offset_pae(va)]); + l2e = l2[l2_table_offset_pae(va)]; + munmap(l2, PAGE_SIZE); + if ( !(l2e & _PAGE_PRESENT) ) + return NULL; + l1p = to_ma(cpu, l2e); l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT); - munmap(l2, PAGE_SIZE); if ( l1 == NULL ) return NULL; - p = to_ma(cpu, l1[l1_table_offset_pae(va)]); - if ( v != NULL ) - munmap(v, PAGE_SIZE); - v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); + l1e = l1[l1_table_offset_pae(va)]; munmap(l1, PAGE_SIZE); - if ( v == NULL ) - return NULL; - - return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); + if ( !(l1e & _PAGE_PRESENT) ) + return NULL; + p = to_ma(cpu, l1e); + if ( v[cpu] != NULL ) + munmap(v[cpu], PAGE_SIZE); + v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); + if ( v[cpu] == NULL ) + return NULL; + + return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); } #ifdef __x86_64__ @@ -290,9 +274,10 @@ map_domain_va_64( void *guest_va, int perm) { - unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va; + unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va; uint64_t *l4, *l3, *l2, *l1; - static void *v; + static void *v[MAX_VIRT_CPUS]; + if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ return map_domain_va_32(xc_handle, cpu, guest_va, perm); @@ -302,41 +287,51 @@ map_domain_va_64( if ( l4 == NULL ) return NULL; - l3p = to_ma(cpu, l4[l4_table_offset(va)]); + l4e = l4[l4_table_offset(va)]; + munmap(l4, PAGE_SIZE); + if ( !(l4e & _PAGE_PRESENT) ) + return NULL; + l3p = to_ma(cpu, l4e); l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p >> PAGE_SHIFT); - munmap(l4, PAGE_SIZE); if ( l3 == NULL ) return NULL; - l2p = to_ma(cpu, l3[l3_table_offset(va)]); + l3e = l3[l3_table_offset(va)]; + munmap(l3, PAGE_SIZE); + if ( !(l3e & _PAGE_PRESENT) ) + return NULL; + l2p = to_ma(cpu, l3e); l2 = 
xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT); - munmap(l3, PAGE_SIZE); if ( l2 == NULL ) return NULL; l1 = NULL; - l1e = to_ma(cpu, l2[l2_table_offset(va)]); - l1p = l1e >> PAGE_SHIFT; - if (l1e & 0x80) { /* 2M pages */ + l2e = l2[l2_table_offset(va)]; + munmap(l2, PAGE_SIZE); + if ( !(l2e & _PAGE_PRESENT) ) + return NULL; + l1p = to_ma(cpu, l2e); + if (l2e & 0x80) { /* 2M pages */ p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT); } else { /* 4K pages */ - //l1p = to_ma(cpu, l1e[l1_table_offset(va)]); l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT); - munmap(l2, PAGE_SIZE); if ( l1 == NULL ) return NULL; - p = to_ma(cpu, l1[l1_table_offset(va)]); - } - if ( v != NULL ) - munmap(v, PAGE_SIZE); - v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); + l1e = l1[l1_table_offset(va)]; + if ( !(l1e & _PAGE_PRESENT) ) + return NULL; + p = to_ma(cpu, l1e); + } + if ( v[cpu] != NULL ) + munmap(v[cpu], PAGE_SIZE); + v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); if (l1) munmap(l1, PAGE_SIZE); - if ( v == NULL ) - return NULL; - - return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); + if ( v[cpu] == NULL ) + return NULL; + + return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); } #endif diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_ptrace.h Tue May 30 14:30:34 2006 -0500 @@ -7,7 +7,6 @@ #define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ #define X86_CR0_PG 0x80000000 /* Paging (RW) */ #define BSD_PAGE_MASK (PAGE_SIZE-1) -#define PDRSHIFT 22 #define PSL_T 0x00000100 /* trace enable bit */ #ifdef __x86_64__ @@ -162,8 +161,6 @@ struct gdb_regs { #endif #define printval(x) printf("%s = %lx\n", #x, (long)x); -#define vtopdi(va) ((va) >> PDRSHIFT) -#define vtopti(va) (((va) >> PAGE_SHIFT) & 
0x3ff) #endif typedef void (*thr_ev_handler_t)(long); diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace_core.c --- a/tools/libxc/xc_ptrace_core.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_ptrace_core.c Tue May 30 14:30:34 2006 -0500 @@ -3,6 +3,7 @@ #include <sys/ptrace.h> #include <sys/wait.h> #include "xc_private.h" +#include "xg_private.h" #include "xc_ptrace.h" #include <time.h> @@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, } cr3_virt[cpu] = v; } - if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */ + if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical address */ return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT; @@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, return NULL; pde_virt[cpu] = v; } - if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */ + if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical address */ return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT; @@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, map_mtop_offset(page_phys[cpu])); if (v == MAP_FAILED) { - printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va)); + printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, l1_table_offset_i386(va)); page_phys[cpu] = 0; return NULL; } diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_tbuf.c --- a/tools/libxc/xc_tbuf.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xc_tbuf.c Tue May 30 14:30:34 2006 -0500 @@ -18,53 +18,57 @@ static int tbuf_enable(int xc_handle, int enable) { - DECLARE_DOM0_OP; + DECLARE_DOM0_OP; - op.cmd = DOM0_TBUFCONTROL; - op.interface_version = DOM0_INTERFACE_VERSION; - if (enable) - op.u.tbufcontrol.op = DOM0_TBUF_ENABLE; - else - op.u.tbufcontrol.op = DOM0_TBUF_DISABLE; + op.cmd = DOM0_TBUFCONTROL; + op.interface_version = DOM0_INTERFACE_VERSION; + if (enable) + op.u.tbufcontrol.op = 
DOM0_TBUF_ENABLE; + else + op.u.tbufcontrol.op = DOM0_TBUF_DISABLE; - return xc_dom0_op(xc_handle, &op); + return xc_dom0_op(xc_handle, &op); } int xc_tbuf_set_size(int xc_handle, unsigned long size) { - DECLARE_DOM0_OP; + DECLARE_DOM0_OP; - op.cmd = DOM0_TBUFCONTROL; - op.interface_version = DOM0_INTERFACE_VERSION; - op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE; - op.u.tbufcontrol.size = size; + op.cmd = DOM0_TBUFCONTROL; + op.interface_version = DOM0_INTERFACE_VERSION; + op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE; + op.u.tbufcontrol.size = size; - return xc_dom0_op(xc_handle, &op); + return xc_dom0_op(xc_handle, &op); } int xc_tbuf_get_size(int xc_handle, unsigned long *size) { - int rc; - DECLARE_DOM0_OP; + int rc; + DECLARE_DOM0_OP; - op.cmd = DOM0_TBUFCONTROL; - op.interface_version = DOM0_INTERFACE_VERSION; - op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO; + op.cmd = DOM0_TBUFCONTROL; + op.interface_version = DOM0_INTERFACE_VERSION; + op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO; - rc = xc_dom0_op(xc_handle, &op); - if (rc == 0) - *size = op.u.tbufcontrol.size; - return rc; + rc = xc_dom0_op(xc_handle, &op); + if (rc == 0) + *size = op.u.tbufcontrol.size; + return rc; } int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn, - unsigned long *size) + unsigned long *size) { DECLARE_DOM0_OP; int rc; - if ( xc_tbuf_set_size(xc_handle, cnt) != 0 ) - return -1; + /* + * Ignore errors (at least for now) as we get an error if size is already + * set (since trace buffers cannot be reallocated). If we really have no + * buffers at all then tbuf_enable() will fail, so this is safe. 
+ */ + (void)xc_tbuf_set_size(xc_handle, cnt); if ( tbuf_enable(xc_handle, 1) != 0 ) return -1; diff -r e74246451527 -r f54d38cea8ac tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xenctrl.h Tue May 30 14:30:34 2006 -0500 @@ -359,6 +359,14 @@ int xc_sedf_domain_get(int xc_handle, uint64_t *latency, uint16_t *extratime, uint16_t *weight); +int xc_sched_credit_domain_set(int xc_handle, + uint32_t domid, + struct sched_credit_adjdom *sdom); + +int xc_sched_credit_domain_get(int xc_handle, + uint32_t domid, + struct sched_credit_adjdom *sdom); + typedef evtchn_status_t xc_evtchn_status_t; /* @@ -449,9 +457,6 @@ int xc_domain_iomem_permission(int xc_ha unsigned long first_mfn, unsigned long nr_mfns, uint8_t allow_access); - -unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, - unsigned long mfn); typedef dom0_perfc_desc_t xc_perfc_desc_t; /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */ @@ -599,6 +604,6 @@ int xc_add_mmu_update(int xc_handle, xc_ unsigned long long ptr, unsigned long long val); int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu); -int xc_acm_op(int xc_handle, struct acm_op *op); +int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size); #endif diff -r e74246451527 -r f54d38cea8ac tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Tue May 30 12:52:02 2006 -0500 +++ b/tools/libxc/xg_private.h Tue May 30 14:30:34 2006 -0500 @@ -48,6 +48,8 @@ unsigned long csum_page (void * page); #define L2_PAGETABLE_SHIFT_PAE 21 #define L3_PAGETABLE_SHIFT_PAE 30 +#define L2_PAGETABLE_SHIFT_I386 22 + #if defined(__i386__) #define L1_PAGETABLE_SHIFT 12 #define L2_PAGETABLE_SHIFT 22 @@ -61,6 +63,9 @@ unsigned long csum_page (void * page); #define L1_PAGETABLE_ENTRIES_PAE 512 #define L2_PAGETABLE_ENTRIES_PAE 512 #define L3_PAGETABLE_ENTRIES_PAE 4 + +#define L1_PAGETABLE_ENTRIES_I386 1024 +#define L2_PAGETABLE_ENTRIES_I386 1024 #if defined(__i386__) #define 
L1_PAGETABLE_ENTRIES 1024 @@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t; #define l3_table_offset_pae(_a) \ (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1)) +#define l1_table_offset_i386(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1)) +#define l2_table_offset_i386(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1)) + #if defined(__i386__) #define l1_table_offset(_a) \ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/acm/acm.c --- a/tools/python/xen/lowlevel/acm/acm.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/lowlevel/acm/acm.c Tue May 30 14:30:34 2006 -0500 @@ -38,7 +38,7 @@ fprintf(stderr, "ERROR: " _m " (%d = %s) /* generic shared function */ void * __getssid(int domid, uint32_t *buflen) { - struct acm_op op; + struct acm_getssid getssid; int xc_handle; #define SSID_BUFFER_SIZE 4096 void *buf = NULL; @@ -51,14 +51,13 @@ void * __getssid(int domid, uint32_t *bu goto out2; } memset(buf, 0, SSID_BUFFER_SIZE); - op.cmd = ACM_GETSSID; - op.interface_version = ACM_INTERFACE_VERSION; - op.u.getssid.ssidbuf = buf; - op.u.getssid.ssidbuf_size = SSID_BUFFER_SIZE; - op.u.getssid.get_ssid_by = DOMAINID; - op.u.getssid.id.domainid = domid; - - if (xc_acm_op(xc_handle, &op) < 0) { + getssid.interface_version = ACM_INTERFACE_VERSION; + getssid.ssidbuf = buf; + getssid.ssidbuf_size = SSID_BUFFER_SIZE; + getssid.get_ssid_by = DOMAINID; + getssid.id.domainid = domid; + + if (xc_acm_op(xc_handle, ACMOP_getssid, &getssid, sizeof(getssid)) < 0) { if (errno == EACCES) PERROR("ACM operation failed."); free(buf); @@ -147,7 +146,7 @@ static PyObject *getdecision(PyObject * static PyObject *getdecision(PyObject * self, PyObject * args) { char *arg1_name, *arg1, *arg2_name, *arg2, *decision = NULL; - struct acm_op op; + struct acm_getdecision getdecision; int xc_handle; if (!PyArg_ParseTuple(args, "ssss", &arg1_name, &arg1, 
&arg2_name, &arg2)) { @@ -163,34 +162,33 @@ static PyObject *getdecision(PyObject * (strcmp(arg2_name, "domid") && strcmp(arg2_name, "ssidref"))) return NULL; - op.cmd = ACM_GETDECISION; - op.interface_version = ACM_INTERFACE_VERSION; - op.u.getdecision.hook = SHARING; + getdecision.interface_version = ACM_INTERFACE_VERSION; + getdecision.hook = SHARING; if (!strcmp(arg1_name, "domid")) { - op.u.getdecision.get_decision_by1 = DOMAINID; - op.u.getdecision.id1.domainid = atoi(arg1); - } else { - op.u.getdecision.get_decision_by1 = SSIDREF; - op.u.getdecision.id1.ssidref = atol(arg1); + getdecision.get_decision_by1 = DOMAINID; + getdecision.id1.domainid = atoi(arg1); + } else { + getdecision.get_decision_by1 = SSIDREF; + getdecision.id1.ssidref = atol(arg1); } if (!strcmp(arg2_name, "domid")) { - op.u.getdecision.get_decision_by2 = DOMAINID; - op.u.getdecision.id2.domainid = atoi(arg2); - } else { - op.u.getdecision.get_decision_by2 = SSIDREF; - op.u.getdecision.id2.ssidref = atol(arg2); - } - - if (xc_acm_op(xc_handle, &op) < 0) { + getdecision.get_decision_by2 = DOMAINID; + getdecision.id2.domainid = atoi(arg2); + } else { + getdecision.get_decision_by2 = SSIDREF; + getdecision.id2.ssidref = atol(arg2); + } + + if (xc_acm_op(xc_handle, ACMOP_getdecision, &getdecision, sizeof(getdecision)) < 0) { if (errno == EACCES) PERROR("ACM operation failed."); } xc_interface_close(xc_handle); - if (op.u.getdecision.acm_decision == ACM_ACCESS_PERMITTED) + if (getdecision.acm_decision == ACM_ACCESS_PERMITTED) decision = "PERMITTED"; - else if (op.u.getdecision.acm_decision == ACM_ACCESS_DENIED) + else if (getdecision.acm_decision == ACM_ACCESS_DENIED) decision = "DENIED"; return Py_BuildValue("s", decision); diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/lowlevel/xc/xc.c Tue May 30 14:30:34 2006 -0500 @@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc 
"weight", weight); } +static PyObject *pyxc_sched_credit_domain_set(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t domid; + uint16_t weight; + uint16_t cap; + static char *kwd_list[] = { "dom", "weight", "cap", NULL }; + static char kwd_type[] = "I|HH"; + struct sched_credit_adjdom sdom; + + weight = 0; + cap = (uint16_t)~0U; + if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, + &domid, &weight, &cap) ) + return NULL; + + sdom.weight = weight; + sdom.cap = cap; + + if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args) +{ + uint32_t domid; + struct sched_credit_adjdom sdom; + + if( !PyArg_ParseTuple(args, "I", &domid) ) + return NULL; + + if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:H,s:H}", + "weight", sdom.weight, + "cap", sdom.cap); +} + static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args) { uint32_t dom; @@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = { " slice [long]: CPU reservation per period\n" " latency [long]: domain's wakeup latency hint\n" " extratime [int]: domain aware of extratime?\n"}, + + { "sched_credit_domain_set", + (PyCFunction)pyxc_sched_credit_domain_set, + METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with the\n" + "SMP credit scheduler.\n" + " domid [int]: domain id to set\n" + " weight [short]: domain's scheduling weight\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "sched_credit_domain_get", + (PyCFunction)pyxc_sched_credit_domain_get, + METH_VARARGS, "\n" + "Get the scheduling parameters for a domain when running with the\n" + "SMP credit scheduler.\n" + " domid [int]: domain id to get\n" + "Returns: [dict]\n" + " weight [short]: domain's scheduling weight\n"}, { 
"evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, @@ -1172,7 +1233,7 @@ PyXc_init(XcObject *self, PyObject *args PyXc_init(XcObject *self, PyObject *args, PyObject *kwds) { if ((self->xc_handle = xc_interface_open()) == -1) { - PyErr_SetFromErrno(PyExc_RuntimeError); + PyErr_SetFromErrno(xc_error); return -1; } @@ -1245,7 +1306,7 @@ PyMODINIT_FUNC initxc(void) if (m == NULL) return; - xc_error = PyErr_NewException(PKG ".error", NULL, NULL); + xc_error = PyErr_NewException(PKG ".Error", PyExc_RuntimeError, NULL); zero = PyInt_FromLong(0); /* KAF: This ensures that we get debug output in a timely manner. */ @@ -1254,6 +1315,9 @@ PyMODINIT_FUNC initxc(void) Py_INCREF(&PyXcType); PyModule_AddObject(m, CLS, (PyObject *)&PyXcType); + + Py_INCREF(xc_error); + PyModule_AddObject(m, "Error", xc_error); } diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/lowlevel/xs/xs.c Tue May 30 14:30:34 2006 -0500 @@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs if (perms) { PyObject *val = PyList_New(perms_n); - for (i = 0; i < perms_n; i++, perms++) { - PyObject *p = Py_BuildValue("{s:i,s:i,s:i}", - "dom", perms->id, - "read", perms->perms & XS_PERM_READ, - "write",perms->perms & XS_PERM_WRITE); + for (i = 0; i < perms_n; i++) { + PyObject *p = + Py_BuildValue("{s:i,s:i,s:i}", + "dom", perms[i].id, + "read", perms[i].perms & XS_PERM_READ, + "write", perms[i].perms & XS_PERM_WRITE); PyList_SetItem(val, i, p); } diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/XendDomain.py Tue May 30 14:30:34 2006 -0500 @@ -522,6 +522,28 @@ class XendDomain: except Exception, ex: raise XendError(str(ex)) + def domain_sched_credit_get(self, domid): + """Get credit scheduler parameters for a domain. 
+ """ + dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + return xc.sched_credit_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + + def domain_sched_credit_set(self, domid, weight, cap): + """Set credit scheduler parameters for a domain. + """ + dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + return xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap) + except Exception, ex: + raise XendError(str(ex)) + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue May 30 14:30:34 2006 -0500 @@ -29,6 +29,7 @@ import string import string import time import threading +import os import xen.lowlevel.xc from xen.util import asserts @@ -700,6 +701,16 @@ class XendDomainInfo: log.debug("Storing VM details: %s", to_store) self.writeVm(to_store) + self.setVmPermissions() + + + def setVmPermissions(self): + """Allow the guest domain to read its UUID. We don't allow it to + access any other entry, for security.""" + xstransact.SetPermissions('%s/uuid' % self.vmpath, + { 'dom' : self.domid, + 'read' : True, + 'write' : False }) def storeDomDetails(self): @@ -1264,7 +1275,14 @@ class XendDomainInfo: m = self.image.getDomainMemory(self.info['memory'] * 1024) balloon.free(m) xc.domain_setmaxmem(self.domid, m) - xc.domain_memory_increase_reservation(self.domid, m, 0, 0) + + init_reservation = self.info['memory'] * 1024 + if os.uname()[4] == 'ia64': + # Workaround until ia64 properly supports ballooning. 
+ init_reservation = m + + xc.domain_memory_increase_reservation(self.domid, init_reservation, + 0, 0) self.createChannels() @@ -1527,6 +1545,10 @@ class XendDomainInfo: self.configure_bootloader() config = self.sxpr() + + if self.infoIsSet('cpus') and len(self.info['cpus']) != 0: + config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y), + self.info['cpus'])]) if self.readVm(RESTART_IN_PROGRESS): log.error('Xend failed during restart of domain %d. ' diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/balloon.py Tue May 30 14:30:34 2006 -0500 @@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB. We need th BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are # rounded. RETRY_LIMIT = 10 +RETRY_LIMIT_INCR = 5 ## # The time to sleep between retries grows linearly, using this value (in # seconds). When the system is lightly loaded, memory should be scrubbed and @@ -118,7 +119,8 @@ def free(required): retries = 0 sleep_time = SLEEP_TIME_GROWTH last_new_alloc = None - while retries < RETRY_LIMIT: + rlimit = RETRY_LIMIT + while retries < rlimit: free_mem = xc.physinfo()['free_memory'] if free_mem >= need_mem: @@ -127,7 +129,9 @@ def free(required): return if retries == 0: - log.debug("Balloon: free %d; need %d.", free_mem, need_mem) + rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR + log.debug("Balloon: free %d; need %d; retries: %d.", + free_mem, need_mem, rlimit) if dom0_min_mem > 0: dom0_alloc = get_dom0_current_alloc() @@ -143,8 +147,9 @@ def free(required): # Continue to retry, waiting for ballooning. time.sleep(sleep_time) + if retries < 2 * RETRY_LIMIT: + sleep_time += SLEEP_TIME_GROWTH retries += 1 - sleep_time += SLEEP_TIME_GROWTH # Not enough memory; diagnose the problem. 
if dom0_min_mem == 0: diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/image.py Tue May 30 14:30:34 2006 -0500 @@ -19,6 +19,7 @@ import os, string import re +import math import xen.lowlevel.xc from xen.xend import sxp @@ -141,11 +142,16 @@ class ImageHandler: % (self.ostype, self.vm.getDomid(), str(result))) - def getDomainMemory(self, mem): + def getDomainMemory(self, mem_kb): """@return The memory required, in KiB, by the domain to store the - given amount, also in KiB. This is normally just mem, but HVM domains - have overheads to account for.""" - return mem + given amount, also in KiB.""" + if os.uname()[4] != 'ia64': + # A little extra because auto-ballooning is broken w.r.t. HVM + # guests. Also, slack is necessary for live migration since that + # uses shadow page tables. + if 'hvm' in xc.xeninfo()['xen_caps']: + mem_kb += 4*1024; + return mem_kb def buildDomain(self): """Build the domain. 
Define in subclass.""" @@ -377,15 +383,20 @@ class HVMImageHandler(ImageHandler): os.waitpid(self.pid, 0) self.pid = 0 - def getDomainMemory(self, mem): + def getDomainMemory(self, mem_kb): """@see ImageHandler.getDomainMemory""" - page_kb = 4 - extra_pages = 0 if os.uname()[4] == 'ia64': page_kb = 16 # ROM size for guest firmware, ioreq page and xenstore page extra_pages = 1024 + 2 - return mem + extra_pages * page_kb + else: + page_kb = 4 + # This was derived emperically: + # 2.4 MB overhead per 1024 MB RAM + 8 MB constant + # + 4 to avoid low-memory condition + extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12; + extra_pages = int( math.ceil( extra_mb*1024 / page_kb )) + return mem_kb + extra_pages * page_kb def register_shutdown_watch(self): """ add xen store watch on control/shutdown """ diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/server/SrvDomain.py Tue May 30 14:30:34 2006 -0500 @@ -129,6 +129,20 @@ class SrvDomain(SrvDir): ['latency', 'int'], ['extratime', 'int'], ['weight', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + + def op_domain_sched_credit_get(self, _, req): + fn = FormFn(self.xd.domain_sched_credit_get, + [['dom', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + + + def op_domain_sched_credit_set(self, _, req): + fn = FormFn(self.xd.domain_sched_credit_set, + [['dom', 'int'], + ['weight', 'int']]) val = fn(req.args, {'dom': self.dom.domid}) return val diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xend/xenstore/xstransact.py Tue May 30 14:30:34 2006 -0500 @@ -221,6 +221,34 @@ class xstransact: xshandle().mkdir(self.transaction, self.prependPath(key)) + def get_permissions(self, *args): + """If no arguments are given, return the 
permissions at this + transaction's path. If one argument is given, treat that argument as + a subpath to this transaction's path, and return the permissions at + that path. Otherwise, treat each argument as a subpath to this + transaction's path, and return a list composed of the permissions at + each of those instead. + """ + if len(args) == 0: + return xshandle().get_permissions(self.transaction, self.path) + if len(args) == 1: + return self._get_permissions(args[0]) + ret = [] + for key in args: + ret.append(self._get_permissions(key)) + return ret + + + def _get_permissions(self, key): + path = self.prependPath(key) + try: + return xshandle().get_permissions(self.transaction, path) + except RuntimeError, ex: + raise RuntimeError(ex.args[0], + '%s, while getting permissions from %s' % + (ex.args[1], path)) + + def set_permissions(self, *args): if len(args) == 0: raise TypeError diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/python/xen/xm/main.py Tue May 30 14:30:34 2006 -0500 @@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT specifies another way of setting a domain's\n\ cpu period/slice." 
+sched_credit_help = "sched-credit Set or get credit scheduler parameters" block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode> [BackDomId] Create a new virtual block device""" block_detach_help = """block-detach <DomId> <DevId> Destroy a domain's virtual block device, @@ -174,6 +175,7 @@ host_commands = [ ] scheduler_commands = [ + "sched-credit", "sched-bvt", "sched-bvt-ctxallow", "sched-sedf", @@ -735,6 +737,48 @@ def xm_sched_sedf(args): else: print_sedf(sedf_info) +def xm_sched_credit(args): + usage_msg = """sched-credit: Set or get credit scheduler parameters + Usage: + + sched-credit -d domain [-w weight] [-c cap] + """ + try: + opts, args = getopt.getopt(args[0:], "d:w:c:", + ["domain=", "weight=", "cap="]) + except getopt.GetoptError: + # print help information and exit: + print usage_msg + sys.exit(1) + + domain = None + weight = None + cap = None + + for o, a in opts: + if o == "-d": + domain = a + elif o == "-w": + weight = int(a) + elif o == "-c": + cap = int(a); + + if domain is None: + # place holder for system-wide scheduler parameters + print usage_msg + sys.exit(1) + + if weight is None and cap is None: + print server.xend.domain.sched_credit_get(domain) + else: + if weight is None: + weight = int(0) + if cap is None: + cap = int(~0) + + err = server.xend.domain.sched_credit_set(domain, weight, cap) + if err != 0: + print err def xm_info(args): arg_check(args, "info", 0) @@ -1032,6 +1076,7 @@ commands = { "sched-bvt": xm_sched_bvt, "sched-bvt-ctxallow": xm_sched_bvt_ctxallow, "sched-sedf": xm_sched_sedf, + "sched-credit": xm_sched_credit, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff -r e74246451527 -r f54d38cea8ac tools/security/secpol_tool.c --- a/tools/security/secpol_tool.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/security/secpol_tool.c Tue May 30 14:30:34 2006 -0500 @@ -231,14 +231,16 @@ uint8_t pull_buffer[PULL_CACHE_SIZE]; uint8_t pull_buffer[PULL_CACHE_SIZE]; int 
acm_domain_getpolicy(int xc_handle) { - struct acm_op op; + struct acm_getpolicy getpolicy; int ret; memset(pull_buffer, 0x00, sizeof(pull_buffer)); - op.cmd = ACM_GETPOLICY; - op.u.getpolicy.pullcache = (void *) pull_buffer; - op.u.getpolicy.pullcache_size = sizeof(pull_buffer); - if ((ret = xc_acm_op(xc_handle, &op)) < 0) { + getpolicy.interface_version = ACM_INTERFACE_VERSION; + getpolicy.pullcache = (void *) pull_buffer; + getpolicy.pullcache_size = sizeof(pull_buffer); + ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy)); + + if (ret < 0) { printf("ACM operation failed: errno=%d\n", errno); if (errno == EACCES) fprintf(stderr, "ACM operation failed -- need to" @@ -275,13 +277,13 @@ int acm_domain_loadpolicy(int xc_handle, goto free_out; } if (len == read(fd, buffer, len)) { - struct acm_op op; + struct acm_setpolicy setpolicy; /* dump it and then push it down into xen/acm */ acm_dump_policy_buffer(buffer, len); - op.cmd = ACM_SETPOLICY; - op.u.setpolicy.pushcache = (void *) buffer; - op.u.setpolicy.pushcache_size = len; - ret = xc_acm_op(xc_handle, &op); + setpolicy.interface_version = ACM_INTERFACE_VERSION; + setpolicy.pushcache = (void *) buffer; + setpolicy.pushcache_size = len; + ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, sizeof(setpolicy)); if (ret) printf @@ -322,15 +324,15 @@ int acm_domain_dumpstats(int xc_handle) int acm_domain_dumpstats(int xc_handle) { uint8_t stats_buffer[PULL_STATS_SIZE]; - struct acm_op op; + struct acm_dumpstats dumpstats; int ret; struct acm_stats_buffer *stats; memset(stats_buffer, 0x00, sizeof(stats_buffer)); - op.cmd = ACM_DUMPSTATS; - op.u.dumpstats.pullcache = (void *) stats_buffer; - op.u.dumpstats.pullcache_size = sizeof(stats_buffer); - ret = xc_acm_op(xc_handle, &op); + dumpstats.interface_version = ACM_INTERFACE_VERSION; + dumpstats.pullcache = (void *) stats_buffer; + dumpstats.pullcache_size = sizeof(stats_buffer); + ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, 
sizeof(dumpstats)); if (ret < 0) { printf diff -r e74246451527 -r f54d38cea8ac tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/tests/test_x86_emulator.c Tue May 30 14:30:34 2006 -0500 @@ -17,7 +17,8 @@ static int read_any( static int read_any( unsigned long addr, unsigned long *val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -32,7 +33,8 @@ static int write_any( static int write_any( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -48,7 +50,8 @@ static int cmpxchg_any( unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -65,34 +68,38 @@ static int cmpxchg8b_any( unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, - unsigned long new_hi) + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt) { ((unsigned long *)addr)[0] = new_lo; ((unsigned long *)addr)[1] = new_hi; return X86EMUL_CONTINUE; } -static struct x86_mem_emulator emulops = { +static struct x86_emulate_ops emulops = { read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any }; int main(int argc, char **argv) { + struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ unsigned int res = 0x7FFFFFFF; u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 }; - unsigned long cr2; int rc; + + ctxt.regs = ®s; + ctxt.mode = X86EMUL_MODE_PROT32; printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x7FFFFFFF; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 
0x92345677) || (regs.eflags != 0xa94) || @@ -109,8 +116,8 @@ int main(int argc, char **argv) #else regs.ecx = 0x12345678UL; #endif - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x92345677) || (regs.ecx != 0x8000000FUL) || @@ -124,8 +131,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x92345677UL; regs.ecx = 0xAA; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x923456AA) || (regs.eflags != 0x244) || @@ -140,8 +147,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || @@ -156,8 +163,8 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x12345678) || (regs.eflags != 0x200) || @@ -173,8 +180,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0xDDEEFF00) || (regs.eflags != 0x244) || @@ -192,8 +199,8 @@ int main(int argc, char **argv) regs.esi = (unsigned long)&res + 0; regs.edi = (unsigned long)&res + 2; 
regs.error_code = 0; /* read fault */ - cr2 = regs.esi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.esi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x44554455) || (regs.eflags != 0x200) || @@ -210,8 +217,8 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)&res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || @@ -228,8 +235,8 @@ int main(int argc, char **argv) regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)cmpxchg8b_res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (cmpxchg8b_res[0] != 0x9999AAAA) || (cmpxchg8b_res[1] != 0xCCCCFFFF) || @@ -242,8 +249,8 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)cmpxchg8b_res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (cmpxchg8b_res[0] != 0x9999AAAA) || (cmpxchg8b_res[1] != 0xCCCCFFFF) || @@ -258,9 +265,9 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x82; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x82) || (regs.ecx != 0xFFFFFF82) || @@ -273,9 +280,9 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 
0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x1234aa82; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x1234aa82) || (regs.ecx != 0xaa82) || diff -r e74246451527 -r f54d38cea8ac tools/xenstore/Makefile --- a/tools/xenstore/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/tools/xenstore/Makefile Tue May 30 14:30:34 2006 -0500 @@ -27,6 +27,12 @@ CLIENTS += xenstore-write CLIENTS += xenstore-write CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS)) +XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o + +XENSTORED_Linux = xenstored_linux.o + +XENSTORED_OBJS += $(XENSTORED_$(OS)) + .PHONY: all all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls @@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte .PHONY: testcode testcode: xs_test xenstored_test xs_random -xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o +xenstored: $(XENSTORED_OBJS) $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/xenstore/xenstored_core.c Tue May 30 14:30:34 2006 -0500 @@ -451,6 +451,11 @@ static struct node *read_node(struct con static bool write_node(struct connection *conn, const struct node *node) { + /* + * conn will be null when this is called from manual_node. + * tdb_context copes with this. + */ + TDB_DATA key, data; void *p; @@ -478,7 +483,7 @@ static bool write_node(struct connection /* TDB should set errno, but doesn't even set ecode AFAICT. 
*/ if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) { - corrupt(conn, "Write of %s = %s failed", key, data); + corrupt(conn, "Write of %s failed", key.dptr); goto error; } return true; diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Tue May 30 12:52:02 2006 -0500 +++ b/tools/xenstore/xenstored_core.h Tue May 30 14:30:34 2006 -0500 @@ -19,6 +19,8 @@ #ifndef _XENSTORED_CORE_H #define _XENSTORED_CORE_H + +#include <xenctrl.h> #include <sys/types.h> #include <dirent.h> @@ -163,6 +165,12 @@ void trace(const char *fmt, ...); extern int event_fd; +/* Map the kernel's xenstore page. */ +void *xenbus_map(void); + +/* Return the event channel used by xenbus. */ +evtchn_port_t xenbus_evtchn(void); + #endif /* _XENSTORED_CORE_H */ /* diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Tue May 30 12:52:02 2006 -0500 +++ b/tools/xenstore/xenstored_domain.c Tue May 30 14:30:34 2006 -0500 @@ -33,12 +33,11 @@ #include "talloc.h" #include "xenstored_core.h" #include "xenstored_domain.h" -#include "xenstored_proc.h" #include "xenstored_watch.h" #include "xenstored_test.h" #include <xenctrl.h> -#include <xen/linux/evtchn.h> +#include <xen/sys/evtchn.h> static int *xc_handle; static evtchn_port_t virq_port; @@ -476,44 +475,24 @@ void restore_existing_connections(void) static int dom0_init(void) { - int rc, fd; - evtchn_port_t port; - char str[20]; - struct domain *dom0; - - fd = open(XENSTORED_PROC_PORT, O_RDONLY); - if (fd == -1) + evtchn_port_t port; + struct domain *dom0; + + port = xenbus_evtchn(); + if (port == -1) return -1; - rc = read(fd, str, sizeof(str)); - if (rc == -1) - goto outfd; - str[rc] = '\0'; - port = strtoul(str, NULL, 0); - - close(fd); - dom0 = new_domain(NULL, 0, port); - fd = open(XENSTORED_PROC_KVA, O_RDWR); - if (fd == -1) + dom0->interface = xenbus_map(); + if (dom0->interface == NULL) return -1; - dom0->interface = mmap(NULL, 
getpagesize(), PROT_READ|PROT_WRITE, - MAP_SHARED, fd, 0); - if (dom0->interface == MAP_FAILED) - goto outfd; - - close(fd); - talloc_steal(dom0->conn, dom0); evtchn_notify(dom0->port); return 0; -outfd: - close(fd); - return -1; } diff -r e74246451527 -r f54d38cea8ac tools/xentrace/xentrace_format --- a/tools/xentrace/xentrace_format Tue May 30 12:52:02 2006 -0500 +++ b/tools/xentrace/xentrace_format Tue May 30 14:30:34 2006 -0500 @@ -89,7 +89,7 @@ CPUREC = "I" CPUREC = "I" TRCREC = "QLLLLLL" -last_tsc = [0,0,0,0,0,0,0,0] +last_tsc = [0] i=0 @@ -111,7 +111,9 @@ while not interrupted: #print i, tsc - if tsc < last_tsc[cpu]: + if cpu >= len(last_tsc): + last_tsc += [0] * (cpu - len(last_tsc) + 1) + elif tsc < last_tsc[cpu]: print "TSC stepped backward cpu %d ! %d %d" % (cpu,tsc,last_tsc[cpu]) last_tsc[cpu] = tsc diff -r e74246451527 -r f54d38cea8ac tools/xm-test/configure.ac --- a/tools/xm-test/configure.ac Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/configure.ac Tue May 30 14:30:34 2006 -0500 @@ -99,6 +99,7 @@ AC_CONFIG_FILES([ tests/block-list/Makefile tests/block-create/Makefile tests/block-destroy/Makefile + tests/block-integrity/Makefile tests/console/Makefile tests/create/Makefile tests/destroy/Makefile diff -r e74246451527 -r f54d38cea8ac tools/xm-test/ramdisk/bin/create_disk_image --- a/tools/xm-test/ramdisk/bin/create_disk_image Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/ramdisk/bin/create_disk_image Tue May 30 14:30:34 2006 -0500 @@ -207,6 +207,13 @@ function dd_rootfs_to_image() dd if="$ROOTFS" of="$LOOPP" > /dev/null 2>&1 if [ $? -ne 0 ]; then die "Failed to dd $ROOTFS to $LOOPP." + fi + + # Resize fs to use full partition + e2fsck -f $LOOPP + resize2fs $LOOPP + if [ $? -ne 0 ]; then + die "Failed to resize rootfs on $LOOPP." 
fi } diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/Makefile.am --- a/tools/xm-test/tests/Makefile.am Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/Makefile.am Tue May 30 14:30:34 2006 -0500 @@ -1,14 +1,15 @@ SUBDIRS = \ SUBDIRS = \ block-create \ - block-list \ - block-destroy \ + block-list \ + block-destroy \ + block-integrity \ console \ create \ destroy \ dmesg \ domid \ domname \ - help \ + help \ info \ list \ memmax \ diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py --- a/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py Tue May 30 14:30:34 2006 -0500 @@ -65,13 +65,24 @@ if check_status and status != 0: FAIL("\"%s\" returned invalid %i != 0" %(cmd,status)) # 5) check /proc/cpuinfo for cpu count -cmd = "grep \"^processor\" /proc/cpuinfo | wc -l" -status, output = traceCommand(cmd) -if check_status and status != 0: - os.unsetenv("XEND_CONFIG") - restartXend() - FAIL("\"%s\" returned invalid %i != 0" %(cmd,status)) +# It takes some time for the CPU count to change, on multi-proc systems, so check the number of procs in a loop for 20 seconds. +#Sleep inside the loop for a second each time. +timeout = 20 +starttime = time.time() +while timeout + starttime > time.time(): +# Check /proc/cpuinfo + cmd = "grep \"^processor\" /proc/cpuinfo | wc -l" + status, output = traceCommand(cmd) + if check_status and status != 0: + os.unsetenv("XEND_CONFIG") + restartXend() + FAIL("\"%s\" returned invalid %i != 0" %(cmd,status)) +# Has it succeeded? 
If so, we can leave the loop + if output == str(enforce_dom0_cpus): + break +# Sleep for 1 second before trying again + time.sleep(1) if output != str(enforce_dom0_cpus): os.unsetenv("XEND_CONFIG") restartXend() @@ -94,7 +105,14 @@ if check_status and status != 0: FAIL("\"%s\" returned invalid %i != 0" %(cmd,status)) # check restore worked -num_online = int(getDomInfo("Domain-0", "VCPUs")) +# Since this also takes time, we will do it in a loop with a 20 second timeout. +timeout=20 +starttime=time.time() +while timeout + starttime > time.time(): + num_online = int(getDomInfo("Domain-0", "VCPUs")) + if num_online == dom0_online_vcpus: + break + time.sleep(1) if num_online != dom0_online_vcpus: os.unsetenv("XEND_CONFIG") restartXend() diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/03_network_local_tcp_pos.py --- a/tools/xm-test/tests/network/03_network_local_tcp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/03_network_local_tcp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -44,7 +44,7 @@ try: lofails="" for size in trysizes: out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -q -c 20 " - + "--fast -d " + str(size)) + + "--fast -d " + str(size) + " -N " + str(size)) if out["return"]: lofails += " " + str(size) @@ -54,7 +54,7 @@ try: ip = netdev.getNetDevIP() for size in trysizes: out = console.runCmd("hping2 " + ip + " -E /dev/urandom -q -c 20 " - + "--fast -d "+ str(size)) + + "--fast -d "+ str(size) + " -N " + str(size)) if out["return"]: eth0fails += " " + str(size) except ConsoleError, e: diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/04_network_local_udp_pos.py --- a/tools/xm-test/tests/network/04_network_local_udp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/04_network_local_udp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -43,7 +43,7 @@ try: lofails="" for size in trysizes: out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -2 -q -c 20 " - + "--fast -d " + str(size)) + + 
"--fast -d " + str(size) + " -N " + str(size)) if out["return"]: lofails += " " + str(size) print out["output"] @@ -54,7 +54,7 @@ try: ip = netdev.getNetDevIP() for size in trysizes: out = console.runCmd("hping2 " + ip + " -E /dev/urandom -2 -q -c 20 " - + "--fast -d " + str(size)) + + "--fast -d " + str(size) + " -N " + str(size)) if out["return"]: eth0fails += " " + str(size) print out["output"] diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/06_network_dom0_tcp_pos.py --- a/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -44,7 +44,7 @@ try: dom0ip = netdev.getDom0AliasIP() for size in trysizes: out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -q -c 20 " - + "--fast -d " + str(size)) + + "--fast -d " + str(size) + " -N " + str(size)) if out["return"]: fails += " " + str(size) print out["output"] diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/07_network_dom0_udp_pos.py --- a/tools/xm-test/tests/network/07_network_dom0_udp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/07_network_dom0_udp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -43,7 +43,7 @@ try: dom0ip = netdev.getDom0AliasIP() for size in trysizes: out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -2 -q -c 20" - + " --fast -d " + str(size)) + + " --fast -d " + str(size) + " -N " + str(size)) if out["return"]: fails += " " + str(size) print out["output"] diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/12_network_domU_tcp_pos.py --- a/tools/xm-test/tests/network/12_network_domU_tcp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/12_network_domU_tcp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -50,7 +50,7 @@ try: ip2 = dst_netdev.getNetDevIP() for size in pingsizes: out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -q -c 20 " - + "--fast -d " + str(size)) + 
+ "--fast -d " + str(size) + " -N " + str(size)) if out["return"]: fails += " " + str(size) print out["output"] diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/13_network_domU_udp_pos.py --- a/tools/xm-test/tests/network/13_network_domU_udp_pos.py Tue May 30 12:52:02 2006 -0500 +++ b/tools/xm-test/tests/network/13_network_domU_udp_pos.py Tue May 30 14:30:34 2006 -0500 @@ -50,7 +50,7 @@ try: ip2 = dst_netdev.getNetDevIP() for size in pingsizes: out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -2 -q " - + "-c 20 --fast -d " + str(size)) + + "-c 20 --fast -d " + str(size) + " -N " + str(size)) if out["return"]: fails += " " + str(size) print out["output"] diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/Rules.mk Tue May 30 14:30:34 2006 -0500 @@ -3,31 +3,31 @@ HAS_ACPI := y VALIDATE_VT ?= n -xen_ia64_dom0_virtual_physical ?= n +xen_ia64_dom0_virtual_physical ?= y +no_warns ?= n ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- endif -AFLAGS += -D__ASSEMBLY__ -nostdinc $(CPPFLAGS) -AFLAGS += -mconstant-gp -CPPFLAGS += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64 \ - -I$(BASEDIR)/include/asm-ia64/linux \ - -I$(BASEDIR)/include/asm-ia64/linux-xen \ - -I$(BASEDIR)/include/asm-ia64/linux-null \ - -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen + +# Used only by linux/Makefile. +AFLAGS_KERNEL += -mconstant-gp + +# Note: .S -> .o rule uses AFLAGS and CFLAGS. CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing CFLAGS += -mconstant-gp #CFLAGS += -O3 # -O3 over-inlines making debugging tough! CFLAGS += -O2 # but no optimization causes compile errors! 
-#CFLAGS += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE) -CFLAGS += -iwithprefix include -Wall -CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -CFLAGS += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \ +CFLAGS += -fomit-frame-pointer -D__KERNEL__ +CFLAGS += -iwithprefix include +CPPFLAGS+= -I$(BASEDIR)/include \ + -I$(BASEDIR)/include/asm-ia64 \ -I$(BASEDIR)/include/asm-ia64/linux \ -I$(BASEDIR)/include/asm-ia64/linux-xen \ -I$(BASEDIR)/include/asm-ia64/linux-null \ -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen +CFLAGS += $(CPPFLAGS) #CFLAGS += -Wno-pointer-arith -Wredundant-decls CFLAGS += -DIA64 -DXEN -DLINUX_2_6 -DV_IOSAPIC_READY CFLAGS += -ffixed-r13 -mfixed-range=f2-f5,f12-f127 @@ -39,4 +39,8 @@ ifeq ($(xen_ia64_dom0_virtual_physical), ifeq ($(xen_ia64_dom0_virtual_physical),y) CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP endif +ifeq ($(no_warns),y) +CFLAGS += -Wa,--fatal-warnings +endif + LDFLAGS := -g diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/asm-offsets.c Tue May 30 14:30:34 2006 -0500 @@ -50,8 +50,6 @@ void foo(void) DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0)); DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm)); DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva)); - DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, arch.dtlb_pte)); - DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, arch.itlb_pte)); DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0])); DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3])); DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3])); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/setup.c --- a/xen/arch/ia64/linux-xen/setup.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/linux-xen/setup.c Tue May 30 14:30:34 
2006 -0500 @@ -800,8 +800,7 @@ cpu_init (void) cpu_data = per_cpu_init(); #ifdef XEN - printf ("cpu_init: current=%p, current->domain->arch.mm=%p\n", - current, current->domain->arch.mm); + printf ("cpu_init: current=%p\n", current); #endif /* @@ -872,12 +871,11 @@ cpu_init (void) #ifndef XEN current->active_mm = &init_mm; #endif -#ifdef XEN - if (current->domain->arch.mm) -#else +#ifndef XEN if (current->mm) -#endif BUG(); +#endif + #ifdef XEN ia64_fph_enable(); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/smp.c --- a/xen/arch/ia64/linux-xen/smp.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/linux-xen/smp.c Tue May 30 14:30:34 2006 -0500 @@ -53,28 +53,6 @@ #endif #ifdef XEN -// FIXME: MOVE ELSEWHERE -//Huh? This seems to be used on ia64 even if !CONFIG_SMP -void flush_tlb_mask(cpumask_t mask) -{ - int cpu; - - cpu = smp_processor_id(); - if (cpu_isset (cpu, mask)) { - cpu_clear(cpu, mask); - local_flush_tlb_all (); - } - -#ifdef CONFIG_SMP - if (cpus_empty(mask)) - return; - - for (cpu = 0; cpu < NR_CPUS; ++cpu) - if (cpu_isset(cpu, mask)) - smp_call_function_single - (cpu, (void (*)(void *))local_flush_tlb_all, NULL, 1, 1); -#endif -} //#if CONFIG_SMP || IA64 #if CONFIG_SMP //Huh? 
This seems to be used on ia64 even if !CONFIG_SMP @@ -276,7 +254,6 @@ smp_send_reschedule (int cpu) { platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } -#endif void smp_flush_tlb_all (void) @@ -284,15 +261,6 @@ smp_flush_tlb_all (void) on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); } -#ifdef XEN -void -smp_vhpt_flush_all(void) -{ - on_each_cpu((void (*)(void *))vhpt_flush, NULL, 1, 1); -} -#endif - -#ifndef XEN void smp_flush_tlb_mm (struct mm_struct *mm) { diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/unaligned.c --- a/xen/arch/ia64/linux-xen/unaligned.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/linux-xen/unaligned.c Tue May 30 14:30:34 2006 -0500 @@ -377,7 +377,7 @@ get_rse_reg (struct pt_regs *regs, unsig if (ridx >= sof) { /* read of out-of-frame register returns an undefined value; 0 in our case. */ DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); - panic("wrong stack register number (iip=%p)\n", regs->cr_iip); + panic("wrong stack register number (iip=%lx)\n", regs->cr_iip); } if (ridx < sor) diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/pal_emul.c --- a/xen/arch/ia64/vmx/pal_emul.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/pal_emul.c Tue May 30 14:30:34 2006 -0500 @@ -62,8 +62,8 @@ pal_cache_flush (VCPU *vcpu) { // ia64_pal_call_static(gr28 ,gr29, gr30, // result.v1,1LL); // } - while (result.status != 0) { - panic("PAL_CACHE_FLUSH ERROR, status %ld", result.status); + if(result.status != 0) { + panic_domain(vcpu_regs(vcpu),"PAL_CACHE_FLUSH ERROR, status %ld", result.status); } return result; @@ -445,7 +445,7 @@ pal_emul( VCPU *vcpu) { break; default: - panic("pal_emul(): guest call unsupported pal" ); + panic_domain(vcpu_regs(vcpu),"pal_emul(): guest call unsupported pal" ); } set_pal_result (vcpu, result); } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vlsapic.c --- a/xen/arch/ia64/vmx/vlsapic.c Tue May 30 12:52:02 2006 -0500 +++ 
b/xen/arch/ia64/vmx/vlsapic.c Tue May 30 14:30:34 2006 -0500 @@ -568,7 +568,7 @@ int vmx_check_pending_irq(VCPU *vcpu) if ( vpsr.i && IRQ_NO_MASKED == mask ) { isr = vpsr.val & IA64_PSR_RI; if ( !vpsr.ic ) - panic("Interrupt when IC=0\n"); + panic_domain(regs,"Interrupt when IC=0\n"); vmx_reflect_interruption(0,isr,0, 12, regs ); // EXT IRQ injected = 1; } @@ -595,7 +595,8 @@ void guest_write_eoi(VCPU *vcpu) uint64_t spsr; vec = highest_inservice_irq(vcpu); - if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n"); + if ( vec == NULL_VECTOR ) + panic_domain(vcpu_regs(vcpu),"Wrong vector to EOI\n"); local_irq_save(spsr); VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63)); local_irq_restore(spsr); @@ -634,7 +635,7 @@ static void generate_exirq(VCPU *vcpu) update_vhpi(vcpu, NULL_VECTOR); isr = vpsr.val & IA64_PSR_RI; if ( !vpsr.ic ) - panic("Interrupt when IC=0\n"); + panic_domain(regs,"Interrupt when IC=0\n"); vmx_reflect_interruption(0,isr,0, 12, regs); // EXT IRQ } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmmu.c --- a/xen/arch/ia64/vmx/vmmu.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmmu.c Tue May 30 14:30:34 2006 -0500 @@ -134,11 +134,11 @@ static void init_domain_vhpt(struct vcpu void * vbase; page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0); if ( page == NULL ) { - panic("No enough contiguous memory for init_domain_vhpt\n"); + panic_domain(vcpu_regs(v),"No enough contiguous memory for init_domain_vhpt\n"); } vbase = page_to_virt(page); memset(vbase, 0, VCPU_VHPT_SIZE); - printk("Allocate domain tlb at 0x%p\n", vbase); + printk("Allocate domain vhpt at 0x%p\n", vbase); VHPT(v,hash) = vbase; VHPT(v,hash_sz) = VCPU_VHPT_SIZE/2; @@ -157,11 +157,11 @@ void init_domain_tlb(struct vcpu *v) init_domain_vhpt(v); page = alloc_domheap_pages (NULL, VCPU_VTLB_ORDER, 0); if ( page == NULL ) { - panic("No enough contiguous memory for init_domain_tlb\n"); + panic_domain(vcpu_regs(v),"No enough contiguous memory for init_domain_tlb\n"); } vbase = 
page_to_virt(page); memset(vbase, 0, VCPU_VTLB_SIZE); - printk("Allocate domain tlb at 0x%p\n", vbase); + printk("Allocate domain vtlb at 0x%p\n", vbase); VTLB(v,hash) = vbase; VTLB(v,hash_sz) = VCPU_VTLB_SIZE/2; @@ -202,7 +202,7 @@ void machine_tlb_insert(struct vcpu *d, mtlb.ppn = get_mfn(d->domain,tlb->ppn); mtlb_ppn=mtlb.ppn; if (mtlb_ppn == INVALID_MFN) - panic("Machine tlb insert with invalid mfn number.\n"); + panic_domain(vcpu_regs(d),"Machine tlb insert with invalid mfn number.\n"); psr = ia64_clear_ic(); if ( cl == ISIDE_TLB ) { @@ -325,12 +325,12 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod } if( gpip){ mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); - if( mfn == INVALID_MFN ) panic("fetch_code: invalid memory\n"); + if( mfn == INVALID_MFN ) panic_domain(vcpu_regs(vcpu),"fetch_code: invalid memory\n"); vpa =(u64 *)__va( (gip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT)); }else{ tlb = vhpt_lookup(gip); if( tlb == NULL) - panic("No entry found in ITLB and DTLB\n"); + panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n"); vpa =(u64 *)__va((tlb->ppn>>(PAGE_SHIFT-ARCH_PAGE_SHIFT)<<PAGE_SHIFT)|(gip&(PAGE_SIZE-1))); } *code1 = *vpa++; @@ -347,7 +347,7 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UIN slot = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); if (slot >=0) { // generate MCA. - panic("Tlb conflict!!"); + panic_domain(vcpu_regs(vcpu),"Tlb conflict!!"); return IA64_FAULT; } thash_purge_and_insert(vcpu, pte, itir, ifa); @@ -363,7 +363,7 @@ IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UIN slot = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); if (slot >=0) { // generate MCA. - panic("Tlb conflict!!"); + panic_domain(vcpu_regs(vcpu),"Tlb conflict!!"); return IA64_FAULT; } gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; @@ -385,7 +385,7 @@ IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64 index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); if (index >=0) { // generate MCA. 
- panic("Tlb conflict!!"); + panic_domain(vcpu_regs(vcpu),"Tlb conflict!!"); return IA64_FAULT; } thash_purge_entries(vcpu, va, ps); @@ -407,7 +407,7 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); if (index>=0) { // generate MCA. - panic("Tlb conflict!!"); + panic_domain(vcpu_regs(vcpu),"Tlb conflict!!"); return IA64_FAULT; } thash_purge_entries(vcpu, va, ps); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_entry.S --- a/xen/arch/ia64/vmx/vmx_entry.S Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_entry.S Tue May 30 14:30:34 2006 -0500 @@ -290,10 +290,59 @@ GLOBAL_ENTRY(ia64_leave_hypervisor) mov ar.ccv=r18 ;; //rbs_switch - // loadrs has already been shifted + + shr.u r18=r20,16 + ;; + movl r19= THIS_CPU(ia64_phys_stacked_size_p8) + ;; + ld4 r19=[r19] + +vmx_dont_preserve_current_frame: +/* + * To prevent leaking bits between the hypervisor and guest domain, + * we must clear the stacked registers in the "invalid" partition here. + * 5 registers/cycle on McKinley). 
+ */ +# define pRecurse p6 +# define pReturn p7 +# define Nregs 14 + + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) +vmx_rse_clear_invalid: + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=vmx_rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 + +# undef pRecurse +# undef pReturn + +// loadrs has already been shifted alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - mov ar.rsc=r20 ;; loadrs ;; @@ -315,7 +364,9 @@ vmx_dorfirfi_back: adds r18=IA64_VPD_BASE_OFFSET,r21 ;; ld8 r18=[r18] //vpd - ;; + adds r17=IA64_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] adds r19=VPD(VPSR),r18 ;; ld8 r19=[r19] //vpsr @@ -331,12 +382,14 @@ vmx_dorfirfi_back: mov b0=r16 br.cond.sptk b0 // call the service ;; +END(ia64_leave_hypervisor) switch_rr7: // fall through GLOBAL_ENTRY(ia64_vmm_entry) /* * must be at bank 0 * parameter: + * r17:cr.isr * r18:vpd * r19:vpsr * r20:__vsa_base @@ -348,13 +401,19 @@ GLOBAL_ENTRY(ia64_vmm_entry) tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic ;; (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p1) br.sptk.many ia64_vmm_entry_out + ;; + tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 ;; +ia64_vmm_entry_out: mov pr=r23,-2 mov b0=r29 ;; br.cond.sptk b0 // call pal service -END(ia64_leave_hypervisor) +END(ia64_vmm_entry) //r24 rfi_pfs //r17 
address of rfi_pfs diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_init.c --- a/xen/arch/ia64/vmx/vmx_init.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_init.c Tue May 30 14:30:34 2006 -0500 @@ -208,8 +208,9 @@ vmx_create_vp(struct vcpu *v) ivt_base = (u64) &vmx_ia64_ivt; printk("ivt_base: 0x%lx\n", ivt_base); ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0); - if (ret != PAL_STATUS_SUCCESS) - panic("ia64_pal_vp_create failed. \n"); + if (ret != PAL_STATUS_SUCCESS){ + panic_domain(vcpu_regs(v),"ia64_pal_vp_create failed. \n"); + } } /* Other non-context related tasks can be done in context switch */ @@ -220,8 +221,9 @@ vmx_save_state(struct vcpu *v) /* FIXME: about setting of pal_proc_vector... time consuming */ status = ia64_pal_vp_save((u64 *)v->arch.privregs, 0); - if (status != PAL_STATUS_SUCCESS) - panic("Save vp status failed\n"); + if (status != PAL_STATUS_SUCCESS){ + panic_domain(vcpu_regs(v),"Save vp status failed\n"); + } /* Need to save KR when domain switch, though HV itself doesn;t @@ -244,8 +246,9 @@ vmx_load_state(struct vcpu *v) u64 status; status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0); - if (status != PAL_STATUS_SUCCESS) - panic("Restore vp status failed\n"); + if (status != PAL_STATUS_SUCCESS){ + panic_domain(vcpu_regs(v),"Restore vp status failed\n"); + } ia64_set_kr(0, v->arch.arch_vmx.vkr[0]); ia64_set_kr(1, v->arch.arch_vmx.vkr[1]); @@ -343,17 +346,18 @@ int vmx_build_physmap_table(struct domai for (j = io_ranges[i].start; j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE) - assign_domain_page(d, j, io_ranges[i].type); + __assign_domain_page(d, j, io_ranges[i].type); } /* Map normal memory below 3G */ end = VMX_CONFIG_PAGES(d) << PAGE_SHIFT; tmp = end < MMIO_START ? 
end : MMIO_START; for (i = 0; (i < tmp) && (list_ent != &d->page_list); i += PAGE_SIZE) { - mfn = page_to_mfn(list_entry( - list_ent, struct page_info, list)); + mfn = page_to_mfn(list_entry(list_ent, struct page_info, list)); + list_ent = mfn_to_page(mfn)->list.next; + if (VGA_IO_START <= i && i < VGA_IO_START + VGA_IO_SIZE) + continue; assign_domain_page(d, i, mfn << PAGE_SHIFT); - list_ent = mfn_to_page(mfn)->list.next; } ASSERT(list_ent != &d->page_list); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_interrupt.c --- a/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 14:30:34 2006 -0500 @@ -91,8 +91,12 @@ inject_guest_interruption(VCPU *vcpu, u6 { u64 viva; REGS *regs; + ISR pt_isr; regs=vcpu_regs(vcpu); - + // clear cr.isr.ri + pt_isr.val = VMX(vcpu,cr_isr); + pt_isr.ir = 0; + VMX(vcpu,cr_isr) = pt_isr.val; collect_interruption(vcpu); vmx_vcpu_get_iva(vcpu,&viva); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_ivt.S --- a/xen/arch/ia64/vmx/vmx_ivt.S Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_ivt.S Tue May 30 14:30:34 2006 -0500 @@ -143,35 +143,62 @@ ENTRY(vmx_itlb_miss) thash r17 = r16 ;; ttag r20 = r16 + mov r18 = r17 ;; vmx_itlb_loop: cmp.eq p6,p0 = r0, r17 -(p6) br vmx_itlb_out - ;; - adds r22 = VLE_TITAG_OFFSET, r17 - adds r23 = VLE_CCHAIN_OFFSET, r17 - ;; - ld8 r24 = [r22] - ld8 r25 = [r23] - ;; - lfetch [r25] - cmp.eq p6,p7 = r20, r24 - ;; -(p7) mov r17 = r25; -(p7) br.sptk vmx_itlb_loop +(p6)br vmx_itlb_out + ;; + adds r16 = VLE_TITAG_OFFSET, r17 + adds r19 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r22 = [r16] + ld8 r23 = [r19] + ;; + lfetch [r23] + cmp.eq p6,p7 = r20, r22 + ;; +(p7)mov r17 = r23; +(p7)br.sptk vmx_itlb_loop ;; adds r23 = VLE_PGFLAGS_OFFSET, r17 adds r24 = VLE_ITIR_OFFSET, r17 ;; - ld8 r26 = [r23] - ld8 r25 = [r24] - ;; - mov cr.itir = r25 - ;; - itc.i r26 + ld8 r25 = [r23] + ld8 r26 = [r24] + ;; + cmp.eq p6,p7=r18,r17 +(p6) br 
vmx_itlb_loop1 + ;; + ld8 r27 = [r18] + ;; + extr.u r19 = r27, 56, 8 + extr.u r20 = r25, 56, 8 + ;; + dep r27 = r20, r27, 56, 8 + dep r25 = r19, r25, 56, 8 + ;; + st8 [r18] = r25,8 + st8 [r23] = r27 + ;; + ld8 r28 = [r18] + ;; + st8 [r18] = r26,8 + st8 [r24] = r28 + ;; + ld8 r30 = [r18] + ;; + st8 [r18] = r22 + st8 [r16] = r30 + ;; +vmx_itlb_loop1: + mov cr.itir = r26 + ;; + itc.i r25 ;; srlz.i ;; + mov r17=cr.isr mov r23=r31 mov r22=b0 adds r16=IA64_VPD_BASE_OFFSET,r21 @@ -201,42 +228,68 @@ ENTRY(vmx_dtlb_miss) mov r29=cr.ipsr; ;; tbit.z p6,p7=r29,IA64_PSR_VM_BIT; - (p6)br.sptk vmx_alt_dtlb_miss_1 -//(p6)br.sptk vmx_fault_2 +(p6)br.sptk vmx_alt_dtlb_miss_1 mov r16 = cr.ifa ;; thash r17 = r16 ;; ttag r20 = r16 + mov r18 = r17 ;; vmx_dtlb_loop: cmp.eq p6,p0 = r0, r17 (p6)br vmx_dtlb_out ;; - adds r22 = VLE_TITAG_OFFSET, r17 - adds r23 = VLE_CCHAIN_OFFSET, r17 - ;; - ld8 r24 = [r22] - ld8 r25 = [r23] - ;; - lfetch [r25] - cmp.eq p6,p7 = r20, r24 - ;; -(p7)mov r17 = r25; + adds r16 = VLE_TITAG_OFFSET, r17 + adds r19 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r22 = [r16] + ld8 r23 = [r19] + ;; + lfetch [r23] + cmp.eq p6,p7 = r20, r22 + ;; +(p7)mov r17 = r23; (p7)br.sptk vmx_dtlb_loop ;; adds r23 = VLE_PGFLAGS_OFFSET, r17 adds r24 = VLE_ITIR_OFFSET, r17 ;; - ld8 r26 = [r23] - ld8 r25 = [r24] - ;; - mov cr.itir = r25 - ;; - itc.d r26 + ld8 r25 = [r23] + ld8 r26 = [r24] + ;; + cmp.eq p6,p7=r18,r17 +(p6) br vmx_dtlb_loop1 + ;; + ld8 r27 = [r18] + ;; + extr.u r19 = r27, 56, 8 + extr.u r20 = r25, 56, 8 + ;; + dep r27 = r20, r27, 56, 8 + dep r25 = r19, r25, 56, 8 + ;; + st8 [r18] = r25,8 + st8 [r23] = r27 + ;; + ld8 r28 = [r18] + ;; + st8 [r18] = r26,8 + st8 [r24] = r28 + ;; + ld8 r30 = [r18] + ;; + st8 [r18] = r22 + st8 [r16] = r30 + ;; +vmx_dtlb_loop1: + mov cr.itir = r26 + ;; + itc.d r25 ;; srlz.d; ;; + mov r17=cr.isr mov r23=r31 mov r22=b0 adds r16=IA64_VPD_BASE_OFFSET,r21 diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_phy_mode.c --- 
a/xen/arch/ia64/vmx/vmx_phy_mode.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Tue May 30 14:30:34 2006 -0500 @@ -186,8 +186,10 @@ vmx_load_all_rr(VCPU *vcpu) * mode in same region */ if (is_physical_mode(vcpu)) { - if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic("Unexpected domain switch in phy emul\n"); + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL){ + panic_domain(vcpu_regs(vcpu), + "Unexpected domain switch in phy emul\n"); + } phy_rr.rrval = vcpu->arch.metaphysical_rr0; //phy_rr.ps = PAGE_SHIFT; phy_rr.ve = 1; @@ -322,8 +324,7 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_ break; default: /* Sanity check */ - printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val); - panic("Unexpected virtual <--> physical mode transition"); + panic_domain(vcpu_regs(vcpu),"Unexpected virtual <--> physical mode transition,old:%lx,new:%lx\n",old_psr.val,new_psr.val); break; } return; diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_process.c Tue May 30 14:30:34 2006 -0500 @@ -338,7 +338,7 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r } if(vec == 1) type = ISIDE_TLB; else if(vec == 2) type = DSIDE_TLB; - else panic("wrong vec\n"); + else panic_domain(regs,"wrong vec:%0xlx\n",vec); // prepare_if_physical_mode(v); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_support.c --- a/xen/arch/ia64/vmx/vmx_support.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_support.c Tue May 30 14:30:34 2006 -0500 @@ -92,12 +92,12 @@ void vmx_io_assist(struct vcpu *v) */ vio = get_vio(v->domain, v->vcpu_id); if (!vio) - panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); + panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", (unsigned long)vio); p = &vio->vp_ioreq; if (p->state == STATE_IORESP_HOOK) - panic("Not supported: No hook available for DM request\n"); + panic_domain(vcpu_regs(v),"Not supported: No hook 
available for DM request\n"); if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { if (p->state != STATE_IORESP_READY) { @@ -135,7 +135,7 @@ void vmx_intr_assist(struct vcpu *v) * out of vmx_wait_io, when guest is still waiting for response. */ if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) - panic("!!!Bad resume to guest before I/O emulation is done.\n"); + panic_domain(vcpu_regs(v),"!!!Bad resume to guest before I/O emulation is done.\n"); /* Clear indicator specific to interrupt delivered from DM */ if (test_and_clear_bit(port, @@ -154,7 +154,7 @@ void vmx_intr_assist(struct vcpu *v) */ vio = get_vio(v->domain, v->vcpu_id); if (!vio) - panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); + panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", (unsigned long)vio); #ifdef V_IOSAPIC_READY /* Confirm virtual interrupt line signals, and set pending bits in vpd */ diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_vcpu.c --- a/xen/arch/ia64/vmx/vmx_vcpu.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c Tue May 30 14:30:34 2006 -0500 @@ -91,7 +91,7 @@ vmx_vcpu_set_psr(VCPU *vcpu, unsigned lo * Otherwise panic */ if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) { - panic ("Setting unsupport guest psr!"); + panic_domain (regs,"Setting unsupport guest psr!"); } /* @@ -206,7 +206,7 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI vcpu_get_rr(vcpu, reg, &oldrr.rrval); newrr.rrval=val; if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits)) - panic_domain (NULL, "use of invalid rid %lx\n", newrr.rid); + panic_domain (NULL, "use of invalid rid %x\n", newrr.rid); if(oldrr.ps!=newrr.ps){ thash_purge_all(vcpu); } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_virt.c --- a/xen/arch/ia64/vmx/vmx_virt.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vmx_virt.c Tue May 30 14:30:34 2006 -0500 @@ -182,8 +182,9 @@ IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 
inst) { UINT64 val; + if(vcpu_get_gr_nat(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT) - panic(" get_psr nat bit fault\n"); + panic_domain(vcpu_regs(vcpu),"get_psr nat bit fault\n"); val = (val & MASK(0, 32)) | (VCPU(vcpu, vpsr) & MASK(32, 32)); #if 0 @@ -216,7 +217,7 @@ IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST6 regs=vcpu_regs(vcpu); vpsr.val=regs->cr_ipsr; if ( vpsr.is == 1 ) { - panic ("We do not support IA32 instruction yet"); + panic_domain(regs,"We do not support IA32 instruction yet"); } return vmx_vcpu_rfi(vcpu); @@ -715,8 +716,9 @@ IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *v { // I27 and M30 are identical for these fields UINT64 imm; + if(inst.M30.ar3!=44){ - panic("Can't support ar register other than itc"); + panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc"); } #ifdef CHECK_FAULT IA64_PSR vpsr; @@ -741,7 +743,7 @@ IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *v // I26 and M29 are identical for these fields u64 r2; if(inst.M29.ar3!=44){ - panic("Can't support ar register other than itc"); + panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc"); } if(vcpu_get_gr_nat(vcpu,inst.M29.r2,&r2)){ #ifdef CHECK_FAULT @@ -769,7 +771,7 @@ IA64FAULT vmx_emul_mov_from_ar_reg(VCPU // I27 and M30 are identical for these fields u64 r1; if(inst.M31.ar3!=44){ - panic("Can't support ar register other than itc"); + panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc"); } #ifdef CHECK_FAULT if(check_target_register(vcpu,inst.M31.r1)){ @@ -1359,8 +1361,7 @@ if ( (cause == 0xff && opcode == 0x1e000 slot_type = slot_types[bundle.template][slot]; ia64_priv_decoder(slot_type, inst, &cause); if(cause==0){ - printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot); - panic("123456\n"); + panic_domain(regs,"This instruction at 0x%lx slot %d can't be virtualized", iip, slot); } #else inst.inst=opcode; @@ -1494,12 +1495,8 @@ if ( (cause == 0xff && opcode == 0x1e000 status=IA64_FAULT; break; default: - printf("unknown cause 
%ld, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr); - while(1); - /* For unknown cause, let hardware to re-execute */ - status=IA64_RETRY; - break; -// panic("unknown cause in virtualization intercept"); + panic_domain(regs,"unknown cause %ld, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr); + break; }; #if 0 diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vtlb.c --- a/xen/arch/ia64/vmx/vtlb.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/vmx/vtlb.c Tue May 30 14:30:34 2006 -0500 @@ -274,36 +274,36 @@ static void vtlb_purge(thash_cb_t *hcb, static void vtlb_purge(thash_cb_t *hcb, u64 va, u64 ps) { thash_data_t *hash_table, *prev, *next; - u64 start, end, size, tag, rid; + u64 start, end, size, tag, rid, def_size; ia64_rr vrr; vcpu_get_rr(current, va, &vrr.rrval); rid = vrr.rid; size = PSIZE(ps); start = va & (-size); end = start + size; + def_size = PSIZE(vrr.ps); while(start < end){ hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag); -// tag = ia64_ttag(start); if(!INVALID_TLB(hash_table)){ - if(hash_table->etag == tag){ - __rem_hash_head(hcb, hash_table); - } - else{ - prev=hash_table; - next=prev->next; - while(next){ - if(next->etag == tag){ - prev->next=next->next; - cch_free(hcb,next); - hash_table->len--; - break; - } - prev=next; - next=next->next; - } - } - } - start += PAGE_SIZE; + if(hash_table->etag == tag){ + __rem_hash_head(hcb, hash_table); + } + else{ + prev=hash_table; + next=prev->next; + while(next){ + if(next->etag == tag){ + prev->next=next->next; + cch_free(hcb,next); + hash_table->len--; + break; + } + prev=next; + next=next->next; + } + } + } + start += def_size; } // machine_tlb_purge(va, ps); } @@ -319,26 +319,26 @@ static void vhpt_purge(thash_cb_t *hcb, start = va & (-size); end = start + size; while(start < end){ - hash_table = (thash_data_t *)ia64_thash(start); - tag = ia64_ttag(start); - if(hash_table->etag == tag ){ + hash_table = (thash_data_t *)ia64_thash(start); + tag = ia64_ttag(start); + 
if(hash_table->etag == tag ){ __rem_hash_head(hcb, hash_table); - } - else{ - prev=hash_table; - next=prev->next; - while(next){ - if(next->etag == tag){ - prev->next=next->next; - cch_free(hcb,next); - hash_table->len--; - break; - } - prev=next; - next=next->next; - } - } - start += PAGE_SIZE; + } + else{ + prev=hash_table; + next=prev->next; + while(next){ + if(next->etag == tag){ + prev->next=next->next; + cch_free(hcb,next); + hash_table->len--; + break; + } + prev=next; + next=next->next; + } + } + start += PAGE_SIZE; } machine_tlb_purge(va, ps); } @@ -390,9 +390,9 @@ void vtlb_insert(thash_cb_t *hcb, u64 pt vcpu_get_rr(current, va, &vrr.rrval); if (vrr.ps != ps) { // machine_tlb_insert(hcb->vcpu, entry); - panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%d\n", - va, vrr.ps, ps); - return; + panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%ld\n", + va, vrr.ps, ps); + return; } hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag); if( INVALID_TLB(hash_table) ) { diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/Makefile --- a/xen/arch/ia64/xen/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/Makefile Tue May 30 14:30:34 2006 -0500 @@ -2,6 +2,7 @@ obj-y += dom0_ops.o obj-y += dom0_ops.o obj-y += domain.o obj-y += dom_fw.o +obj-y += efi_emul.o obj-y += hpsimserial.o obj-y += hypercall.o obj-y += hyperprivop.o diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/dom0_ops.c Tue May 30 14:30:34 2006 -0500 @@ -151,10 +151,7 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ put_domain(d); } break; - /* - * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 - - * it actually allocates and maps pages. 
- */ + case DOM0_GETMEMLIST: { unsigned long i = 0; @@ -198,7 +195,8 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ ret = -ENOMEM; op->u.getmemlist.num_pfns = i - start_page; - copy_to_guest(u_dom0_op, op, 1); + if (copy_to_guest(u_dom0_op, op, 1)) + ret = -EFAULT; put_domain(d); } @@ -264,10 +262,6 @@ do_dom0vp_op(unsigned long cmd, } ret = get_gpfn_from_mfn(arg0); break; - case IA64_DOM0VP_populate_physmap: - ret = dom0vp_populate_physmap(d, arg0, - (unsigned int)arg1, (unsigned int)arg2); - break; case IA64_DOM0VP_zap_physmap: ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1); break; diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/dom_fw.c Tue May 30 14:30:34 2006 -0500 @@ -462,7 +462,7 @@ static void print_md(efi_memory_desc_t * static void print_md(efi_memory_desc_t *md) { #if 1 - printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + printk("domain mem: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) (%luMB)\n", md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), md->num_pages >> (20 - EFI_PAGE_SHIFT)); @@ -541,7 +541,7 @@ struct fake_acpi_tables { struct fadt_descriptor_rev2 fadt; struct facs_descriptor_rev2 facs; struct acpi_table_header dsdt; - u8 aml[16]; + u8 aml[8 + 11 * MAX_VIRT_CPUS]; struct acpi_table_madt madt; struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS]; u8 pm1a_evt_blk[4]; @@ -561,6 +561,7 @@ dom_fw_fake_acpi(struct domain *d, struc struct acpi_table_madt *madt = &tables->madt; struct acpi_table_lsapic *lsapic = tables->lsapic; int i; + int aml_len; memset(tables, 0, sizeof(struct fake_acpi_tables)); @@ -629,7 +630,6 @@ dom_fw_fake_acpi(struct domain *d, struc /* setup DSDT with trivial namespace. 
*/ strncpy(dsdt->signature, DSDT_SIG, 4); dsdt->revision = 1; - dsdt->length = sizeof(struct acpi_table_header) + sizeof(tables->aml); strcpy(dsdt->oem_id, "XEN"); strcpy(dsdt->oem_table_id, "Xen/ia64"); strcpy(dsdt->asl_compiler_id, "XEN"); @@ -637,15 +637,33 @@ dom_fw_fake_acpi(struct domain *d, struc /* Trivial namespace, avoids ACPI CA complaints */ tables->aml[0] = 0x10; /* Scope */ - tables->aml[1] = 0x12; /* length/offset to next object */ - strncpy((char *)&tables->aml[2], "_SB_", 4); + tables->aml[1] = 0x40; /* length/offset to next object (patched) */ + tables->aml[2] = 0x00; + strncpy((char *)&tables->aml[3], "_SB_", 4); /* The processor object isn't absolutely necessary, revist for SMP */ - tables->aml[6] = 0x5b; /* processor object */ - tables->aml[7] = 0x83; - tables->aml[8] = 0x0b; /* next */ - strncpy((char *)&tables->aml[9], "CPU0", 4); - + aml_len = 7; + for (i = 0; i < 3; i++) { + unsigned char *p = tables->aml + aml_len; + p[0] = 0x5b; /* processor object */ + p[1] = 0x83; + p[2] = 0x0b; /* next */ + p[3] = 'C'; + p[4] = 'P'; + snprintf ((char *)p + 5, 3, "%02x", i); + if (i < 16) + p[5] = 'U'; + p[7] = i; /* acpi_id */ + p[8] = 0; /* pblk_addr */ + p[9] = 0; + p[10] = 0; + p[11] = 0; + p[12] = 0; /* pblk_len */ + aml_len += 13; + } + tables->aml[1] = 0x40 + ((aml_len - 1) & 0x0f); + tables->aml[2] = (aml_len - 1) >> 4; + dsdt->length = sizeof(struct acpi_table_header) + aml_len; dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length); /* setup MADT */ @@ -662,6 +680,7 @@ dom_fw_fake_acpi(struct domain *d, struc for (i = 0; i < MAX_VIRT_CPUS; i++) { lsapic[i].header.type = ACPI_MADT_LSAPIC; lsapic[i].header.length = sizeof(struct acpi_table_lsapic); + lsapic[i].acpi_id = i; lsapic[i].id = i; lsapic[i].eid = 0; lsapic[i].flags.enabled = (d->vcpu[i] != NULL); @@ -798,6 +817,9 @@ dom_fw_init (struct domain *d, const cha pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn); cmd_line = (void *) cp; + /* Initialise for EFI_SET_VIRTUAL_ADDRESS_MAP 
emulation */ + d->arch.efi_runtime = efi_runtime; + if (args) { if (arglen >= 1024) arglen = 1023; @@ -959,7 +981,7 @@ dom_fw_init (struct domain *d, const cha MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX #endif /* hypercall patches live here, masquerade as reserved PAL memory */ - MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0); + MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 0); MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE, 0);//XXX make sure this doesn't overlap on i/o, runtime area. #ifndef CONFIG_XEN_IA64_DOM0_VP /* hack */ MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1); @@ -993,7 +1015,7 @@ dom_fw_init (struct domain *d, const cha MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1); #endif /* hypercall patches live here, masquerade as reserved PAL memory */ - MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1); + MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 1); MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1); /* Create a dummy entry for IO ports, so that IO accesses are trapped by Xen. 
*/ @@ -1009,7 +1031,7 @@ dom_fw_init (struct domain *d, const cha BUG_ON(i > NUM_MEM_DESCS); bp->efi_memmap_size = i * sizeof(efi_memory_desc_t); bp->efi_memdesc_size = sizeof(efi_memory_desc_t); - bp->efi_memdesc_version = 1; + bp->efi_memdesc_version = EFI_MEMDESC_VERSION; bp->command_line = dom_pa((unsigned long) cmd_line); bp->console_info.num_cols = 80; bp->console_info.num_rows = 25; @@ -1019,7 +1041,8 @@ dom_fw_init (struct domain *d, const cha if (d == dom0) { // XXX CONFIG_XEN_IA64_DOM0_VP // initrd_start address is hard coded in start_kernel() - bp->initrd_start = ia64_boot_param->initrd_start; + bp->initrd_start = (dom0_start+dom0_size) - + (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024); bp->initrd_size = ia64_boot_param->initrd_size; } else { diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/domain.c Tue May 30 14:30:34 2006 -0500 @@ -77,36 +77,19 @@ static void init_switch_stack(struct vcp static void init_switch_stack(struct vcpu *v); void build_physmap_table(struct domain *d); +static void try_to_clear_PGC_allocate(struct domain* d, + struct page_info* page); + /* this belongs in include/asm, but there doesn't seem to be a suitable place */ void arch_domain_destroy(struct domain *d) { - struct page_info *page; - struct list_head *ent, *prev; - - if (d->arch.mm->pgd != NULL) - { - list_for_each ( ent, &d->arch.mm->pt_list ) - { - page = list_entry(ent, struct page_info, list); - prev = ent->prev; - list_del(ent); - free_xenheap_page(page_to_virt(page)); - ent = prev; - } - pgd_free(d->arch.mm->pgd); - } - if (d->arch.mm != NULL) - xfree(d->arch.mm); + BUG_ON(d->arch.mm.pgd != NULL); if (d->shared_info != NULL) free_xenheap_page(d->shared_info); + domain_flush_destroy (d); + deallocate_rid_range(d); - - /* It is really good in this? */ - flush_tlb_all(); - - /* It is really good in this? 
*/ - vhpt_flush_all(); } static void default_idle(void) @@ -179,7 +162,6 @@ struct vcpu *alloc_vcpu_struct(struct do memset(&d->shared_info->evtchn_mask[0], 0xff, sizeof(d->shared_info->evtchn_mask)); - v->vcpu_info = &(d->shared_info->vcpu_info[0]); v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0; v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4; v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0; @@ -239,7 +221,8 @@ int arch_domain_create(struct domain *d) // the following will eventually need to be negotiated dynamically d->xen_vastart = XEN_START_ADDR; d->xen_vaend = XEN_END_ADDR; - d->shared_info_va = SHAREDINFO_ADDR; + d->arch.shared_info_va = SHAREDINFO_ADDR; + d->arch.breakimm = 0x1000; if (is_idle_domain(d)) return 0; @@ -255,26 +238,20 @@ int arch_domain_create(struct domain *d) */ if (!allocate_rid_range(d,0)) goto fail_nomem; - d->arch.breakimm = 0x1000; d->arch.sys_pgnr = 0; - if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL) - goto fail_nomem; - memset(d->arch.mm, 0, sizeof(*d->arch.mm)); - INIT_LIST_HEAD(&d->arch.mm->pt_list); + memset(&d->arch.mm, 0, sizeof(d->arch.mm)); d->arch.physmap_built = 0; - if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL) + if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL) goto fail_nomem; printf ("arch_domain_create: domain=%p\n", d); return 0; fail_nomem: - if (d->arch.mm->pgd != NULL) - pgd_free(d->arch.mm->pgd); - if (d->arch.mm != NULL) - xfree(d->arch.mm); + if (d->arch.mm.pgd != NULL) + pgd_free(d->arch.mm.pgd); if (d->shared_info != NULL) free_xenheap_page(d->shared_info); return -ENOMEM; @@ -282,11 +259,7 @@ fail_nomem: void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) { - struct pt_regs *regs = vcpu_regs (v); - - c->regs = *regs; - c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector; - + c->regs = *vcpu_regs (v); c->shared = v->domain->shared_info->arch; } @@ -325,11 +298,10 @@ int arch_set_info_guest(struct vcpu *v, } new_thread(v, regs->cr_iip, 0, 0); - 
v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector; - if ( c->vcpu.privregs && copy_from_user(v->arch.privregs, - c->vcpu.privregs, sizeof(mapped_regs_t))) { + if ( c->privregs && copy_from_user(v->arch.privregs, + c->privregs, sizeof(mapped_regs_t))) { printk("Bad ctxt address in arch_set_info_guest: %p\n", - c->vcpu.privregs); + c->privregs); return -EFAULT; } @@ -394,19 +366,129 @@ static void relinquish_memory(struct dom /* Follow the list chain and /then/ potentially free the page. */ ent = ent->next; +#ifdef CONFIG_XEN_IA64_DOM0_VP +#if 1 + BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY); +#else + //XXX this should be done at traversing the P2M table. + if (page_get_owner(page) == d) + set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); +#endif +#endif put_page(page); } spin_unlock_recursive(&d->page_alloc_lock); } +static void +relinquish_pte(struct domain* d, pte_t* pte) +{ + unsigned long mfn = pte_pfn(*pte); + struct page_info* page; + + // vmx domain use bit[58:56] to distinguish io region from memory. + // see vmx_build_physmap_table() in vmx_init.c + if (((mfn << PAGE_SHIFT) & GPFN_IO_MASK) != GPFN_MEM) + return; + + // domain might map IO space or acpi table pages. check it. + if (!mfn_valid(mfn)) + return; + page = mfn_to_page(mfn); + // struct page_info corresponding to mfn may exist or not depending + // on CONFIG_VIRTUAL_FRAME_TABLE. + // This check is too easy. 
+ // The right way is to check whether this page is of io area or acpi pages + if (page_get_owner(page) == NULL) { + BUG_ON(page->count_info != 0); + return; + } + +#ifdef CONFIG_XEN_IA64_DOM0_VP + if (page_get_owner(page) == d) { + BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY); + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + } +#endif + try_to_clear_PGC_allocate(d, page); + put_page(page); +} + +static void +relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset) +{ + unsigned long i; + pte_t* pte = pte_offset_map(pmd, offset); + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + if (!pte_present(*pte)) + continue; + + relinquish_pte(d, pte); + } + pte_free_kernel(pte_offset_map(pmd, offset)); +} + +static void +relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset) +{ + unsigned long i; + pmd_t *pmd = pmd_offset(pud, offset); + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + if (!pmd_present(*pmd)) + continue; + + relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT)); + } + pmd_free(pmd_offset(pud, offset)); +} + +static void +relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset) +{ + unsigned long i; + pud_t *pud = pud_offset(pgd, offset); + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + if (!pud_present(*pud)) + continue; + + relinquish_pud(d, pud, offset + (i << PUD_SHIFT)); + } + pud_free(pud_offset(pgd, offset)); +} + +static void +relinquish_mm(struct domain* d) +{ + struct mm_struct* mm = &d->arch.mm; + unsigned long i; + pgd_t* pgd; + + if (mm->pgd == NULL) + return; + + pgd = pgd_offset(mm, 0); + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + if (!pgd_present(*pgd)) + continue; + + relinquish_pgd(d, pgd, i << PGDIR_SHIFT); + } + pgd_free(mm->pgd); + mm->pgd = NULL; +} + void domain_relinquish_resources(struct domain *d) { /* Relinquish every page of memory. */ - /* xenheap_list is not used in ia64. */ - BUG_ON(!list_empty(&d->xenpage_list)); - + // relase page traversing d->arch.mm. 
+ relinquish_mm(d); + + relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); } @@ -483,11 +565,58 @@ void new_thread(struct vcpu *v, } } +// stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c +void +share_xen_page_with_guest(struct page_info *page, + struct domain *d, int readonly) +{ + if ( page_get_owner(page) == d ) + return; + +#if 1 + if (readonly) { + printk("%s:%d readonly is not supported yet\n", __func__, __LINE__); + } +#endif + + // alloc_xenheap_pages() doesn't initialize page owner. + //BUG_ON(page_get_owner(page) != NULL); +#if 0 + if (get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY) { + printk("%s:%d page 0x%p mfn 0x%lx gpfn 0x%lx\n", __func__, __LINE__, + page, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page))); + } +#endif + // grant_table_destroy() release these pages. + // but it doesn't clear m2p entry. So there might remain stale entry. + // We clear such a stale entry here. + set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); + + spin_lock(&d->page_alloc_lock); + +#ifndef __ia64__ + /* The incremented type count pins as writable or read-only. */ + page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); + page->u.inuse.type_info |= PGT_validated | 1; +#endif + + page_set_owner(page, d); + wmb(); /* install valid domain ptr before updating refcnt. */ + ASSERT(page->count_info == 0); + page->count_info |= PGC_allocated | 1; + + if ( unlikely(d->xenheap_pages++ == 0) ) + get_knownalive_domain(d); + list_add_tail(&page->list, &d->xenpage_list); + + spin_unlock(&d->page_alloc_lock); +} + +//XXX !xxx_present() should be used instread of !xxx_none()? 
static pte_t* lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr) { - struct page_info *pt; - struct mm_struct *mm = d->arch.mm; + struct mm_struct *mm = &d->arch.mm; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -496,22 +625,16 @@ lookup_alloc_domain_pte(struct domain* d pgd = pgd_offset(mm, mpaddr); if (pgd_none(*pgd)) { pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); - pt = maddr_to_page(pgd_val(*pgd)); - list_add_tail(&pt->list, &d->arch.mm->pt_list); } pud = pud_offset(pgd, mpaddr); if (pud_none(*pud)) { pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); - pt = maddr_to_page(pud_val(*pud)); - list_add_tail(&pt->list, &d->arch.mm->pt_list); } pmd = pmd_offset(pud, mpaddr); if (pmd_none(*pmd)) { pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr)); - pt = maddr_to_page(pmd_val(*pmd)); - list_add_tail(&pt->list, &d->arch.mm->pt_list); } return pte_offset_map(pmd, mpaddr); @@ -521,7 +644,7 @@ static pte_t* static pte_t* lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr) { - struct mm_struct *mm = d->arch.mm; + struct mm_struct *mm = &d->arch.mm; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -549,7 +672,7 @@ static pte_t* static pte_t* lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr) { - struct mm_struct *mm = d->arch.mm; + struct mm_struct *mm = &d->arch.mm; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -581,6 +704,7 @@ __assign_new_domain_page(struct domain * { struct page_info *p = NULL; unsigned long maddr; + int ret; BUG_ON(!pte_none(*pte)); @@ -601,14 +725,13 @@ __assign_new_domain_page(struct domain * #endif p = alloc_domheap_page(d); - // zero out pages for security reasons - if (p) - clear_page(page_to_virt(p)); - if (unlikely(!p)) { printf("assign_new_domain_page: Can't alloc!!!! 
Aaaargh!\n"); return(p); } + + // zero out pages for security reasons + clear_page(page_to_virt(p)); maddr = page_to_maddr (p); if (unlikely(maddr > __get_cpu_var(vhpt_paddr) && maddr < __get_cpu_var(vhpt_pend))) { @@ -618,13 +741,15 @@ __assign_new_domain_page(struct domain * maddr); } + ret = get_page(p, d); + BUG_ON(ret == 0); set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); + mb (); //XXX CONFIG_XEN_IA64_DOM0_VP // TODO racy - if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM) - set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT); + set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT); return p; } @@ -668,21 +793,38 @@ assign_new_domain0_page(struct domain *d } /* map a physical address to the specified metaphysical addr */ -void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr) -{ - pte_t *pte; - - pte = lookup_alloc_domain_pte(d, mpaddr); - if (pte_none(*pte)) { - set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); - - //XXX CONFIG_XEN_IA64_DOM0_VP - // TODO racy - if ((physaddr & GPFN_IO_MASK) == GPFN_MEM) - set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT); - } - else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr); +void +__assign_domain_page(struct domain *d, + unsigned long mpaddr, unsigned long physaddr) +{ + pte_t *pte; + + pte = lookup_alloc_domain_pte(d, mpaddr); + if (pte_none(*pte)) { + set_pte(pte, + pfn_pte(physaddr >> PAGE_SHIFT, + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); + mb (); + } else + printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr); +} + +/* get_page() and map a physical address to the specified metaphysical addr */ +void +assign_domain_page(struct domain *d, + unsigned long mpaddr, unsigned long physaddr) +{ + struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT); + int ret; + + BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM); + ret = get_page(page, 
d); + BUG_ON(ret == 0); + __assign_domain_page(d, mpaddr, physaddr); + + //XXX CONFIG_XEN_IA64_DOM0_VP + // TODO racy + set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT); } #ifdef CONFIG_XEN_IA64_DOM0_VP @@ -693,8 +835,58 @@ assign_domain_same_page(struct domain *d //XXX optimization unsigned long end = mpaddr + size; for (; mpaddr < end; mpaddr += PAGE_SIZE) { - assign_domain_page(d, mpaddr, mpaddr); - } + __assign_domain_page(d, mpaddr, mpaddr); + } +} + +static int +efi_mmio(unsigned long physaddr, unsigned long size) +{ + void *efi_map_start, *efi_map_end; + u64 efi_desc_size; + void* p; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + efi_memory_desc_t* md = (efi_memory_desc_t *)p; + unsigned long start = md->phys_addr; + unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (start <= physaddr && physaddr < end) { + if ((physaddr + size) > end) { + DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n", + __func__, __LINE__, physaddr, size); + return 0; + } + + // for io space + if (md->type == EFI_MEMORY_MAPPED_IO || + md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + return 1; + } + + // for runtime + // see efi_enter_virtual_mode(void) + // in linux/arch/ia64/kernel/efi.c + if ((md->attribute & EFI_MEMORY_RUNTIME) && + !(md->attribute & EFI_MEMORY_WB)) { + return 1; + } + + DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n", + __func__, __LINE__, physaddr, size); + return 0; + } + + if (physaddr < start) { + break; + } + } + + return 1; } unsigned long @@ -704,6 +896,11 @@ assign_domain_mmio_page(struct domain *d if (size == 0) { DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n", __func__, d, mpaddr, size); + } + if (!efi_mmio(mpaddr, size)) { + DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n", + __func__, __LINE__, d, mpaddr, size); + 
return -EINVAL; } assign_domain_same_page(d, mpaddr, size); return mpaddr; @@ -723,23 +920,55 @@ domain_page_flush(struct domain* d, unsi domain_page_flush(struct domain* d, unsigned long mpaddr, unsigned long old_mfn, unsigned long new_mfn) { - struct vcpu* v; - //XXX SMP - for_each_vcpu(d, v) { - vcpu_purge_tr_entry(&v->arch.dtlb); - vcpu_purge_tr_entry(&v->arch.itlb); - } - - // flush vhpt - vhpt_flush(); - // flush tlb - flush_tlb_all(); -} - + domain_flush_vtlb_all(); +} +#endif + +//XXX heavily depends on the struct page_info layout. +// +// if (page_get_owner(page) == d && +// test_and_clear_bit(_PGC_allocated, &page->count_info)) { +// put_page(page); +// } static void -zap_domain_page_one(struct domain *d, unsigned long mpaddr) -{ - struct mm_struct *mm = d->arch.mm; +try_to_clear_PGC_allocate(struct domain* d, struct page_info* page) +{ + u32 _d, _nd; + u64 x, nx, y; + + _d = pickle_domptr(d); + y = *((u64*)&page->count_info); + do { + x = y; + _nd = x >> 32; + nx = x - 1; + __clear_bit(_PGC_allocated, &nx); + + if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) { + struct domain* nd = unpickle_domptr(_nd); + if (nd == NULL) { + DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, " + "sd=%p 0x%x," + " caf=%016lx, taf=%" PRtype_info "\n", + (void *) page_to_mfn(page), + d, d->domain_id, _d, + nd, _nd, + x, + page->u.inuse.type_info); + } + break; + } + + BUG_ON((nx & PGC_count_mask) < 1); + y = cmpxchg((u64*)&page->count_info, x, nx); + } while (unlikely(y != x)); +} + +#ifdef CONFIG_XEN_IA64_DOM0_VP +static void +zap_domain_page_one(struct domain *d, unsigned long mpaddr, int do_put_page) +{ + struct mm_struct *mm = &d->arch.mm; pte_t *pte; pte_t old_pte; unsigned long mfn; @@ -755,6 +984,7 @@ zap_domain_page_one(struct domain *d, un old_pte = ptep_get_and_clear(mm, mpaddr, pte); mfn = pte_pfn(old_pte); page = mfn_to_page(mfn); + BUG_ON((page->count_info & PGC_count_mask) == 0); if (page_get_owner(page) == d) { BUG_ON(get_gpfn_from_mfn(mfn) != 
(mpaddr >> PAGE_SHIFT)); @@ -763,7 +993,10 @@ zap_domain_page_one(struct domain *d, un domain_page_flush(d, mpaddr, mfn, INVALID_MFN); - put_page(page); + if (do_put_page) { + try_to_clear_PGC_allocate(d, page); + put_page(page); + } } #endif @@ -867,66 +1100,6 @@ unsigned long lookup_domain_mpa(struct d #ifdef CONFIG_XEN_IA64_DOM0_VP //XXX SMP unsigned long -dom0vp_populate_physmap(struct domain *d, unsigned long gpfn, - unsigned int extent_order, unsigned int address_bits) -{ - unsigned long ret = 0; - int flags = 0; - unsigned long mpaddr = gpfn << PAGE_SHIFT; - unsigned long extent_size = 1UL << extent_order; - unsigned long offset; - struct page_info* page; - unsigned long physaddr; - - if (extent_order > 0 && !multipage_allocation_permitted(d)) { - ret = -EINVAL; - goto out; - } - - if (gpfn + (1 << extent_order) < gpfn) { - ret = -EINVAL; - goto out; - } - if (gpfn > d->max_pages || gpfn + (1 << extent_order) > d->max_pages) { - ret = -EINVAL; - goto out; - } - if ((extent_size << PAGE_SHIFT) < extent_size) { - ret = -EINVAL; - goto out; - } - - //XXX check address_bits and set flags = ALLOC_DOM_DMA if needed - - // check the rage is not populated yet. 
- //XXX loop optimization - for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) { - if (____lookup_domain_mpa(d, mpaddr + offset) != INVALID_MFN) { - ret = -EBUSY; - goto out; - } - } - - page = alloc_domheap_pages(d, extent_order, flags); - if (page == NULL) { - ret = -ENOMEM; - DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n", - extent_order, d->domain_id, flags); - goto out; - } - - //XXX loop optimization - physaddr = page_to_maddr(page); - for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) { - assign_domain_page(d, mpaddr + offset, physaddr + offset); - } - -out: - return ret; -} - -//XXX SMP -unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order) { @@ -937,26 +1110,28 @@ dom0vp_zap_physmap(struct domain *d, uns goto out; } - zap_domain_page_one(d, gpfn << PAGE_SHIFT); + zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1); out: return ret; } +// caller must get_page(mfn_to_page(mfn)) before +// caller must call set_gpfn_from_mfn(). 
static void assign_domain_page_replace(struct domain *d, unsigned long mpaddr, unsigned long mfn, unsigned int flags) { - struct mm_struct *mm = d->arch.mm; + struct mm_struct *mm = &d->arch.mm; pte_t* pte; pte_t old_pte; + pte_t npte; pte = lookup_alloc_domain_pte(d, mpaddr); // update pte - old_pte = ptep_get_and_clear(mm, mpaddr, pte); - set_pte(pte, pfn_pte(mfn, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); + npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)); + old_pte = ptep_xchg(mm, mpaddr, pte, npte); if (!pte_none(old_pte)) { unsigned long old_mfn; struct page_info* old_page; @@ -973,8 +1148,10 @@ assign_domain_page_replace(struct domain domain_page_flush(d, mpaddr, old_mfn, mfn); + try_to_clear_PGC_allocate(d, old_page); put_page(old_page); } else { + BUG_ON(!mfn_valid(mfn)); BUG_ON(page_get_owner(mfn_to_page(mfn)) == d && get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); } @@ -1002,17 +1179,195 @@ dom0vp_add_physmap(struct domain* d, uns } assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* flags:XXX */); + //don't update p2m table because this page belongs to rd, not d. out1: put_domain(rd); out0: return error; } + +// grant table host mapping +// mpaddr: host_addr: pseudo physical address +// mfn: frame: machine page frame +// flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte +int +create_grant_host_mapping(unsigned long gpaddr, + unsigned long mfn, unsigned int flags) +{ + struct domain* d = current->domain; + struct page_info* page; + int ret; + + if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) { + DPRINTK("%s: flags 0x%x\n", __func__, flags); + return GNTST_general_error; + } + if (flags & GNTMAP_readonly) { +#if 0 + DPRINTK("%s: GNTMAP_readonly is not implemented yet. 
flags %x\n", + __func__, flags); +#endif + flags &= ~GNTMAP_readonly; + } + + page = mfn_to_page(mfn); + ret = get_page(page, page_get_owner(page)); + BUG_ON(ret == 0); + assign_domain_page_replace(d, gpaddr, mfn, flags); + + return GNTST_okay; +} + +// grant table host unmapping +int +destroy_grant_host_mapping(unsigned long gpaddr, + unsigned long mfn, unsigned int flags) +{ + struct domain* d = current->domain; + pte_t* pte; + pte_t old_pte; + unsigned long old_mfn = INVALID_MFN; + struct page_info* old_page; + + if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) { + DPRINTK("%s: flags 0x%x\n", __func__, flags); + return GNTST_general_error; + } + if (flags & GNTMAP_readonly) { +#if 0 + DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n", + __func__, flags); +#endif + flags &= ~GNTMAP_readonly; + } + + pte = lookup_noalloc_domain_pte(d, gpaddr); + if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn) + return GNTST_general_error;//XXX GNTST_bad_pseudo_phys_addr + + // update pte + old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte); + if (pte_present(old_pte)) { + old_mfn = pte_pfn(old_pte);//XXX + } + domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN); + + old_page = mfn_to_page(old_mfn); + BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. + put_page(old_page); + + return GNTST_okay; +} + +//XXX needs refcount patch +//XXX heavily depends on the struct page layout. 
+//XXX SMP +int +steal_page_for_grant_transfer(struct domain *d, struct page_info *page) +{ +#if 0 /* if big endian */ +# error "implement big endian version of steal_page_for_grant_transfer()" +#endif + u32 _d, _nd; + u64 x, nx, y; + unsigned long mpaddr = get_gpfn_from_mfn(page_to_mfn(page)) << PAGE_SHIFT; + struct page_info *new; + + zap_domain_page_one(d, mpaddr, 0); + put_page(page); + + spin_lock(&d->page_alloc_lock); + + /* + * The tricky bit: atomically release ownership while there is just one + * benign reference to the page (PGC_allocated). If that reference + * disappears then the deallocation routine will safely spin. + */ + _d = pickle_domptr(d); + y = *((u64*)&page->count_info); + do { + x = y; + nx = x & 0xffffffff; + // page->count_info: untouched + // page->u.inused._domain = 0; + _nd = x >> 32; + + if (unlikely((x & (PGC_count_mask | PGC_allocated)) != + (1 | PGC_allocated)) || + unlikely(_nd != _d)) { + struct domain* nd = unpickle_domptr(_nd); + if (nd == NULL) { + DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, " + "sd=%p 0x%x," + " caf=%016lx, taf=%" PRtype_info "\n", + (void *) page_to_mfn(page), + d, d->domain_id, _d, + nd, _nd, + x, + page->u.inuse.type_info); + } else { + DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, " + "sd=%p(%u) 0x%x," + " caf=%016lx, taf=%" PRtype_info "\n", + (void *) page_to_mfn(page), + d, d->domain_id, _d, + nd, nd->domain_id, _nd, + x, + page->u.inuse.type_info); + } + spin_unlock(&d->page_alloc_lock); + return -1; + } + + y = cmpxchg((u64*)&page->count_info, x, nx); + } while (unlikely(y != x)); + + /* + * Unlink from 'd'. At least one reference remains (now anonymous), so + * noone else is spinning to try to delete this page from 'd'. 
+ */ + d->tot_pages--; + list_del(&page->list); + + spin_unlock(&d->page_alloc_lock); + +#if 1 + //XXX Until net_rx_action() fix + // assign new page for this mpaddr + new = assign_new_domain_page(d, mpaddr); + BUG_ON(new == NULL);//XXX +#endif + + return 0; +} + +void +guest_physmap_add_page(struct domain *d, unsigned long gpfn, + unsigned long mfn) +{ + int ret; + + ret = get_page(mfn_to_page(mfn), d); + BUG_ON(ret == 0); + assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* XXX */); + set_gpfn_from_mfn(mfn, gpfn);//XXX SMP + + //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT)); +} + +void +guest_physmap_remove_page(struct domain *d, unsigned long gpfn, + unsigned long mfn) +{ + BUG_ON(mfn == 0);//XXX + zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1); +} #endif /* Flush cache of domain d. */ void domain_cache_flush (struct domain *d, int sync_only) { - struct mm_struct *mm = d->arch.mm; + struct mm_struct *mm = &d->arch.mm; pgd_t *pgd = mm->pgd; unsigned long maddr; int i,j,k, l; @@ -1478,9 +1833,9 @@ void domain_pend_keyboard_interrupt(int void sync_vcpu_execstate(struct vcpu *v) { - __ia64_save_fpu(v->arch._thread.fph); - if (VMX_DOMAIN(v)) - vmx_save_state(v); +// __ia64_save_fpu(v->arch._thread.fph); +// if (VMX_DOMAIN(v)) +// vmx_save_state(v); // FIXME SMP: Anything else needed here for SMP? } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/hypercall.c Tue May 30 14:30:34 2006 -0500 @@ -26,7 +26,6 @@ #include <public/physdev.h> #include <xen/domain.h> -extern unsigned long translate_domain_mpaddr(unsigned long); static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop); static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg); /* FIXME: where these declarations should be there ? 
*/ @@ -71,13 +70,39 @@ hypercall_t ia64_hypercall_table[] = (hypercall_t)do_ni_hypercall, /* */ /* 30 */ (hypercall_t)do_ni_hypercall, /* */ (hypercall_t)do_event_channel_op, - (hypercall_t)do_physdev_op + (hypercall_t)do_physdev_op, + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ /* 35 */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ /* 40 */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ /* 45 */ + (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_ni_hypercall, /* */ +#ifdef CONFIG_XEN_IA64_DOM0_VP + (hypercall_t)do_dom0vp_op, /* dom0vp_op */ +#else + (hypercall_t)do_ni_hypercall, /* arch_0 */ +#endif + (hypercall_t)do_ni_hypercall, /* arch_1 */ + (hypercall_t)do_ni_hypercall, /* arch_2 */ /* 50 */ + (hypercall_t)do_ni_hypercall, /* arch_3 */ + (hypercall_t)do_ni_hypercall, /* arch_4 */ + (hypercall_t)do_ni_hypercall, /* arch_5 */ + (hypercall_t)do_ni_hypercall, /* arch_6 */ + (hypercall_t)do_ni_hypercall /* arch_7 */ /* 55 */ }; uint32_t nr_hypercalls = sizeof(ia64_hypercall_table) / sizeof(hypercall_t); -static int +static IA64FAULT xen_hypercall (struct pt_regs *regs) { uint32_t cmd = (uint32_t)regs->r2; @@ -91,15 +116,9 @@ xen_hypercall (struct pt_regs *regs) regs->r18, regs->r19); else -#ifdef CONFIG_XEN_IA64_DOM0_VP - if (cmd == __HYPERVISOR_ia64_dom0vp_op) - regs->r8 = do_dom0vp_op(regs->r14, regs->r15, regs->r16, - regs->r17, regs->r18); - else -#endif regs->r8 = -ENOSYS; - return 1; + return IA64_NO_FAULT; } @@ -134,9 +153,6 @@ fw_hypercall_ipi (struct pt_regs *regs) c.regs.cr_iip = targ_regs->cr_iip; c.regs.r1 = targ_regs->r1; - /* Copy from vcpu 0. 
*/ - c.vcpu.evtchn_vector = - current->domain->vcpu[0]->vcpu_info->arch.evtchn_vector; if (arch_set_info_guest (targ, &c) != 0) { printf ("arch_boot_vcpu: failure\n"); return; @@ -162,14 +178,16 @@ fw_hypercall_ipi (struct pt_regs *regs) return; } -static int +static IA64FAULT fw_hypercall (struct pt_regs *regs) { struct vcpu *v = current; struct sal_ret_values x; - unsigned long *tv, *tc; - - switch (regs->r2) { + efi_status_t efi_ret_value; + IA64FAULT fault; + unsigned long index = regs->r2 & FW_HYPERCALL_NUM_MASK_HIGH; + + switch (index) { case FW_HYPERCALL_PAL_CALL: //printf("*** PAL hypercall: index=%d\n",regs->r28); //FIXME: This should call a C routine @@ -227,40 +245,10 @@ fw_hypercall (struct pt_regs *regs) regs->r8 = x.r8; regs->r9 = x.r9; regs->r10 = x.r10; regs->r11 = x.r11; break; - case FW_HYPERCALL_EFI_RESET_SYSTEM: - printf("efi.reset_system called "); - if (current->domain == dom0) { - printf("(by dom0)\n "); - (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); - } - else - domain_shutdown (current->domain, SHUTDOWN_reboot); - regs->r8 = EFI_UNSUPPORTED; - break; - case FW_HYPERCALL_EFI_GET_TIME: - tv = (unsigned long *) vcpu_get_gr(v,32); - tc = (unsigned long *) vcpu_get_gr(v,33); - //printf("efi_get_time(%p,%p) called...",tv,tc); - tv = (unsigned long *) __va(translate_domain_mpaddr((unsigned long) tv)); - if (tc) tc = (unsigned long *) __va(translate_domain_mpaddr((unsigned long) tc)); - regs->r8 = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc); - //printf("and returns %lx\n",regs->r8); - break; - case FW_HYPERCALL_EFI_SET_TIME: - case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: - case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: - // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED - // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS - // POINTER ARGUMENTS WILL BE VIRTUAL!! 
- case FW_HYPERCALL_EFI_GET_VARIABLE: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: - case FW_HYPERCALL_EFI_SET_VARIABLE: - case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: - // FIXME: need fixes in efi.h from 2.6.9 - regs->r8 = EFI_UNSUPPORTED; + case FW_HYPERCALL_EFI_CALL: + efi_ret_value = efi_emulator (regs, &fault); + if (fault != IA64_NO_FAULT) return fault; + regs->r8 = efi_ret_value; break; case FW_HYPERCALL_IPI: fw_hypercall_ipi (regs); @@ -269,7 +257,7 @@ fw_hypercall (struct pt_regs *regs) printf("unknown ia64 fw hypercall %lx\n", regs->r2); regs->r8 = do_ni_hypercall(); } - return 1; + return IA64_NO_FAULT; } /* opt_unsafe_hypercall: If true, unsafe debugging hypercalls are allowed. @@ -277,7 +265,7 @@ static int opt_unsafe_hypercall = 0; static int opt_unsafe_hypercall = 0; boolean_param("unsafe_hypercall", opt_unsafe_hypercall); -int +IA64FAULT ia64_hypercall (struct pt_regs *regs) { struct vcpu *v = current; @@ -307,7 +295,7 @@ ia64_hypercall (struct pt_regs *regs) printf("unknown user xen/ia64 hypercall %lx\n", index); regs->r8 = do_ni_hypercall(); } - return 1; + return IA64_NO_FAULT; } /* Hypercalls are only allowed by kernel. @@ -316,7 +304,7 @@ ia64_hypercall (struct pt_regs *regs) /* FIXME: Return a better error value ? Reflection ? Illegal operation ? */ regs->r8 = -1; - return 1; + return IA64_NO_FAULT; } if (index >= FW_HYPERCALL_FIRST_ARCH) diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/hyperprivop.S Tue May 30 14:30:34 2006 -0500 @@ -30,7 +30,7 @@ #undef FAST_ITC //XXX CONFIG_XEN_IA64_DOM0_VP // TODO fast_itc doesn't suport dom0 vp yet. 
#else -//#define FAST_ITC // working but default off for now +//#define FAST_ITC // to be reviewed #endif #define FAST_BREAK #ifndef CONFIG_XEN_IA64_DOM0_VP @@ -46,27 +46,8 @@ #undef RFI_TO_INTERRUPT // not working yet #endif -#define XEN_HYPER_RFI 0x1 -#define XEN_HYPER_RSM_DT 0x2 -#define XEN_HYPER_SSM_DT 0x3 -#define XEN_HYPER_COVER 0x4 -#define XEN_HYPER_ITC_D 0x5 -#define XEN_HYPER_ITC_I 0x6 -#define XEN_HYPER_SSM_I 0x7 -#define XEN_HYPER_GET_IVR 0x8 -#define XEN_HYPER_GET_TPR 0x9 -#define XEN_HYPER_SET_TPR 0xa -#define XEN_HYPER_EOI 0xb -#define XEN_HYPER_SET_ITM 0xc -#define XEN_HYPER_THASH 0xd -#define XEN_HYPER_PTC_GA 0xe -#define XEN_HYPER_ITR_D 0xf -#define XEN_HYPER_GET_RR 0x10 -#define XEN_HYPER_SET_RR 0x11 -#define XEN_HYPER_SET_KR 0x12 - #ifdef CONFIG_SMP -#warning "FIXME: ptc.ga instruction requires spinlock for SMP" +//#warning "FIXME: ptc.ga instruction requires spinlock for SMP" #undef FAST_PTC_GA #endif @@ -106,7 +87,7 @@ GLOBAL_ENTRY(fast_hyperprivop) #endif // HYPERPRIVOP_SSM_I? // assumes domain interrupts pending, so just do it - cmp.eq p7,p6=XEN_HYPER_SSM_I,r17 + cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17 (p7) br.sptk.many hyper_ssm_i;; // FIXME. This algorithm gives up (goes to the slow path) if there @@ -127,75 +108,75 @@ 1: // when we get to here r20=~=interrup 1: // when we get to here r20=~=interrupts pending // HYPERPRIVOP_RFI? - cmp.eq p7,p6=XEN_HYPER_RFI,r17 + cmp.eq p7,p6=HYPERPRIVOP_RFI,r17 (p7) br.sptk.many hyper_rfi;; // HYPERPRIVOP_GET_IVR? - cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17 + cmp.eq p7,p6=HYPERPRIVOP_GET_IVR,r17 (p7) br.sptk.many hyper_get_ivr;; cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; // HYPERPRIVOP_COVER? - cmp.eq p7,p6=XEN_HYPER_COVER,r17 + cmp.eq p7,p6=HYPERPRIVOP_COVER,r17 (p7) br.sptk.many hyper_cover;; // HYPERPRIVOP_SSM_DT? - cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17 + cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17 (p7) br.sptk.many hyper_ssm_dt;; // HYPERPRIVOP_RSM_DT? 
- cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17 + cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17 (p7) br.sptk.many hyper_rsm_dt;; // HYPERPRIVOP_GET_TPR? - cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17 + cmp.eq p7,p6=HYPERPRIVOP_GET_TPR,r17 (p7) br.sptk.many hyper_get_tpr;; // HYPERPRIVOP_SET_TPR? - cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17 + cmp.eq p7,p6=HYPERPRIVOP_SET_TPR,r17 (p7) br.sptk.many hyper_set_tpr;; // HYPERPRIVOP_EOI? - cmp.eq p7,p6=XEN_HYPER_EOI,r17 + cmp.eq p7,p6=HYPERPRIVOP_EOI,r17 (p7) br.sptk.many hyper_eoi;; // HYPERPRIVOP_SET_ITM? - cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17 + cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17 (p7) br.sptk.many hyper_set_itm;; // HYPERPRIVOP_SET_RR? - cmp.eq p7,p6=XEN_HYPER_SET_RR,r17 + cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17 (p7) br.sptk.many hyper_set_rr;; // HYPERPRIVOP_GET_RR? - cmp.eq p7,p6=XEN_HYPER_GET_RR,r17 + cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17 (p7) br.sptk.many hyper_get_rr;; // HYPERPRIVOP_PTC_GA? - cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17 + cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17 (p7) br.sptk.many hyper_ptc_ga;; // HYPERPRIVOP_ITC_D? - cmp.eq p7,p6=XEN_HYPER_ITC_D,r17 + cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17 (p7) br.sptk.many hyper_itc_d;; // HYPERPRIVOP_ITC_I? - cmp.eq p7,p6=XEN_HYPER_ITC_I,r17 + cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17 (p7) br.sptk.many hyper_itc_i;; // HYPERPRIVOP_THASH? - cmp.eq p7,p6=XEN_HYPER_THASH,r17 + cmp.eq p7,p6=HYPERPRIVOP_THASH,r17 (p7) br.sptk.many hyper_thash;; // HYPERPRIVOP_SET_KR? 
- cmp.eq p7,p6=XEN_HYPER_SET_KR,r17 + cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17 (p7) br.sptk.many hyper_set_kr;; // if not one of the above, give up for now and do it the slow way br.sptk.many dispatch_break_fault ;; - +END(fast_hyperprivop) // give up for now if: ipsr.be==1, ipsr.pp==1 // from reflect_interruption, don't need to: @@ -250,7 +231,7 @@ ENTRY(hyper_ssm_i) cmp.ne p7,p0=r21,r0 (p7) br.sptk.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_I);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -348,6 +329,7 @@ ENTRY(hyper_ssm_i) mov pr=r31,-1 ;; rfi ;; +END(hyper_ssm_i) // reflect domain clock interrupt // r31 == pr @@ -594,7 +576,7 @@ 1: adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;; st8 [r21]=r17;; // fall through - +END(fast_break_reflect) // reflect to domain ivt+r20 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too) @@ -723,6 +705,7 @@ ENTRY(fast_reflect) mov pr=r31,-1 ;; rfi ;; +END(fast_reflect) // reflect access faults (0x2400,0x2800,0x5300) directly to domain // r16 == isr @@ -762,6 +745,7 @@ GLOBAL_ENTRY(fast_access_reflect) and r22=~3,r22;; st8 [r23]=r22;; br.cond.sptk.many fast_reflect;; +END(fast_access_reflect) // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything // is as it was at the time of original miss. We want to preserve that @@ -769,7 +753,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect) GLOBAL_ENTRY(fast_tlb_miss_reflect) #ifndef FAST_TLB_MISS_REFLECT // see beginning of file br.spnt.few page_fault ;; -#endif +#else mov r31=pr mov r30=cr.ipsr mov r29=cr.iip @@ -957,6 +941,7 @@ 1: // check the guest VHPT extr.u r24=r24,2,6;; // IFA already in PSCB br.cond.sptk.many fast_insert;; +END(fast_tlb_miss_reflect) // we get here if fast_insert fails (e.g. 
due to metaphysical lookup) ENTRY(recover_and_page_fault) @@ -1007,6 +992,7 @@ 1: extr.u r25=r17,61,3;; mov r29=cr.iip mov r30=cr.ipsr br.sptk.many fast_reflect;; +#endif END(fast_tlb_miss_reflect) // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged @@ -1065,7 +1051,7 @@ 1: 1: // OK now, let's do an rfi. #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RFI);; ld8 r23=[r20];; adds r23=1,r23;; st8 [r20]=r23;; @@ -1145,9 +1131,10 @@ 1: mov pr=r31,-1 ;; rfi ;; - +END(hyper_rfi) + #ifdef RFI_TO_INTERRUPT -GLOBAL_ENTRY(rfi_check_extint) +ENTRY(rfi_check_extint) //br.sptk.many dispatch_break_fault ;; // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip @@ -1214,11 +1201,12 @@ GLOBAL_ENTRY(rfi_check_extint) adds r29=15,r29;; cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi (p6) br.cond.spnt.few just_do_rfi;; +END(rfi_check_extint) // this doesn't work yet (dies early after getting to user mode) // but happens relatively infrequently, so fix it later. 
// NOTE that these will be counted incorrectly for now (for privcnt output) -GLOBAL_ENTRY(rfi_with_interrupt) +ENTRY(rfi_with_interrupt) #if 1 br.sptk.many dispatch_break_fault ;; #endif @@ -1313,11 +1301,12 @@ GLOBAL_ENTRY(rfi_with_interrupt) st4 [r20]=r0 ;; mov pr=r31,-1 ;; rfi +END(rfi_with_interrupt) #endif // RFI_TO_INTERRUPT ENTRY(hyper_cover) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_COVER);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1348,11 +1337,12 @@ ENTRY(hyper_cover) mov pr=r31,-1 ;; rfi ;; +END(hyper_cover) // return from metaphysical mode (meta=1) to virtual mode (meta=0) ENTRY(hyper_ssm_dt) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_DT);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1384,11 +1374,12 @@ 1: extr.u r26=r24,41,2 ;; mov pr=r31,-1 ;; rfi ;; +END(hyper_ssm_dt) // go to metaphysical mode (meta=1) from virtual mode (meta=0) ENTRY(hyper_rsm_dt) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RSM_DT);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1421,10 +1412,11 @@ 1: extr.u r26=r24,41,2 ;; mov pr=r31,-1 ;; rfi ;; +END(hyper_rsm_dt) ENTRY(hyper_get_tpr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_TPR);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1453,7 +1445,7 @@ END(hyper_get_tpr) // (or accidentally missing) delivering an interrupt ENTRY(hyper_set_tpr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_TPR);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1481,7 +1473,7 @@ END(hyper_set_tpr) ENTRY(hyper_get_ivr) #ifdef FAST_HYPERPRIVOP_CNT - movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);; 
+ movl r22=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_IVR);; ld8 r21=[r22];; adds r21=1,r21;; st8 [r22]=r21;; @@ -1593,7 +1585,7 @@ ENTRY(hyper_eoi) cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_EOI);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1657,7 +1649,7 @@ ENTRY(hyper_set_itm) cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_ITM);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1698,7 +1690,7 @@ END(hyper_set_itm) ENTRY(hyper_get_rr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_RR);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1730,7 +1722,7 @@ ENTRY(hyper_set_rr) cmp.leu p7,p0=7,r25 // punt on setting rr7 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_RR);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1788,7 +1780,7 @@ ENTRY(hyper_set_kr) cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_KR);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1844,9 +1836,9 @@ END(hyper_set_kr) // On entry: // r18 == XSI_PSR_IC // r31 == pr -GLOBAL_ENTRY(hyper_thash) +ENTRY(hyper_thash) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_THASH);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1915,7 +1907,7 @@ ENTRY(hyper_ptc_ga) #endif // FIXME: validate not flushing Xen addresses #ifdef FAST_HYPERPRIVOP_CNT - movl 
r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);; + movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_PTC_GA);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -1992,18 +1984,19 @@ ENTRY(recover_and_dispatch_break_fault) #endif mov b0=r29 ;; br.sptk.many dispatch_break_fault;; +END(recover_and_dispatch_break_fault) // Registers at entry -// r17 = break immediate (XEN_HYPER_ITC_D or I) +// r17 = break immediate (HYPERPRIVOP_ITC_D or I) // r18 == XSI_PSR_IC_OFS // r31 == pr -GLOBAL_ENTRY(hyper_itc) -ENTRY(hyper_itc_i) +ENTRY(hyper_itc) +hyper_itc_i: // fall through, hyper_itc_d handles both i and d -ENTRY(hyper_itc_d) +hyper_itc_d: #ifndef FAST_ITC br.sptk.many dispatch_break_fault ;; -#endif +#else // ensure itir.ps >= xen's pagesize adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r23=[r23];; @@ -2027,9 +2020,9 @@ ENTRY(hyper_itc_d) cmp.ne p7,p0=r27,r28 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; -(p6) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);; -(p7) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);; + cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;; +(p6) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_D);; +(p7) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_I);; ld8 r21=[r20];; adds r21=1,r21;; st8 [r20]=r21;; @@ -2040,7 +2033,10 @@ ENTRY(hyper_itc_d) movl r30=recover_and_dispatch_break_fault ;; mov r16=r8;; // fall through - +#endif +END(hyper_itc) + +#if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT) // fast_insert(PSCB(ifa),r24=ps,r16=pte) // r16 == pte @@ -2050,7 +2046,7 @@ ENTRY(hyper_itc_d) // r29 == saved value of b0 in case of recovery // r30 == recovery ip if failure occurs // r31 == pr -GLOBAL_ENTRY(fast_insert) +ENTRY(fast_insert) // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir) mov r19=1;; shl r20=r19,r24;; @@ -2175,4 +2171,4 @@ no_inc_iip: rfi ;; END(fast_insert) - +#endif diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/ivt.S --- a/xen/arch/ia64/xen/ivt.S Tue May 30 12:52:02 
2006 -0500 +++ b/xen/arch/ia64/xen/ivt.S Tue May 30 14:30:34 2006 -0500 @@ -100,6 +100,15 @@ mov r19=n;; /* prepare to save predicates */ \ br.sptk.many dispatch_to_fault_handler +#define FAULT_OR_REFLECT(n) \ + mov r31=pr; \ + mov r20=cr.ipsr;; \ + mov r19=n; /* prepare to save predicates */ \ + extr.u r20=r20,IA64_PSR_CPL0_BIT,2;; \ + cmp.ne p6,p0=r0,r20; /* cpl != 0?*/ \ +(p6) br.dptk.many dispatch_reflection; \ + br.sptk.few dispatch_to_fault_handler + #ifdef XEN #define REFLECT(n) \ mov r31=pr; \ @@ -697,7 +706,7 @@ ENTRY(ikey_miss) ENTRY(ikey_miss) DBG_FAULT(6) #ifdef XEN - REFLECT(6) + FAULT_OR_REFLECT(6) #endif FAULT(6) END(ikey_miss) @@ -746,7 +755,7 @@ ENTRY(dkey_miss) ENTRY(dkey_miss) DBG_FAULT(7) #ifdef XEN - REFLECT(7) + FAULT_OR_REFLECT(7) #endif FAULT(7) END(dkey_miss) @@ -757,7 +766,7 @@ ENTRY(dirty_bit) ENTRY(dirty_bit) DBG_FAULT(8) #ifdef XEN - REFLECT(8) + FAULT_OR_REFLECT(8) #endif /* * What we do here is to simply turn on the dirty bit in the PTE. We need to @@ -1523,7 +1532,7 @@ ENTRY(page_not_present) ENTRY(page_not_present) DBG_FAULT(20) #ifdef XEN - REFLECT(20) + FAULT_OR_REFLECT(20) #endif mov r16=cr.ifa rsm psr.dt @@ -1546,7 +1555,7 @@ ENTRY(key_permission) ENTRY(key_permission) DBG_FAULT(21) #ifdef XEN - REFLECT(21) + FAULT_OR_REFLECT(21) #endif mov r16=cr.ifa rsm psr.dt @@ -1562,7 +1571,7 @@ ENTRY(iaccess_rights) ENTRY(iaccess_rights) DBG_FAULT(22) #ifdef XEN - REFLECT(22) + FAULT_OR_REFLECT(22) #endif mov r16=cr.ifa rsm psr.dt @@ -1637,7 +1646,7 @@ ENTRY(disabled_fp_reg) mov pr=r20,-1 ;; #endif - REFLECT(25) + FAULT_OR_REFLECT(25) //floating_panic: // br.sptk.many floating_panic ;; @@ -1656,7 +1665,7 @@ ENTRY(nat_consumption) ENTRY(nat_consumption) DBG_FAULT(26) #ifdef XEN - REFLECT(26) + FAULT_OR_REFLECT(26) #endif FAULT(26) END(nat_consumption) @@ -1668,7 +1677,7 @@ ENTRY(speculation_vector) DBG_FAULT(27) #ifdef XEN // this probably need not reflect... 
- REFLECT(27) + FAULT_OR_REFLECT(27) #endif /* * A [f]chk.[as] instruction needs to take the branch to the recovery code but @@ -1714,7 +1723,7 @@ ENTRY(debug_vector) ENTRY(debug_vector) DBG_FAULT(29) #ifdef XEN - REFLECT(29) + FAULT_OR_REFLECT(29) #endif FAULT(29) END(debug_vector) @@ -1725,7 +1734,7 @@ ENTRY(unaligned_access) ENTRY(unaligned_access) DBG_FAULT(30) #ifdef XEN - REFLECT(30) + FAULT_OR_REFLECT(30) #endif mov r16=cr.ipsr mov r31=pr // prepare to save predicates @@ -1739,7 +1748,7 @@ ENTRY(unsupported_data_reference) ENTRY(unsupported_data_reference) DBG_FAULT(31) #ifdef XEN - REFLECT(31) + FAULT_OR_REFLECT(31) #endif FAULT(31) END(unsupported_data_reference) @@ -1750,7 +1759,7 @@ ENTRY(floating_point_fault) ENTRY(floating_point_fault) DBG_FAULT(32) #ifdef XEN - REFLECT(32) + FAULT_OR_REFLECT(32) #endif FAULT(32) END(floating_point_fault) @@ -1761,7 +1770,7 @@ ENTRY(floating_point_trap) ENTRY(floating_point_trap) DBG_FAULT(33) #ifdef XEN - REFLECT(33) + FAULT_OR_REFLECT(33) #endif FAULT(33) END(floating_point_trap) @@ -1772,7 +1781,7 @@ ENTRY(lower_privilege_trap) ENTRY(lower_privilege_trap) DBG_FAULT(34) #ifdef XEN - REFLECT(34) + FAULT_OR_REFLECT(34) #endif FAULT(34) END(lower_privilege_trap) @@ -1783,7 +1792,7 @@ ENTRY(taken_branch_trap) ENTRY(taken_branch_trap) DBG_FAULT(35) #ifdef XEN - REFLECT(35) + FAULT_OR_REFLECT(35) #endif FAULT(35) END(taken_branch_trap) @@ -1794,7 +1803,7 @@ ENTRY(single_step_trap) ENTRY(single_step_trap) DBG_FAULT(36) #ifdef XEN - REFLECT(36) + FAULT_OR_REFLECT(36) #endif FAULT(36) END(single_step_trap) @@ -1853,7 +1862,7 @@ ENTRY(ia32_exception) ENTRY(ia32_exception) DBG_FAULT(45) #ifdef XEN - REFLECT(45) + FAULT_OR_REFLECT(45) #endif FAULT(45) END(ia32_exception) @@ -1864,7 +1873,7 @@ ENTRY(ia32_intercept) ENTRY(ia32_intercept) DBG_FAULT(46) #ifdef XEN - REFLECT(46) + FAULT_OR_REFLECT(46) #endif #ifdef CONFIG_IA32_SUPPORT mov r31=pr @@ -1897,7 +1906,7 @@ ENTRY(ia32_interrupt) ENTRY(ia32_interrupt) DBG_FAULT(47) #ifdef 
XEN - REFLECT(47) + FAULT_OR_REFLECT(47) #endif #ifdef CONFIG_IA32_SUPPORT mov r31=pr diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/privop.c --- a/xen/arch/ia64/xen/privop.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/privop.c Tue May 30 14:30:34 2006 -0500 @@ -793,33 +793,6 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr); return fault; } - - -// FIXME: Move these to include/public/arch-ia64? -#define HYPERPRIVOP_RFI 0x1 -#define HYPERPRIVOP_RSM_DT 0x2 -#define HYPERPRIVOP_SSM_DT 0x3 -#define HYPERPRIVOP_COVER 0x4 -#define HYPERPRIVOP_ITC_D 0x5 -#define HYPERPRIVOP_ITC_I 0x6 -#define HYPERPRIVOP_SSM_I 0x7 -#define HYPERPRIVOP_GET_IVR 0x8 -#define HYPERPRIVOP_GET_TPR 0x9 -#define HYPERPRIVOP_SET_TPR 0xa -#define HYPERPRIVOP_EOI 0xb -#define HYPERPRIVOP_SET_ITM 0xc -#define HYPERPRIVOP_THASH 0xd -#define HYPERPRIVOP_PTC_GA 0xe -#define HYPERPRIVOP_ITR_D 0xf -#define HYPERPRIVOP_GET_RR 0x10 -#define HYPERPRIVOP_SET_RR 0x11 -#define HYPERPRIVOP_SET_KR 0x12 -#define HYPERPRIVOP_FC 0x13 -#define HYPERPRIVOP_GET_CPUID 0x14 -#define HYPERPRIVOP_GET_PMD 0x15 -#define HYPERPRIVOP_GET_EFLAG 0x16 -#define HYPERPRIVOP_SET_EFLAG 0x17 -#define HYPERPRIVOP_MAX 0x17 static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = { 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i", diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/process.c Tue May 30 14:30:34 2006 -0500 @@ -15,7 +15,6 @@ #include <asm/ptrace.h> #include <xen/delay.h> -#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ #include <asm/sal.h> /* FOR struct ia64_sal_retval */ #include <asm/system.h> @@ -40,7 +39,7 @@ extern void panic_domain(struct pt_regs extern void panic_domain(struct pt_regs *, const char *, ...); extern long platform_is_hp_ski(void); extern int ia64_hyperprivop(unsigned long, REGS *); -extern int 
ia64_hypercall(struct pt_regs *regs); +extern IA64FAULT ia64_hypercall(struct pt_regs *regs); extern void vmx_do_launch(struct vcpu *); extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); @@ -195,10 +194,10 @@ void check_bad_nested_interruption(unsig } vector &= ~0xf; if (vector != IA64_DATA_TLB_VECTOR && - vector != IA64_ALT_DATA_TLB_VECTOR && - vector != IA64_VHPT_TRANS_VECTOR) { -panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n", - vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip)); + vector != IA64_ALT_DATA_TLB_VECTOR && + vector != IA64_VHPT_TRANS_VECTOR) { + panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n", + vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip)); } } @@ -265,7 +264,8 @@ void deliver_pending_interrupt(struct pt } unsigned long lazy_cover_count = 0; -int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs) +static int +handle_lazy_cover(struct vcpu *v, struct pt_regs *regs) { if (!PSCB(v,interrupt_collection_enabled)) { PSCB(v,ifs) = regs->cr_ifs; @@ -285,7 +285,7 @@ void ia64_do_page_fault (unsigned long a unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); IA64FAULT fault; - if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return; + if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return; if ((isr & IA64_ISR_SP) || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { @@ -299,7 +299,7 @@ void ia64_do_page_fault (unsigned long a } again: - fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha); + fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { u64 logps; pteval = translate_domain_pte(pteval, address, itir, &logps); @@ -307,11 +307,7 @@ void ia64_do_page_fault (unsigned long a if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) { /* dtlb 
has been purged in-between. This dtlb was matching. Undo the work. */ -#ifdef VHPT_GLOBAL - vhpt_flush_address (address, 1); -#endif - ia64_ptcl(address, 1<<2); - ia64_srlz_i(); + vcpu_flush_tlb_vhpt_range (address, 1); goto again; } return; @@ -357,7 +353,7 @@ ia64_fault (unsigned long vector, unsign struct pt_regs *regs = (struct pt_regs *) &stack; unsigned long code; char buf[128]; - static const char * const reason[] = { + static const char *reason[] = { "IA-64 Illegal Operation fault", "IA-64 Privileged Operation fault", "IA-64 Privileged Register fault", @@ -367,10 +363,10 @@ ia64_fault (unsigned long vector, unsign "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" }; -#if 0 -printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n", - vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); -#endif + + printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, ipsr=0x%016lx, isr=0x%016lx\n", + vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { /* @@ -383,15 +379,48 @@ printf("ia64_fault, vector=0x%p, ifa=%p, } switch (vector) { - case 24: /* General Exception */ + case 0: + printk("VHPT Translation.\n"); + break; + + case 4: + printk("Alt DTLB.\n"); + break; + + case 6: + printk("Instruction Key Miss.\n"); + break; + + case 7: + printk("Data Key Miss.\n"); + break; + + case 8: + printk("Dirty-bit.\n"); + break; + + case 20: + printk("Page Not Found.\n"); + break; + + case 21: + printk("Key Permission.\n"); + break; + + case 22: + printk("Instruction Access Rights.\n"); + break; + + case 24: /* General Exception */ code = (isr >> 4) & 0xf; sprintf(buf, "General Exception: %s%s", reason[code], - (code == 3) ? ((isr & (1UL << 37)) - ? " (RSE access)" : " (data access)") : ""); + (code == 3) ? ((isr & (1UL << 37)) ? 
" (RSE access)" : + " (data access)") : ""); if (code == 8) { # ifdef CONFIG_IA64_PRINT_HAZARDS printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, + current->comm, current->pid, + regs->cr_iip + ia64_psr(regs)->ri, regs->pr); # endif printf("ia64_fault: returning on hazard\n"); @@ -399,162 +428,65 @@ printf("ia64_fault, vector=0x%p, ifa=%p, } break; - case 25: /* Disabled FP-Register */ - if (isr & 2) { - //disabled_fph_fault(regs); - //return; - } - sprintf(buf, "Disabled FPL fault---not supposed to happen!"); - break; - - case 26: /* NaT Consumption */ - if (user_mode(regs)) { - void *addr; - - if (((isr >> 4) & 0xf) == 2) { - /* NaT page consumption */ - //sig = SIGSEGV; - //code = SEGV_ACCERR; - addr = (void *) ifa; - } else { - /* register NaT consumption */ - //sig = SIGILL; - //code = ILL_ILLOPN; - addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - } - //siginfo.si_signo = sig; - //siginfo.si_code = code; - //siginfo.si_errno = 0; - //siginfo.si_addr = addr; - //siginfo.si_imm = vector; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(sig, &siginfo, current); - //return; - } //else if (ia64_done_with_exception(regs)) - //return; - sprintf(buf, "NaT consumption"); - break; - - case 31: /* Unsupported Data Reference */ - if (user_mode(regs)) { - //siginfo.si_signo = SIGILL; - //siginfo.si_code = ILL_ILLOPN; - //siginfo.si_errno = 0; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //siginfo.si_imm = vector; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(SIGILL, &siginfo, current); - //return; - } - sprintf(buf, "Unsupported data reference"); - break; - - case 29: /* Debug */ - case 35: /* Taken Branch Trap */ - case 36: /* Single Step Trap */ - //if (fsys_mode(current, regs)) {} - switch (vector) { - case 29: - //siginfo.si_code = TRAP_HWBKPT; -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may 
contain incorrect address) now has - * "NoFix" status. There are no plans for fixing this. - */ - if (ia64_psr(regs)->is == 0) - ifa = regs->cr_iip; -#endif - break; - case 35: ifa = 0; break; - case 36: ifa = 0; break; - //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; - } - //siginfo.si_signo = SIGTRAP; - //siginfo.si_errno = 0; - //siginfo.si_addr = (void *) ifa; - //siginfo.si_imm = 0; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(SIGTRAP, &siginfo, current); - //return; - - case 32: /* fp fault */ - case 33: /* fp trap */ - //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); - //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { - //siginfo.si_signo = SIGFPE; - //siginfo.si_errno = 0; - //siginfo.si_code = FPE_FLTINV; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //siginfo.si_imm = 0; - //force_sig_info(SIGFPE, &siginfo, current); - //} - //return; - sprintf(buf, "FP fault/trap"); - break; - - case 34: - if (isr & 0x2) { - /* Lower-Privilege Transfer Trap */ - /* - * Just clear PSR.lp and then return immediately: all the - * interesting work (e.g., signal delivery is done in the kernel - * exit path). - */ - //ia64_psr(regs)->lp = 0; - //return; - sprintf(buf, "Lower-Privilege Transfer trap"); - } else { - /* Unimplemented Instr. 
Address Trap */ - if (user_mode(regs)) { - //siginfo.si_signo = SIGILL; - //siginfo.si_code = ILL_BADIADDR; - //siginfo.si_errno = 0; - //siginfo.si_flags = 0; - //siginfo.si_isr = 0; - //siginfo.si_imm = 0; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //force_sig_info(SIGILL, &siginfo, current); - //return; - } - sprintf(buf, "Unimplemented Instruction Address fault"); - } - break; - - case 45: - printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", - regs->cr_iip, ifa, isr); - //force_sig(SIGSEGV, current); - break; - - case 46: - printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", - regs->cr_iip, ifa, isr, iim); - //force_sig(SIGSEGV, current); - return; - - case 47: - sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); - break; - - default: - sprintf(buf, "Fault %lu", vector); - break; - } - //die_if_kernel(buf, regs, error); -printk("ia64_fault: %s: reflecting\n",buf); -PSCB(current,itir) = vcpu_get_itir_on_fault(current,ifa); -PSCB(current,ifa) = ifa; -reflect_interruption(isr,regs,IA64_GENEX_VECTOR); -//while(1); - //force_sig(SIGILL, current); + case 25: + printk("Disabled FP-Register.\n"); + break; + + case 26: + printk("NaT consumption.\n"); + break; + + case 29: + printk("Debug.\n"); + break; + + case 30: + printk("Unaligned Reference.\n"); + break; + + case 31: + printk("Unsupported data reference.\n"); + break; + + case 32: + printk("Floating-Point Fault.\n"); + break; + + case 33: + printk("Floating-Point Trap.\n"); + break; + + case 34: + printk("Lower Privilege Transfer Trap.\n"); + break; + + case 35: + printk("Taken Branch Trap.\n"); + break; + + case 36: + printk("Single Step Trap.\n"); + break; + + case 45: + printk("IA-32 Exception.\n"); + break; + + case 46: + printk("IA-32 Intercept.\n"); + break; + + case 47: + printk("IA-32 Interrupt.\n"); + break; + + 
default: + printk("Fault %lu\n", vector); + break; + } + + show_registers(regs); + panic("Fault in Xen.\n"); } unsigned long running_on_sim = 0; @@ -679,6 +611,7 @@ ia64_handle_break (unsigned long ifa, st { struct domain *d = current->domain; struct vcpu *v = current; + IA64FAULT vector; if (first_break) { if (platform_is_hp_ski()) running_on_sim = 1; @@ -699,9 +632,11 @@ ia64_handle_break (unsigned long ifa, st /* by default, do not continue */ v->arch.hypercall_continuation = 0; - if (ia64_hypercall(regs) && - !PSCBX(v, hypercall_continuation)) - vcpu_increment_iip(current); + if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) { + if (!PSCBX(v, hypercall_continuation)) + vcpu_increment_iip(current); + } + else reflect_interruption(isr, regs, vector); } else if (!PSCB(v,interrupt_collection_enabled)) { if (ia64_hyperprivop(iim,regs)) @@ -813,7 +748,7 @@ printf("*** Handled privop masquerading while(vector); return; } - if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return; + if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, regs)) return; PSCB(current,ifa) = ifa; PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa); reflect_interruption(isr,regs,vector); diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/regionreg.c --- a/xen/arch/ia64/xen/regionreg.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/regionreg.c Tue May 30 14:30:34 2006 -0500 @@ -17,9 +17,7 @@ #include <asm/vcpu.h> /* Defined in xemasm.S */ -extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal); - -extern void *pal_vaddr; +extern void ia64_new_rr7(unsigned long rid, void *shared_info, void *shared_arch_info, unsigned long shared_info_va, unsigned long p_vhpt); /* RID virtualization mechanism is really simple: domains have less rid bits than the host and the host rid space is shared among the domains. 
(Values @@ -260,9 +258,9 @@ int set_one_rr(unsigned long rr, unsigne if (!PSCB(v,metaphysical_mode)) set_rr(rr,newrrv.rrval); } else if (rreg == 7) { - ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, - v->arch.privregs, __get_cpu_var(vhpt_paddr), - (unsigned long) pal_vaddr); + ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info, + v->arch.privregs, v->domain->arch.shared_info_va, + __get_cpu_var(vhpt_paddr)); } else { set_rr(rr,newrrv.rrval); } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/vcpu.c Tue May 30 14:30:34 2006 -0500 @@ -28,8 +28,6 @@ extern void setfpreg (unsigned long regn extern void panic_domain(struct pt_regs *, const char *, ...); extern unsigned long translate_domain_mpaddr(unsigned long); -extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); - typedef union { struct ia64_psr ia64_psr; @@ -682,9 +680,9 @@ UINT64 vcpu_check_pending_interrupts(VCP */ check_start: if (event_pending(vcpu) && - !test_bit(vcpu->vcpu_info->arch.evtchn_vector, + !test_bit(vcpu->domain->shared_info->arch.evtchn_vector, &PSCBX(vcpu, insvc[0]))) - vcpu_pend_interrupt(vcpu, vcpu->vcpu_info->arch.evtchn_vector); + vcpu_pend_interrupt(vcpu, vcpu->domain->shared_info->arch.evtchn_vector); p = &PSCBX(vcpu,irr[3]); r = &PSCBX(vcpu,insvc[3]); @@ -1290,8 +1288,7 @@ static inline int vcpu_match_tr_entry(TR return trp->pte.p && vcpu_match_tr_entry_no_p(trp, ifa, rid); } -// in_tpa is not used when CONFIG_XEN_IA64_DOM0_VP -IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, BOOLEAN in_tpa, UINT64 *pteval, UINT64 *itir, UINT64 *iha) +IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha) { unsigned long region = address >> 61; unsigned long pta, rid, rr; @@ -1368,12 +1365,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN pte = trp->pte; if (/* is_data && */ pte.p && 
vcpu_match_tr_entry_no_p(trp,address,rid)) { -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (vcpu->domain==dom0 && !in_tpa) - *pteval = pte.val; - else -#endif - *pteval = vcpu->arch.dtlb_pte; + *pteval = pte.val; *itir = trp->itir; dtlb_translate_count++; return IA64_USE_TLB; @@ -1422,7 +1414,7 @@ IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 va UINT64 pteval, itir, mask, iha; IA64FAULT fault; - fault = vcpu_translate(vcpu, vadr, TRUE, TRUE, &pteval, &itir, &iha); + fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha); if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { mask = itir_mask(itir); @@ -1708,11 +1700,6 @@ IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT6 VCPU translation register access routines **************************************************************************/ -void vcpu_purge_tr_entry(TR_ENTRY *trp) -{ - trp->pte.val = 0; -} - static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa) { UINT64 ps; @@ -1800,12 +1787,10 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB return; if (IorD & 0x1) { - vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr); - PSCBX(vcpu,itlb_pte) = mp_pte; + vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr); } if (IorD & 0x2) { - vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr); - PSCBX(vcpu,dtlb_pte) = mp_pte; + vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),mp_pte,ps<<2,vaddr); } } @@ -1875,20 +1860,14 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vad return fault; } -int ptce_count = 0; IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) { // Note that this only needs to be called once, i.e. 
the // architected loop to purge the entire TLB, should use // base = stride1 = stride2 = 0, count0 = count 1 = 1 -#ifdef VHPT_GLOBAL - vhpt_flush(); // FIXME: This is overdoing it -#endif - local_flush_tlb_all(); - // just invalidate the "whole" tlb - vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); + vcpu_flush_vtlb_all (); + return IA64_NO_FAULT; } @@ -1905,33 +1884,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE //printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range); -#ifdef CONFIG_XEN_SMP - struct domain *d = vcpu->domain; - struct vcpu *v; - - for_each_vcpu (d, v) { - if (v == vcpu) - continue; - - /* Purge TC entries. - FIXME: clear only if match. */ - vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); - -#ifdef VHPT_GLOBAL - /* Invalidate VHPT entries. */ - vhpt_flush_address_remote (v->processor, vadr, addr_range); -#endif - } -#endif - -#ifdef VHPT_GLOBAL - vhpt_flush_address(vadr,addr_range); -#endif - ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); - /* Purge tc. 
*/ - vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); + domain_flush_vtlb_range (vcpu->domain, vadr, addr_range); + return IA64_NO_FAULT; } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/vhpt.c Tue May 30 14:30:34 2006 -0500 @@ -12,32 +12,31 @@ #include <asm/system.h> #include <asm/pgalloc.h> #include <asm/page.h> -#include <asm/dma.h> #include <asm/vhpt.h> +#include <asm/vcpu.h> + +/* Defined in tlb.c */ +extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); extern long running_on_sim; DEFINE_PER_CPU (unsigned long, vhpt_paddr); DEFINE_PER_CPU (unsigned long, vhpt_pend); -void vhpt_flush(void) -{ - struct vhpt_lf_entry *v =__va(__ia64_per_cpu_var(vhpt_paddr)); - int i; -#if 0 -static int firsttime = 2; - -if (firsttime) firsttime--; -else { -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -} -#endif +static void vhpt_flush(void) +{ + struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr)); + int i; + + for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) + v->ti_tag = INVALID_TI_TAG; +} + +static void vhpt_erase(void) +{ + struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR; + int i; + for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { v->itir = 0; v->CChain = 0; @@ -47,51 +46,6 @@ printf("vhpt_flush: ******************** // initialize cache too??? 
} -#ifdef VHPT_GLOBAL -void vhpt_flush_address(unsigned long vadr, unsigned long addr_range) -{ - struct vhpt_lf_entry *vlfe; - - if ((vadr >> 61) == 7) { - // no vhpt for region 7 yet, see vcpu_itc_no_srlz - printf("vhpt_flush_address: region 7, spinning...\n"); - while(1); - } -#if 0 - // this only seems to occur at shutdown, but it does occur - if ((!addr_range) || addr_range & (addr_range - 1)) { - printf("vhpt_flush_address: weird range, spinning...\n"); - while(1); - } -//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range); -#endif - while ((long)addr_range > 0) { - vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr); - // FIXME: for now, just blow it away even if it belongs to - // another domain. Later, use ttag to check for match -//if (!(vlfe->ti_tag & INVALID_TI_TAG)) { -//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr); -//} - vlfe->ti_tag |= INVALID_TI_TAG; - addr_range -= PAGE_SIZE; - vadr += PAGE_SIZE; - } -} - -void vhpt_flush_address_remote(int cpu, - unsigned long vadr, unsigned long addr_range) -{ - while ((long)addr_range > 0) { - /* Get the VHPT entry. */ - unsigned int off = ia64_thash(vadr) - VHPT_ADDR; - volatile struct vhpt_lf_entry *v; - v =__va(per_cpu(vhpt_paddr, cpu) + off); - v->ti_tag = INVALID_TI_TAG; - addr_range -= PAGE_SIZE; - vadr += PAGE_SIZE; - } -} -#endif static void vhpt_map(unsigned long pte) { @@ -147,17 +101,11 @@ void vhpt_multiple_insert(unsigned long void vhpt_init(void) { - unsigned long vhpt_total_size, vhpt_alignment; unsigned long paddr, pte; struct page_info *page; #if !VHPT_ENABLED return; #endif - // allocate a huge chunk of physical memory.... how??? - vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB - vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB - printf("vhpt_init: vhpt size=0x%lx, align=0x%lx\n", - vhpt_total_size, vhpt_alignment); /* This allocation only holds true if vhpt table is unique for * all domains. 
Or else later new vhpt table should be allocated * from domain heap when each domain is created. Assume xen buddy @@ -167,17 +115,135 @@ void vhpt_init(void) if (!page) panic("vhpt_init: can't allocate VHPT!\n"); paddr = page_to_maddr(page); + if (paddr & ((1 << VHPT_SIZE_LOG2) - 1)) + panic("vhpt_init: bad VHPT alignment!\n"); __get_cpu_var(vhpt_paddr) = paddr; - __get_cpu_var(vhpt_pend) = paddr + vhpt_total_size - 1; + __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1; printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n", paddr, __get_cpu_var(vhpt_pend)); pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL)); vhpt_map(pte); ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED); - vhpt_flush(); -} - + vhpt_erase(); +} + + +void vcpu_flush_vtlb_all (void) +{ + struct vcpu *v = current; + + /* First VCPU tlb. */ + vcpu_purge_tr_entry(&PSCBX(v,dtlb)); + vcpu_purge_tr_entry(&PSCBX(v,itlb)); + + /* Then VHPT. */ + vhpt_flush (); + + /* Then mTLB. */ + local_flush_tlb_all (); + + /* We could clear bit in d->domain_dirty_cpumask only if domain d in + not running on this processor. There is currently no easy way to + check this. */ +} + +void domain_flush_vtlb_all (void) +{ + int cpu = smp_processor_id (); + struct vcpu *v; + + for_each_vcpu (current->domain, v) + if (v->processor == cpu) + vcpu_flush_vtlb_all (); + else + smp_call_function_single + (v->processor, + (void(*)(void *))vcpu_flush_vtlb_all, + NULL,1,1); +} + +static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range) +{ + void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu)); + + while ((long)addr_range > 0) { + /* Get the VHPT entry. 
*/ + unsigned int off = ia64_thash(vadr) - VHPT_ADDR; + volatile struct vhpt_lf_entry *v; + v = vhpt_base + off; + v->ti_tag = INVALID_TI_TAG; + addr_range -= PAGE_SIZE; + vadr += PAGE_SIZE; + } +} + +void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range) +{ + cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range); + ia64_ptcl(vadr, log_range << 2); + ia64_srlz_i(); +} + +void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) +{ + struct vcpu *v; + +#if 0 + // this only seems to occur at shutdown, but it does occur + if ((!addr_range) || addr_range & (addr_range - 1)) { + printf("vhpt_flush_address: weird range, spinning...\n"); + while(1); + } +#endif + + for_each_vcpu (d, v) { + /* Purge TC entries. + FIXME: clear only if match. */ + vcpu_purge_tr_entry(&PSCBX(v,dtlb)); + vcpu_purge_tr_entry(&PSCBX(v,itlb)); + + /* Invalidate VHPT entries. */ + cpu_flush_vhpt_range (v->processor, vadr, addr_range); + } + + /* ptc.ga */ + ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); +} + +static void flush_tlb_vhpt_all (struct domain *d) +{ + /* First VHPT. */ + vhpt_flush (); + + /* Then mTLB. */ + local_flush_tlb_all (); +} + +void domain_flush_destroy (struct domain *d) +{ + /* Very heavy... 
*/ + on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + cpus_clear (d->domain_dirty_cpumask); +} + +void flush_tlb_mask(cpumask_t mask) +{ + int cpu; + + cpu = smp_processor_id(); + if (cpu_isset (cpu, mask)) { + cpu_clear(cpu, mask); + flush_tlb_vhpt_all (NULL); + } + + if (cpus_empty(mask)) + return; + + for_each_cpu_mask (cpu, mask) + smp_call_function_single + (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); +} void zero_vhpt_stats(void) { diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenasm.S --- a/xen/arch/ia64/xen/xenasm.S Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/xenasm.S Tue May 30 14:30:34 2006 -0500 @@ -11,242 +11,160 @@ #include <asm/pgtable.h> #include <asm/vhpt.h> -#if 0 -// FIXME: there's gotta be a better way... -// ski and spaski are different... moved to xenmisc.c -#define RunningOnHpSki(rx,ry,pn) \ - addl rx = 2, r0; \ - addl ry = 3, r0; \ - ;; \ - mov rx = cpuid[rx]; \ - mov ry = cpuid[ry]; \ - ;; \ - cmp.eq pn,p0 = 0, rx; \ - ;; \ - (pn) movl rx = 0x7000004 ; \ - ;; \ - (pn) cmp.ge pn,p0 = ry, rx; \ - ;; - -//int platform_is_hp_ski(void) -GLOBAL_ENTRY(platform_is_hp_ski) - mov r8 = 0 - RunningOnHpSki(r3,r9,p8) -(p8) mov r8 = 1 - br.ret.sptk.many b0 -END(platform_is_hp_ski) -#endif - // Change rr7 to the passed value while ensuring // Xen is mapped into the new region. -// in0: new rr7 value -// in1: Xen virtual address of shared info (to be pinned) #define PSR_BITS_TO_CLEAR \ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) + IA64_PSR_DFL | IA64_PSR_DFH | IA64_PSR_IC) // FIXME? 
Note that this turns off the DB bit (debug) #define PSR_BITS_TO_SET IA64_PSR_BN -//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal); +//extern void ia64_new_rr7(unsigned long rid, /* in0 */ +// void *shared_info, /* in1 */ +// void *shared_arch_info, /* in2 */ +// unsigned long shared_info_va, /* in3 */ +// unsigned long p_vhpt) /* in4 */ +//Local usage: +// loc0=rp, loc1=ar.pfs, loc2=percpu_paddr, loc3=psr, loc4=ar.rse +// loc5=pal_vaddr, loc6=xen_paddr, loc7=shared_archinfo_paddr, GLOBAL_ENTRY(ia64_new_rr7) // not sure this unwind statement is correct... .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1) - alloc loc1 = ar.pfs, 5, 9, 0, 0 + alloc loc1 = ar.pfs, 5, 8, 0, 0 + movl loc2=PERCPU_ADDR 1: { - mov r28 = in0 // copy procedure index + mov loc3 = psr // save psr + mov loc0 = rp // save rp mov r8 = ip // save ip to compute branch - mov loc0 = rp // save rp };; .body - movl loc2=PERCPU_ADDR - ;; tpa loc2=loc2 // grab this BEFORE changing rr7 - ;; - dep loc8=0,in4,60,4 - ;; -#if VHPT_ENABLED - mov loc6=in3 - ;; - //tpa loc6=loc6 // grab this BEFORE changing rr7 - ;; -#endif - mov loc5=in1 - ;; - tpa loc5=loc5 // grab this BEFORE changing rr7 - ;; - mov loc7=in2 // arch_vcpu_info_t - ;; - tpa loc7=loc7 // grab this BEFORE changing rr7 - ;; - mov loc3 = psr // save psr - adds r8 = 1f-1b,r8 // calculate return address for call - ;; + tpa in1=in1 // grab shared_info BEFORE changing rr7 + adds r8 = 1f-1b,r8 // calculate return address for call + ;; + tpa loc7=in2 // grab arch_vcpu_info BEFORE chg rr7 + movl r17=PSR_BITS_TO_SET + mov loc4=ar.rsc // save RSE configuration + movl r16=PSR_BITS_TO_CLEAR + ;; tpa r8=r8 // convert rp to physical - ;; - mov loc4=ar.rsc // save RSE configuration - ;; mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PSR_BITS_TO_CLEAR - movl r17=PSR_BITS_TO_SET - ;; or loc3=loc3,r17 // add in psr the bits to set ;; andcm r16=loc3,r16 // 
removes bits to clear from psr + dep loc6=0,r8,0,KERNEL_TR_PAGE_SHIFT // Xen code paddr br.call.sptk.many rp=ia64_switch_mode_phys 1: // now in physical mode with psr.i/ic off so do rr7 switch - dep r16=-1,r0,61,3 - ;; + movl r16=pal_vaddr // Note: belong to region 7! + ;; mov rr[r16]=in0 + ;; srlz.d - ;; + dep r16=0,r16,60,4 // Get physical address. + ;; + ld8 loc5=[r16] // read pal_vaddr + movl r26=PAGE_KERNEL + ;; // re-pin mappings for kernel text and data - mov r18=KERNEL_TR_PAGE_SHIFT<<2 + mov r24=KERNEL_TR_PAGE_SHIFT<<2 movl r17=KERNEL_START ;; - rsm psr.i | psr.ic - ;; - srlz.i - ;; - ptr.i r17,r18 - ptr.d r17,r18 - ;; - mov cr.itir=r18 + ptr.i r17,r24 + ptr.d r17,r24 + mov r16=IA64_TR_KERNEL + mov cr.itir=r24 mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - //mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i + or r18=loc6,r26 ;; itr.i itr[r16]=r18 - ;; + ;; itr.d dtr[r16]=r18 - ;; - - // re-pin mappings for stack (current), per-cpu, vhpt, and shared info + + // re-pin mappings for stack (current) // unless overlaps with KERNEL_TR dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT ;; cmp.eq p7,p0=r17,r18 (p7) br.cond.sptk .stack_overlaps - ;; - movl r25=PAGE_KERNEL + mov r25=IA64_GRANULE_SHIFT<<2 dep r21=0,r13,60,4 // physical address of "current" ;; - or r23=r25,r21 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; ptr.d r13,r25 - ;; + or r23=r21,r26 // construct PA | page properties mov cr.itir=r25 mov cr.ifa=r13 // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK + mov r21=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r21]=r23 // wire in new mapping... + + // Per-cpu +.stack_overlaps: + mov r24=PERCPU_PAGE_SHIFT<<2 + movl r22=PERCPU_ADDR + ;; + ptr.d r22,r24 + or r23=loc2,r26 // construct PA | page properties + mov cr.itir=r24 + mov cr.ifa=r22 + mov r25=IA64_TR_PERCPU_DATA ;; itr.d dtr[r25]=r23 // wire in new mapping... 
- ;; -.stack_overlaps: - - movl r22=PERCPU_ADDR - ;; - movl r25=PAGE_KERNEL - ;; - mov r21=loc2 // saved percpu physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PERCPU_PAGE_SHIFT<<2 + + // VHPT +#if VHPT_ENABLED + mov r24=VHPT_SIZE_LOG2<<2 + movl r22=VHPT_ADDR + mov r21=IA64_TR_VHPT ;; ptr.d r22,r24 - ;; + or r23=in4,r26 // construct PA | page properties mov cr.itir=r24 mov cr.ifa=r22 ;; - mov r25=IA64_TR_PERCPU_DATA - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; - -#if VHPT_ENABLED - movl r22=VHPT_ADDR - ;; - movl r25=PAGE_KERNEL - ;; - mov r21=loc6 // saved vhpt physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=VHPT_SIZE_LOG2<<2 + itr.d dtr[r21]=r23 // wire in new mapping... +#endif + + // Shared info + mov r24=PAGE_SHIFT<<2 + movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) + ;; + ptr.d in3,r24 + or r23=in1,r25 // construct PA | page properties + mov cr.itir=r24 + mov cr.ifa=in3 + mov r21=IA64_TR_SHARED_INFO + ;; + itr.d dtr[r21]=r23 // wire in new mapping... + + // Map for arch_vcpu_info_t + movl r22=XSI_OFS + mov r24=PAGE_SHIFT<<2 + ;; + add r22=r22,in3 ;; ptr.d r22,r24 - ;; + or r23=loc7,r25 // construct PA | page properties mov cr.itir=r24 mov cr.ifa=r22 - ;; - mov r25=IA64_TR_VHPT - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; -#endif - - movl r22=SHAREDINFO_ADDR - ;; - movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) - ;; - mov r21=loc5 // saved sharedinfo physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_SHARED_INFO - ;; - itr.d dtr[r25]=r23 // wire in new mapping... 
- ;; - // Map for arch_vcpu_info_t - movl r22=SHARED_ARCHINFO_ADDR - ;; - movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) - ;; - mov r21=loc7 // saved sharedinfo physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_ARCH_INFO - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; - - //Purge/insert PAL TR + mov r21=IA64_TR_ARCH_INFO + ;; + itr.d dtr[r21]=r23 // wire in new mapping... + + // Purge/insert PAL TR mov r24=IA64_TR_PALCODE - movl r25=PAGE_KERNEL - ;; - or loc8=r25,loc8 mov r23=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i in4,r23 - ;; + dep r25=0,loc5,60,4 // convert pal vaddr to paddr + ;; + ptr.i loc5,r23 + or r25=r25,r26 // construct PA | page properties mov cr.itir=r23 - mov cr.ifa=in4 - ;; - itr.i itr[r24]=loc8 - ;; + mov cr.ifa=loc5 + ;; + itr.i itr[r24]=r25 // done, switch back to virtual and return mov r16=loc3 // r16= original psr @@ -261,6 +179,7 @@ 1: br.ret.sptk.many rp END(ia64_new_rr7) +#if 0 /* Not used */ #include "minstate.h" GLOBAL_ENTRY(ia64_prepare_handle_privop) @@ -301,6 +220,7 @@ GLOBAL_ENTRY(ia64_prepare_handle_reflect DO_LOAD_SWITCH_STACK br.cond.sptk.many rp // goes to ia64_leave_kernel END(ia64_prepare_handle_reflection) +#endif GLOBAL_ENTRY(__get_domain_bundle) EX(.failure_in_get_bundle,ld8 r8=[r32],8) @@ -331,80 +251,9 @@ GLOBAL_ENTRY(dorfirfi) mov cr.ipsr=r17 mov cr.ifs=r18 ;; - // fall through + rfi + ;; END(dorfirfi) - -GLOBAL_ENTRY(dorfi) - rfi - ;; -END(dorfirfi) - -// -// Long's Peak UART Offsets -// -#define COM_TOP 0xff5e0000 -#define COM_BOT 0xff5e2000 - -// UART offsets -#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */ -#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */ -#define UART_INT_ID 2 /* Interrupt ID register */ -#define UART_LINE_CTL 3 /* Line control register */ -#define UART_MODEM_CTL 4 /* Modem Control Register */ -#define UART_LSR 5 /* In: Line Status Register */ 
-#define UART_MSR 6 /* Modem status register */ -#define UART_DLATCH_LOW UART_TX -#define UART_DLATCH_HIGH UART_INT_ENB -#define COM1 0x3f8 -#define COM2 0x2F8 -#define COM3 0x3E8 - -/* interrupt enable bits (offset 1) */ -#define DATA_AVAIL_INT 1 -#define XMIT_HOLD_EMPTY_INT 2 -#define LINE_STAT_INT 4 -#define MODEM_STAT_INT 8 - -/* line status bits (offset 5) */ -#define REC_DATA_READY 1 -#define OVERRUN 2 -#define PARITY_ERROR 4 -#define FRAMING_ERROR 8 -#define BREAK_INTERRUPT 0x10 -#define XMIT_HOLD_EMPTY 0x20 -#define XMIT_SHIFT_EMPTY 0x40 - -// Write a single character -// input: r32 = character to be written -// output: none -GLOBAL_ENTRY(longs_peak_putc) - rsm psr.dt - movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR - ;; - srlz.i - ;; - -.Chk_THRE_p: - ld1.acq r18=[r16] - ;; - - and r18 = XMIT_HOLD_EMPTY, r18 - ;; - cmp4.eq p6,p0=0,r18 - ;; - -(p6) br .Chk_THRE_p - ;; - movl r16 = 0x8000000000000000 + COM_TOP + UART_TX - ;; - st1.rel [r16]=r32 - ;; - ssm psr.dt - ;; - srlz.i - ;; - br.ret.sptk.many b0 -END(longs_peak_putc) /* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */ GLOBAL_ENTRY(pal_emulator_static) diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/xenmisc.c Tue May 30 14:30:34 2006 -0500 @@ -267,6 +267,9 @@ void context_switch(struct vcpu *prev, s vmx_load_state(next); /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/ prev = ia64_switch_to(next); + + //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask); + if (!VMX_DOMAIN(current)){ vcpu_set_next_timer(current); } diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/ia64/xen/xensetup.c Tue May 30 14:30:34 2006 -0500 @@ -415,8 +415,7 @@ printk("About to call domain_create()\n" printk("About to call construct_dom0()\n"); dom0_memory_start = (unsigned long) 
__va(initial_images_start); dom0_memory_size = ia64_boot_param->domain_size; - dom0_initrd_start = (unsigned long) __va(initial_images_start + - PAGE_ALIGN(ia64_boot_param->domain_size)); + dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start); dom0_initrd_size = ia64_boot_param->initrd_size; if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_size, diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/dom0_ops.c Tue May 30 14:30:34 2006 -0500 @@ -404,27 +404,6 @@ long arch_do_dom0_op(struct dom0_op *op, } break; - case DOM0_PHYSICAL_MEMORY_MAP: - { - struct dom0_memory_map_entry entry; - int i; - - for ( i = 0; i < e820.nr_map; i++ ) - { - if ( i >= op->u.physical_memory_map.max_map_entries ) - break; - entry.start = e820.map[i].addr; - entry.end = e820.map[i].addr + e820.map[i].size; - entry.is_ram = (e820.map[i].type == E820_RAM); - (void)copy_to_guest_offset( - op->u.physical_memory_map.memory_map, i, &entry, 1); - } - - op->u.physical_memory_map.nr_map_entries = i; - (void)copy_to_guest(u_dom0_op, op, 1); - } - break; - case DOM0_HYPERCALL_INIT: { struct domain *d; diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/domain.c Tue May 30 14:30:34 2006 -0500 @@ -146,6 +146,8 @@ struct vcpu *alloc_vcpu_struct(struct do v->arch.guest_vl4table = __linear_l4_table; #endif + pae_l3_cache_init(&v->arch.pae_l3_cache); + return v; } diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/domain_build.c Tue May 30 14:30:34 2006 -0500 @@ -367,7 +367,10 @@ int construct_dom0(struct domain *d, if ( (1UL << order) > nr_pages ) panic("Domain 0 allocation is too small for kernel image.\n"); - /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. 
*/ + /* + * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1 + * mapping covers the allocation. + */ if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL ) panic("Not enough RAM for domain 0 allocation.\n"); alloc_spfn = page_to_mfn(page); diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/hvm.c Tue May 30 14:30:34 2006 -0500 @@ -185,11 +185,16 @@ void hvm_setup_platform(struct domain* d void hvm_setup_platform(struct domain* d) { struct hvm_domain *platform; - - if ( !hvm_guest(current) || (current->vcpu_id != 0) ) + struct vcpu *v=current; + + if ( !hvm_guest(v) || (v->vcpu_id != 0) ) return; - shadow_direct_map_init(d); + if ( shadow_direct_map_init(d) == 0 ) + { + printk("Can not allocate shadow direct map for HVM domain.\n"); + domain_crash_synchronous(); + } hvm_map_io_shared_page(d); hvm_get_info(d); @@ -204,7 +209,8 @@ void hvm_setup_platform(struct domain* d hvm_vioapic_init(d); } - pit_init(&platform->vpit, current); + init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor); + pit_init(v, cpu_khz); } void pic_irq_request(void *data, int level) @@ -234,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v) } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs ); do_pic_irqs(pic, irqs); } +} + +u64 hvm_get_guest_time(struct vcpu *v) +{ + u64 host_tsc; + + rdtscll(host_tsc); + return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset; } int cpu_get_interrupt(struct vcpu *v, int *type) diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/i8254.c Tue May 30 14:30:34 2006 -0500 @@ -22,11 +22,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -/* Edwin Zhai <edwin.zhai@xxxxxxxxx> +/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx> * Ported to xen: - * use actimer for intr generation; + * Add a new layer of periodic time on top of PIT; * move speaker io access to hypervisor; - * use new method for counter/intrs calculation */ #include <xen/config.h> @@ -42,184 +41,117 @@ #include <asm/hvm/vpit.h> #include <asm/current.h> -/*#define DEBUG_PIT*/ +/* Enable DEBUG_PIT may cause guest calibration inaccuracy */ +/* #define DEBUG_PIT */ #define RW_STATE_LSB 1 #define RW_STATE_MSB 2 #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC (1000000000ULL) -#endif - -#ifndef TIMER_SLOP -#define TIMER_SLOP (50*1000) /* ns */ -#endif - -static void pit_irq_timer_update(PITChannelState *s, s64 current_time); - -s_time_t hvm_get_clock(void) -{ - /* TODO: add pause/unpause support */ - return NOW(); +#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency) +static int handle_pit_io(ioreq_t *p); +static int handle_speaker_io(ioreq_t *p); + +/* compute with 96 bit intermediate result: (a*b)/c */ +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { +#ifdef WORDS_BIGENDIAN + uint32_t high, low; +#else + uint32_t low, high; +#endif + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} + +/* + * get processor time. 
+ * unit: TSC + */ +int64_t hvm_get_clock(struct vcpu *v) +{ + uint64_t gtsc; + gtsc = hvm_get_guest_time(v); + return gtsc; } static int pit_get_count(PITChannelState *s) { - u64 d; - u64 counter; - - d = hvm_get_clock() - s->count_load_time; + uint64_t d; + int counter; + + d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); switch(s->mode) { case 0: case 1: case 4: case 5: - counter = (s->period - d) & 0xffff; + counter = (s->count - d) & 0xffff; break; case 3: /* XXX: may be incorrect for odd counts */ - counter = s->period - ((2 * d) % s->period); + counter = s->count - ((2 * d) % s->count); break; default: - /* mod 2 counter handle */ - d = hvm_get_clock() - s->hvm_time->count_point; - d += s->hvm_time->count_advance; - counter = s->period - (d % s->period); - break; - } - /* change from ns to pit counter */ - counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC); + counter = s->count - (d % s->count); + break; + } return counter; } /* get pit output bit */ -static int pit_get_out1(PITChannelState *s, s64 current_time) -{ - u64 d; +static int pit_get_out1(PITChannelState *s, int64_t current_time) +{ + uint64_t d; int out; - d = current_time - s->count_load_time; + d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); switch(s->mode) { default: case 0: - out = (d >= s->period); + out = (d >= s->count); break; case 1: - out = (d < s->period); + out = (d < s->count); break; case 2: - /* mod2 out is no meaning, since intr are generated in background */ - if ((d % s->period) == 0 && d != 0) + if ((d % s->count) == 0 && d != 0) out = 1; else out = 0; break; case 3: - out = (d % s->period) < ((s->period + 1) >> 1); + out = (d % s->count) < ((s->count + 1) >> 1); break; case 4: case 5: - out = (d == s->period); + out = (d == s->count); break; } return out; } -int pit_get_out(hvm_virpit *pit, int channel, s64 current_time) +int pit_get_out(PITState *pit, int channel, int64_t current_time) { 
PITChannelState *s = &pit->channels[channel]; return pit_get_out1(s, current_time); } -static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time) -{ - struct hvm_time_info *hvm_time = s->hvm_time; - struct domain *d = (void *) s - - offsetof(struct domain, arch.hvm_domain.vpit.channels[0]); - - /* ticks from current time(expected time) to NOW */ - int missed_ticks; - /* current_time is expected time for next intr, check if it's true - * (actimer has a TIMER_SLOP in advance) - */ - s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time; - - if (missed_time >= 0) { - missed_ticks = missed_time/(s_time_t)s->period + 1; - if (test_bit(_DOMF_debugging, &d->domain_flags)) { - hvm_time->pending_intr_nr++; - } else { - hvm_time->pending_intr_nr += missed_ticks; - } - s->next_transition_time = current_time + (missed_ticks ) * s->period; - } - - return s->next_transition_time; -} - -/* only rearm the actimer when return value > 0 - * -2: init state - * -1: the mode has expired - * 0: current VCPU is not running - * >0: the next fired time - */ -s64 pit_get_next_transition_time(PITChannelState *s, - s64 current_time) -{ - s64 d, next_time, base; - int period2; - struct hvm_time_info *hvm_time = s->hvm_time; - - d = current_time - s->count_load_time; - switch(s->mode) { - default: - case 0: - case 1: - if (d < s->period) - next_time = s->period; - else - return -1; - break; - case 2: - next_time = missed_ticks(s, current_time); - if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) ) - return 0; - break; - case 3: - base = (d / s->period) * s->period; - period2 = ((s->period + 1) >> 1); - if ((d - base) < period2) - next_time = base + period2; - else - next_time = base + s->period; - break; - case 4: - case 5: - if (d < s->period) - next_time = s->period; - else if (d == s->period) - next_time = s->period + 1; - else - return -1; - break; - case 0xff: - return -2; /* for init state */ - break; - } - /* XXX: better solution: use a clock at PIT_FREQ 
Hz */ - if (next_time <= current_time){ -#ifdef DEBUG_PIT - printk("HVM_PIT:next_time <= current_time. next=0x%llx, current=0x%llx!\n",next_time, current_time); -#endif - next_time = current_time + 1; - } - return next_time; -} - /* val must be 0 or 1 */ -void pit_set_gate(hvm_virpit *pit, int channel, int val) +void pit_set_gate(PITState *pit, int channel, int val) { PITChannelState *s = &pit->channels[channel]; @@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c case 5: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(); - pit_irq_timer_update(s, s->count_load_time); + s->count_load_time = hvm_get_clock(s->vcpu); +// pit_irq_timer_update(s, s->count_load_time); } break; case 2: case 3: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(); - pit_irq_timer_update(s, s->count_load_time); + s->count_load_time = hvm_get_clock(s->vcpu); +// pit_irq_timer_update(s, s->count_load_time); } /* XXX: disable/enable counting */ break; @@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c s->gate = val; } -int pit_get_gate(hvm_virpit *pit, int channel) +int pit_get_gate(PITState *pit, int channel) { PITChannelState *s = &pit->channels[channel]; return s->gate; @@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch static inline void pit_load_count(PITChannelState *s, int val) { + u32 period; if (val == 0) val = 0x10000; - - s->count_load_time = hvm_get_clock(); + s->count_load_time = hvm_get_clock(s->vcpu); s->count = val; - s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ); + period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ); #ifdef DEBUG_PIT - printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, load_time=%lld\n", + printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, load_time=%lld\n", + s, val, - s->period / 1000, + period, s->mode, - s->count_load_time); + (long long)s->count_load_time); #endif - if (s->mode 
== HVM_PIT_ACCEL_MODE) { - if (!s->hvm_time) { - printk("HVM_PIT:guest should only set mod 2 on channel 0!\n"); - return; - } - s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L; - s->hvm_time->first_injected = 0; - - if (s->period < 900000) { /* < 0.9 ms */ - printk("HVM_PIT: guest programmed too small an count: %x\n", - s->count); - s->period = 1000000; - } - } - - pit_irq_timer_update(s, s->count_load_time); + switch (s->mode) { + case 2: + /* create periodic time */ + s->pt = create_periodic_time (s->vcpu, period, 0, 0); + break; + case 1: + /* create one shot time */ + s->pt = create_periodic_time (s->vcpu, period, 0, 1); +#ifdef DEBUG_PIT + printk("HVM_PIT: create one shot time.\n"); +#endif + break; + default: + break; + } } /* if already latched, do not latch again */ @@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt } } -static void pit_ioport_write(void *opaque, u32 addr, u32 val) -{ - hvm_virpit *pit = opaque; +static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + PITState *pit = opaque; int channel, access; PITChannelState *s; val &= 0xff; @@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu if (!(val & 0x10) && !s->status_latched) { /* status latch */ /* XXX: add BCD and null count */ - s->status = (pit_get_out1(s, hvm_get_clock()) << 7) | + s->status = (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) | (s->rw_mode << 4) | (s->mode << 1) | s->bcd; @@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu } } -static u32 pit_ioport_read(void *opaque, u32 addr) -{ - hvm_virpit *pit = opaque; +static uint32_t pit_ioport_read(void *opaque, uint32_t addr) +{ + PITState *pit = opaque; int ret, count; PITChannelState *s; @@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque, return ret; } -static void pit_irq_timer_update(PITChannelState *s, s64 current_time) -{ - s64 expire_time; - int irq_level; - struct vcpu *v = current; - struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic; - - if 
(!s->hvm_time || s->mode == 0xff) - return; - - expire_time = pit_get_next_transition_time(s, current_time); - /* not generate intr by direct pic_set_irq in mod 2 - * XXX:mod 3 should be same as mod 2 - */ - if (s->mode != HVM_PIT_ACCEL_MODE) { - irq_level = pit_get_out1(s, current_time); - pic_set_irq(pic, s->irq, irq_level); - s->next_transition_time = expire_time; -#ifdef DEBUG_PIT - printk("HVM_PIT:irq_level=%d next_delay=%l ns\n", - irq_level, - (expire_time - current_time)); -#endif - } - - if (expire_time > 0) - set_timer(&(s->hvm_time->pit_timer), s->next_transition_time); - -} - -static void pit_irq_timer(void *data) -{ - PITChannelState *s = data; - - pit_irq_timer_update(s, s->next_transition_time); -} - static void pit_reset(void *opaque) { - hvm_virpit *pit = opaque; + PITState *pit = opaque; PITChannelState *s; int i; for(i = 0;i < 3; i++) { s = &pit->channels[i]; + if ( s -> pt ) { + destroy_periodic_time (s->pt); + s->pt = NULL; + } s->mode = 0xff; /* the init mode */ s->gate = (i != 2); pit_load_count(s, 0); } } -/* hvm_io_assist light-weight version, specific to PIT DM */ -static void resume_pit_io(ioreq_t *p) -{ - struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long old_eax = regs->eax; - p->state = STATE_INVALID; - - switch(p->size) { - case 1: - regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); - break; - case 2: - regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); - break; - case 4: - regs->eax = (p->u.data & 0xffffffff); - break; - default: - BUG(); - } +void pit_init(struct vcpu *v, unsigned long cpu_khz) +{ + PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + + s = &pit->channels[0]; + /* the timer 0 is connected to an IRQ */ + s->vcpu = v; + s++; s->vcpu = v; + s++; s->vcpu = v; + + register_portio_handler(PIT_BASE, 4, handle_pit_io); + /* register the speaker port */ + register_portio_handler(0x61, 1, handle_speaker_io); + ticks_per_sec(v) = cpu_khz * (int64_t)1000; +#ifdef 
DEBUG_PIT + printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v)); +#endif + pit_reset(pit); + return; } /* the intercept action for PIT DM retval:0--not handled; 1--handled */ -int handle_pit_io(ioreq_t *p) +static int handle_pit_io(ioreq_t *p) { struct vcpu *v = current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit); if (p->size != 1 || p->pdata_valid || @@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p) if (p->dir == 0) {/* write */ pit_ioport_write(vpit, p->addr, p->u.data); } else if (p->dir == 1) { /* read */ - p->u.data = pit_ioport_read(vpit, p->addr); - resume_pit_io(p); - } - - /* always return 1, since PIT sit in HV now */ + if ( (p->addr & 3) != 3 ) { + p->u.data = pit_ioport_read(vpit, p->addr); + } else { + printk("HVM_PIT: read A1:A0=3!\n"); + } + } return 1; } static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val) { - hvm_virpit *pit = opaque; - val &= 0xff; + PITState *pit = opaque; pit->speaker_data_on = (val >> 1) & 1; pit_set_gate(pit, 2, val & 1); } @@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void static uint32_t speaker_ioport_read(void *opaque, uint32_t addr) { int out; - hvm_virpit *pit = opaque; - out = pit_get_out(pit, 2, hvm_get_clock()); + PITState *pit = opaque; + out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu)); pit->dummy_refresh_clock ^= 1; return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) | (pit->dummy_refresh_clock << 4); } -int handle_speaker_io(ioreq_t *p) +static int handle_speaker_io(ioreq_t *p) { struct vcpu *v = current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit); if (p->size != 1 || p->pdata_valid || @@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p) speaker_ioport_write(vpit, p->addr, p->u.data); } else if (p->dir == 1) {/* read */ p->u.data = 
speaker_ioport_read(vpit, p->addr); - resume_pit_io(p); } return 1; } - -/* pick up missed timer ticks at deactive time */ -void pickup_deactive_ticks(struct hvm_virpit *vpit) -{ - s64 next_time; - PITChannelState *s = &(vpit->channels[0]); - if ( !active_timer(&(vpit->time_info.pit_timer)) ) { - next_time = pit_get_next_transition_time(s, s->next_transition_time); - if (next_time >= 0) - set_timer(&(s->hvm_time->pit_timer), s->next_transition_time); - } -} - -void pit_init(struct hvm_virpit *pit, struct vcpu *v) -{ - PITChannelState *s; - struct hvm_time_info *hvm_time; - - s = &pit->channels[0]; - /* the timer 0 is connected to an IRQ */ - s->irq = 0; - /* channel 0 need access the related time info for intr injection */ - hvm_time = s->hvm_time = &pit->time_info; - hvm_time->vcpu = v; - - init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor); - - register_portio_handler(PIT_BASE, 4, handle_pit_io); - - /* register the speaker port */ - register_portio_handler(0x61, 1, handle_speaker_io); - - pit_reset(pit); - - return; - -} diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/intercept.c Tue May 30 14:30:34 2006 -0500 @@ -214,6 +214,88 @@ void hlt_timer_fn(void *data) evtchn_set_pending(v, iopacket_port(v)); } +static __inline__ void missed_ticks(struct periodic_time *pt) +{ + int missed_ticks; + + missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period; + if ( missed_ticks++ >= 0 ) { + if ( missed_ticks > 1000 ) { + /* TODO: Adjust guest time togther */ + pt->pending_intr_nr ++; + } + else { + pt->pending_intr_nr += missed_ticks; + } + pt->scheduled += missed_ticks * pt->period; + } +} + +/* hook function for the platform periodic time */ +void pt_timer_fn(void *data) +{ + struct vcpu *v = data; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + /* pick up missed timer tick */ + missed_ticks(pt); + if ( 
test_bit(_VCPUF_running, &v->vcpu_flags) ) { + set_timer(&pt->timer, pt->scheduled); + } +} + +/* pick up missed timer ticks at deactive time */ +void pickup_deactive_ticks(struct periodic_time *pt) +{ + if ( !active_timer(&(pt->timer)) ) { + missed_ticks(pt); + set_timer(&pt->timer, pt->scheduled); + } +} + +/* + * period: fire frequency in ns. + */ +struct periodic_time * create_periodic_time( + struct vcpu *v, + u32 period, + char irq, + char one_shot) +{ + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + if ( pt->enabled ) { + if ( v->vcpu_id != 0 ) { + printk("HVM_PIT: start 2nd periodic time on non BSP!\n"); + } + stop_timer (&pt->timer); + pt->enabled = 0; + } + pt->pending_intr_nr = 0; + pt->first_injected = 0; + if (period < 900000) { /* < 0.9 ms */ + printk("HVM_PlatformTime: program too small period %u\n",period); + period = 900000; /* force to 0.9ms */ + } + pt->period = period; + pt->irq = irq; + pt->period_cycles = (u64)period * cpu_khz / 1000000L; + pt->one_shot = one_shot; + if ( one_shot ) { + printk("HVM_PL: No support for one shot platform time yet\n"); + } + pt->scheduled = NOW() + period; + set_timer (&pt->timer,pt->scheduled); + pt->enabled = 1; + return pt; +} + +void destroy_periodic_time(struct periodic_time *pt) +{ + if ( pt->enabled ) { + stop_timer(&pt->timer); + pt->enabled = 0; + } +} /* * Local variables: diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/svm/intr.c Tue May 30 14:30:34 2006 -0500 @@ -44,45 +44,33 @@ */ #define BSP_CPU(v) (!(v->vcpu_id)) -u64 svm_get_guest_time(struct vcpu *v) -{ - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - u64 host_tsc; - - rdtscll(host_tsc); - return host_tsc + time_info->cache_tsc_offset; -} - void svm_set_guest_time(struct vcpu *v, u64 gtime) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); u64 
host_tsc; rdtscll(host_tsc); - time_info->cache_tsc_offset = gtime - host_tsc; - v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset; + v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc; + v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset; } static inline void interrupt_post_injection(struct vcpu * v, int vector, int type) { - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); if ( is_pit_irq(v, vector, type) ) { - if ( !time_info->first_injected ) { - time_info->pending_intr_nr = 0; - time_info->last_pit_gtime = svm_get_guest_time(v); - time_info->first_injected = 1; + if ( !pt->first_injected ) { + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(v); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); + pt->first_injected = 1; } else { - time_info->pending_intr_nr--; + pt->pending_intr_nr--; + pt->last_plt_gtime += pt->period_cycles; + svm_set_guest_time(v, pt->last_plt_gtime); } - time_info->count_advance = 0; - time_info->count_point = NOW(); - - time_info->last_pit_gtime += time_info->period_cycles; - svm_set_guest_time(v, time_info->last_pit_gtime); } switch(type) @@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void) struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct hvm_virpit *vpit = &plat->vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; int intr_type = VLAPIC_DELIV_MODE_EXT; int intr_vector = -1; @@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void) if ( cpu_has_pending_irq(v) ) { intr_vector = cpu_get_interrupt(v, &intr_type); } - else if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) { - pic_set_irq(pic, 0, 0); - pic_set_irq(pic, 0, 
1); + else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); intr_vector = cpu_get_interrupt(v, &intr_type); } } @@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void) /* Re-injecting a PIT interruptt? */ if (re_injecting && is_pit_irq(v, intr_vector, intr_type)) { - ++time_info->pending_intr_nr; + ++pt->pending_intr_nr; } /* let's inject this interrupt */ TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0); diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/svm/svm.c Tue May 30 14:30:34 2006 -0500 @@ -51,13 +51,6 @@ #define SVM_EXTRA_DEBUG -#ifdef TRACE_BUFFER -static unsigned long trace_values[NR_CPUS][4]; -#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value -#else -#define TRACE_VMEXIT(index,value) ((void)0) -#endif - /* Useful define */ #define MAX_INST_SIZE 15 @@ -458,6 +451,9 @@ int start_svm(void) if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability))) return 0; + svm_globals[cpu].hsa = alloc_host_save_area(); + if (! 
svm_globals[cpu].hsa) + return 0; rdmsr(MSR_EFER, eax, edx); eax |= EFER_SVME; @@ -466,7 +462,6 @@ int start_svm(void) printk("AMD SVM Extension is enabled for cpu %d.\n", cpu ); /* Initialize the HSA for this core */ - svm_globals[cpu].hsa = alloc_host_save_area(); phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa ); phys_hsa_lo = (u32) phys_hsa; phys_hsa_hi = (u32) (phys_hsa >> 32); @@ -670,12 +665,11 @@ static void arch_svm_do_launch(struct vc static void svm_freeze_time(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) { - v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); - time_info->count_advance += (NOW() - time_info->count_point); - stop_timer(&(time_info->pit_timer)); + if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) { + v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + stop_timer(&(pt->timer)); } } @@ -752,7 +746,7 @@ static void svm_relinquish_guest_resourc } } - kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer); + kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -782,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v) void svm_migrate_timers(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; - - migrate_timer(&time_info->pit_timer, v->processor); - migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + if ( pt->enabled ) { + migrate_timer( &pt->timer, v->processor ); + migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor ); + } if ( hvm_apic_support(v->domain) && VLAPIC( v )) migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor ); } @@ -814,7 +810,6 @@ static int 
svm_do_page_fault(unsigned lo return 1; handle_mmio(va, va); - TRACE_VMEXIT(2,2); return 1; } @@ -840,7 +835,6 @@ static int svm_do_page_fault(unsigned lo return 1; } - TRACE_VMEXIT (2,2); handle_mmio(va, gpa); return 1; @@ -852,8 +846,6 @@ static int svm_do_page_fault(unsigned lo /* Let's make sure that the Guest TLB is flushed */ set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); } - - TRACE_VMEXIT (2,result); return result; } @@ -1899,14 +1891,8 @@ static inline void svm_do_msr_access(str regs->edx = 0; switch (regs->ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct hvm_time_info *time_info; - - rdtscll(msr_content); - time_info = &v->domain->arch.hvm_domain.vpit.time_info; - msr_content += time_info->cache_tsc_offset; + msr_content = hvm_get_guest_time(v); break; - } case MSR_IA32_SYSENTER_CS: msr_content = vmcb->sysenter_cs; break; @@ -1973,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb) { struct vcpu *v = current; - struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; s_time_t next_pit = -1, next_wakeup; __update_guest_eip(vmcb, 1); @@ -1983,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str return; if ( !v->vcpu_id ) - next_pit = get_pit_scheduled(v, vpit); + next_pit = get_scheduled(v, pt->irq, pt); next_wakeup = get_apictime_scheduled(v); if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) next_wakeup = next_pit; diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/svm/vmcb.c Tue May 30 14:30:34 2006 -0500 @@ -139,17 +139,20 @@ static int construct_vmcb_controls(struc /* The following is for I/O and MSR permision map */ iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE)); - - ASSERT(iopm); - memset(iopm, 0xff, IOPM_SIZE); - clear_bit(PC_DEBUG_PORT, iopm); + if (iopm) 
+ { + memset(iopm, 0xff, IOPM_SIZE); + clear_bit(PC_DEBUG_PORT, iopm); + } msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE)); - - ASSERT(msrpm); - memset(msrpm, 0xff, MSRPM_SIZE); + if (msrpm) + memset(msrpm, 0xff, MSRPM_SIZE); arch_svm->iopm = iopm; arch_svm->msrpm = msrpm; + + if (! iopm || ! msrpm) + return 1; vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm); vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm); @@ -439,19 +442,17 @@ void svm_do_resume(struct vcpu *v) void svm_do_resume(struct vcpu *v) { struct domain *d = v->domain; - struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm; svm_stts(v); /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( time_info->first_injected ) { - if ( v->domain->arch.hvm_domain.guest_time ) { - svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); - time_info->count_point = NOW(); - v->domain->arch.hvm_domain.guest_time = 0; + if ( pt->enabled && pt->first_injected ) { + if ( v->arch.hvm_vcpu.guest_time ) { + svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time); + v->arch.hvm_vcpu.guest_time = 0; } - pickup_deactive_ticks(vpit); + pickup_deactive_ticks(pt); } if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/io.c Tue May 30 14:30:34 2006 -0500 @@ -49,45 +49,33 @@ void __set_tsc_offset(u64 offset) #endif } -u64 get_guest_time(struct vcpu *v) -{ - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - u64 host_tsc; - - rdtscll(host_tsc); - return host_tsc + time_info->cache_tsc_offset; -} - void set_guest_time(struct vcpu *v, u64 gtime) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); u64 host_tsc; rdtscll(host_tsc); - time_info->cache_tsc_offset = 
gtime - host_tsc; - __set_tsc_offset(time_info->cache_tsc_offset); + v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc; + __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset); } static inline void interrupt_post_injection(struct vcpu * v, int vector, int type) { - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); if ( is_pit_irq(v, vector, type) ) { - if ( !time_info->first_injected ) { - time_info->pending_intr_nr = 0; - time_info->last_pit_gtime = get_guest_time(v); - time_info->first_injected = 1; + if ( !pt->first_injected ) { + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(v); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); + pt->first_injected = 1; } else { - time_info->pending_intr_nr--; - } - time_info->count_advance = 0; - time_info->count_point = NOW(); - - time_info->last_pit_gtime += time_info->period_cycles; - set_guest_time(v, time_info->last_pit_gtime); + pt->pending_intr_nr--; + pt->last_plt_gtime += pt->period_cycles; + set_guest_time(v, pt->last_plt_gtime); + } } switch(type) @@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void) unsigned long eflags; struct vcpu *v = current; struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct hvm_time_info *time_info = &plat->vpit.time_info; + struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; unsigned int idtv_info_field; unsigned long inst_len; @@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void) if ( v->vcpu_id == 0 ) hvm_pic_assist(v); - if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) { - pic_set_irq(pic, 0, 0); - pic_set_irq(pic, 0, 1); + if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); } has_ext_irq = cpu_has_pending_irq(v); @@ -232,19 +220,17 @@ void 
vmx_do_resume(struct vcpu *v) void vmx_do_resume(struct vcpu *v) { struct domain *d = v->domain; - struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; vmx_stts(); /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( time_info->first_injected ) { - if ( v->domain->arch.hvm_domain.guest_time ) { - time_info->count_point = NOW(); - set_guest_time(v, v->domain->arch.hvm_domain.guest_time); - v->domain->arch.hvm_domain.guest_time = 0; - } - pickup_deactive_ticks(vpit); + if ( pt->enabled && pt->first_injected ) { + if ( v->arch.hvm_vcpu.guest_time ) { + set_guest_time(v, v->arch.hvm_vcpu.guest_time); + v->arch.hvm_vcpu.guest_time = 0; + } + pickup_deactive_ticks(pt); } if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue May 30 14:30:34 2006 -0500 @@ -47,7 +47,7 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> -static unsigned long trace_values[NR_CPUS][4]; +static unsigned long trace_values[NR_CPUS][5]; #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value static void vmx_ctxt_switch_from(struct vcpu *v); @@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc } } - kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer); + kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write static void vmx_freeze_time(struct vcpu *v) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) { - 
v->domain->arch.hvm_domain.guest_time = get_guest_time(v); - time_info->count_advance += (NOW() - time_info->count_point); - stop_timer(&(time_info->pit_timer)); + if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) { + v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + stop_timer(&(pt->timer)); } } @@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc void vmx_migrate_timers(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; - - migrate_timer(&time_info->pit_timer, v->processor); - migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + if ( pt->enabled ) { + migrate_timer(&pt->timer, v->processor); + migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor); + } if ( hvm_apic_support(v->domain) && VLAPIC(v)) migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor); } @@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc (unsigned long)regs->edx); switch (regs->ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct hvm_time_info *time_info; - - rdtscll(msr_content); - time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - msr_content += time_info->cache_tsc_offset; - break; - } + msr_content = hvm_get_guest_time(v); + break; case MSR_IA32_SYSENTER_CS: __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content); break; @@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void) void vmx_vmexit_do_hlt(void) { struct vcpu *v=current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); s_time_t next_pit=-1,next_wakeup; if ( !v->vcpu_id ) - next_pit = get_pit_scheduled(v,vpit); + next_pit = get_scheduled(v, pt->irq, pt); next_wakeup = get_apictime_scheduled(v); if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) next_wakeup = next_pit; diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/mm.c --- 
a/xen/arch/x86/mm.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/mm.c Tue May 30 14:30:34 2006 -0500 @@ -260,9 +260,82 @@ void share_xen_page_with_privileged_gues share_xen_page_with_guest(page, dom_xen, readonly); } +#if defined(CONFIG_X86_PAE) + +#ifdef NDEBUG +/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */ +#define l3tab_needs_shadow(mfn) (mfn >= 0x100000) +#else +/* In debug builds we aggressively shadow PDPTs to exercise code paths. */ +#define l3tab_needs_shadow(mfn) ((mfn << PAGE_SHIFT) != __pa(idle_pg_table)) +#endif + +static l1_pgentry_t *fix_pae_highmem_pl1e; + +/* Cache the address of PAE high-memory fixmap page tables. */ +static int __init cache_pae_fixmap_address(void) +{ + unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0); + l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base); + fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base); + return 0; +} +__initcall(cache_pae_fixmap_address); + +static void __write_ptbase(unsigned long mfn) +{ + l3_pgentry_t *highmem_l3tab, *lowmem_l3tab; + struct pae_l3_cache *cache = ¤t->arch.pae_l3_cache; + unsigned int cpu = smp_processor_id(); + + /* Fast path 1: does this mfn need a shadow at all? */ + if ( !l3tab_needs_shadow(mfn) ) + { + write_cr3(mfn << PAGE_SHIFT); + return; + } + + /* Caching logic is not interrupt safe. */ + ASSERT(!in_irq()); + + /* Fast path 2: is this mfn already cached? */ + if ( cache->high_mfn == mfn ) + { + write_cr3(__pa(cache->table[cache->inuse_idx])); + return; + } + + /* Protects against pae_flush_pgd(). */ + spin_lock(&cache->lock); + + cache->inuse_idx ^= 1; + cache->high_mfn = mfn; + + /* Map the guest L3 table and copy to the chosen low-memory cache. 
*/ + *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); + highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); + lowmem_l3tab = cache->table[cache->inuse_idx]; + memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0])); + *(fix_pae_highmem_pl1e - cpu) = l1e_empty(); + + /* Install the low-memory L3 table in CR3. */ + write_cr3(__pa(lowmem_l3tab)); + + spin_unlock(&cache->lock); +} + +#else /* !CONFIG_X86_PAE */ + +static void __write_ptbase(unsigned long mfn) +{ + write_cr3(mfn << PAGE_SHIFT); +} + +#endif /* !CONFIG_X86_PAE */ + void write_ptbase(struct vcpu *v) { - write_cr3(pagetable_get_paddr(v->arch.monitor_table)); + __write_ptbase(pagetable_get_pfn(v->arch.monitor_table)); } void invalidate_shadow_ldt(struct vcpu *v) @@ -401,6 +474,7 @@ static int get_page_and_type_from_pagenr return 1; } +#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */ /* * We allow root tables to map each other (a.k.a. linear page tables). It * needs some special care with reference counts and access permissions: @@ -456,6 +530,7 @@ get_linear_pagetable( return 1; } +#endif /* !CONFIG_X86_PAE */ int get_page_from_l1e( @@ -564,10 +639,6 @@ get_page_from_l3e( rc = get_page_and_type_from_pagenr( l3e_get_pfn(l3e), PGT_l2_page_table | vaddr, d); -#if CONFIG_PAGING_LEVELS == 3 - if ( unlikely(!rc) ) - rc = get_linear_pagetable(l3e, pfn, d); -#endif return rc; } #endif /* 3 level */ @@ -773,6 +844,41 @@ static int create_pae_xen_mappings(l3_pg return 1; } +/* Flush a pgdir update into low-memory caches. */ +static void pae_flush_pgd( + unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) +{ + struct domain *d = page_get_owner(mfn_to_page(mfn)); + struct vcpu *v; + intpte_t _ol3e, _nl3e, _pl3e; + l3_pgentry_t *l3tab_ptr; + struct pae_l3_cache *cache; + + /* If below 4GB then the pgdir is not shadowed in low memory. 
*/ + if ( !l3tab_needs_shadow(mfn) ) + return; + + for_each_vcpu ( d, v ) + { + cache = &v->arch.pae_l3_cache; + + spin_lock(&cache->lock); + + if ( cache->high_mfn == mfn ) + { + l3tab_ptr = &cache->table[cache->inuse_idx][idx]; + _ol3e = l3e_get_intpte(*l3tab_ptr); + _nl3e = l3e_get_intpte(nl3e); + _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e); + BUG_ON(_pl3e != _ol3e); + } + + spin_unlock(&cache->lock); + } + + flush_tlb_mask(d->domain_dirty_cpumask); +} + static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { @@ -787,6 +893,7 @@ static inline int l1_backptr( #elif CONFIG_X86_64 # define create_pae_xen_mappings(pl3e) (1) +# define pae_flush_pgd(mfn, idx, nl3e) ((void)0) static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) @@ -886,14 +993,6 @@ static int alloc_l3_table(struct page_in ASSERT(!shadow_mode_refcounts(d)); -#ifdef CONFIG_X86_PAE - if ( pfn >= 0x100000 ) - { - MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); - return 0; - } -#endif - pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { @@ -1240,6 +1339,8 @@ static int mod_l3_entry(l3_pgentry_t *pl okay = create_pae_xen_mappings(pl3e); BUG_ON(!okay); + + pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); put_page_from_l3e(ol3e, pfn); return 1; @@ -2811,6 +2912,8 @@ long do_update_descriptor(u64 pa, u64 de return ret; } +typedef struct e820entry e820entry_t; +DEFINE_XEN_GUEST_HANDLE(e820entry_t); long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) { @@ -2869,6 +2972,39 @@ long arch_memory_op(int op, XEN_GUEST_HA break; } + case XENMEM_memory_map: + { + return -ENOSYS; + } + + case XENMEM_machine_memory_map: + { + struct xen_memory_map memmap; + XEN_GUEST_HANDLE(e820entry_t) buffer; + int count; + + if ( !IS_PRIV(current->domain) ) + return -EINVAL; + + if ( copy_from_guest(&memmap, arg, 1) ) + return -EFAULT; + if ( memmap.nr_entries < e820.nr_map + 1 ) 
+ return -EINVAL; + + buffer = guest_handle_cast(memmap.buffer, e820entry_t); + + count = min((unsigned int)e820.nr_map, memmap.nr_entries); + if ( copy_to_guest(buffer, &e820.map[0], count) < 0 ) + return -EFAULT; + + memmap.nr_entries = count; + + if ( copy_to_guest(arg, &memmap, 1) ) + return -EFAULT; + + return 0; + } + default: return subarch_memory_op(op, arg); } @@ -3074,7 +3210,7 @@ void ptwr_flush(struct domain *d, const if ( unlikely(d->arch.ptwr[which].vcpu != current) ) /* Don't use write_ptbase: it may switch to guest_user on x86/64! */ - write_cr3(pagetable_get_paddr( + __write_ptbase(pagetable_get_pfn( d->arch.ptwr[which].vcpu->arch.guest_table)); else TOGGLE_MODE(); @@ -3185,15 +3321,16 @@ static int ptwr_emulated_update( /* Turn a sub-word access into a full-word access. */ if ( bytes != sizeof(paddr_t) ) { - int rc; - paddr_t full; - unsigned int offset = addr & (sizeof(paddr_t)-1); + paddr_t full; + unsigned int offset = addr & (sizeof(paddr_t)-1); /* Align address; read full word. */ addr &= ~(sizeof(paddr_t)-1); - if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full, - sizeof(paddr_t))) ) - return rc; + if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) ) + { + propagate_page_fault(addr, 4); /* user mode, read fault */ + return X86EMUL_PROPAGATE_FAULT; + } /* Mask out bits provided by caller. */ full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8)); /* Shift the caller value and OR in the missing bits. 
*/ @@ -3271,7 +3408,8 @@ static int ptwr_emulated_write( static int ptwr_emulated_write( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, 0, val, bytes, 0); } @@ -3280,7 +3418,8 @@ static int ptwr_emulated_cmpxchg( unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, old, new, bytes, 1); } @@ -3290,7 +3429,8 @@ static int ptwr_emulated_cmpxchg8b( unsigned long old, unsigned long old_hi, unsigned long new, - unsigned long new_hi) + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt) { if ( CONFIG_PAGING_LEVELS == 2 ) return X86EMUL_UNHANDLEABLE; @@ -3299,7 +3439,7 @@ static int ptwr_emulated_cmpxchg8b( addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1); } -static struct x86_mem_emulator ptwr_mem_emulator = { +static struct x86_emulate_ops ptwr_emulate_ops = { .read_std = x86_emulate_read_std, .write_std = x86_emulate_write_std, .read_emulated = x86_emulate_read_std, @@ -3318,6 +3458,7 @@ int ptwr_do_page_fault(struct domain *d, l2_pgentry_t *pl2e, l2e; int which, flags; unsigned long l2_idx; + struct x86_emulate_ctxt emul_ctxt; if ( unlikely(shadow_mode_enabled(d)) ) return 0; @@ -3472,8 +3613,10 @@ int ptwr_do_page_fault(struct domain *d, return EXCRET_fault_fixed; emulate: - if ( x86_emulate_memop(guest_cpu_user_regs(), addr, - &ptwr_mem_emulator, X86EMUL_MODE_HOST) ) + emul_ctxt.regs = guest_cpu_user_regs(); + emul_ctxt.cr2 = addr; + emul_ctxt.mode = X86EMUL_MODE_HOST; + if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) ) return 0; perfc_incrc(ptwr_emulations); return EXCRET_fault_fixed; @@ -3596,11 +3739,10 @@ int map_pages_to_xen( } void __set_fixmap( - enum fixed_addresses idx, unsigned long p, unsigned long flags) -{ - if ( unlikely(idx >= __end_of_fixed_addresses) ) - BUG(); - map_pages_to_xen(fix_to_virt(idx), p >> 
PAGE_SHIFT, 1, flags); + enum fixed_addresses idx, unsigned long mfn, unsigned long flags) +{ + BUG_ON(idx >= __end_of_fixed_addresses); + map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags); } #ifdef MEMORY_GUARD diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/shadow.c Tue May 30 14:30:34 2006 -0500 @@ -430,7 +430,8 @@ no_shadow_page: perfc_value(shadow_l2_pages), perfc_value(hl2_table_pages), perfc_value(snapshot_pages)); - BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */ + /* XXX FIXME: try a shadow flush to free up some memory. */ + domain_crash_synchronous(); return 0; } @@ -3064,7 +3065,8 @@ static inline unsigned long init_bl2( if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) ) { printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn); - BUG(); /* XXX Deal gracefully with failure. */ + /* XXX Deal gracefully with failure. */ + domain_crash_synchronous(); } spl4e = (l4_pgentry_t *)map_domain_page(smfn); diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/shadow32.c Tue May 30 14:30:34 2006 -0500 @@ -246,7 +246,8 @@ alloc_shadow_page(struct domain *d, perfc_value(shadow_l2_pages), perfc_value(hl2_table_pages), perfc_value(snapshot_pages)); - BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */ + /* XXX FIXME: try a shadow flush to free up some memory. 
*/ + domain_crash_synchronous(); } smfn = page_to_mfn(page); @@ -983,6 +984,11 @@ alloc_p2m_table(struct domain *d) else { page = alloc_domheap_page(NULL); + if (!page) + { + printk("Alloc p2m table fail\n"); + domain_crash(d); + } l1tab = map_domain_page(page_to_mfn(page)); memset(l1tab, 0, PAGE_SIZE); diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/shadow_public.c Tue May 30 14:30:34 2006 -0500 @@ -324,6 +324,11 @@ static void alloc_monitor_pagetable(stru mmfn_info = alloc_domheap_page(NULL); ASSERT( mmfn_info ); + if (!mmfn_info) + { + printk("Fail to allocate monitor pagetable\n"); + domain_crash(v->domain); + } mmfn = page_to_mfn(mmfn_info); mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn); diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/traps.c Tue May 30 14:30:34 2006 -0500 @@ -876,7 +876,7 @@ static int emulate_privileged_op(struct PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; } - regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; break; case 0x6e: /* OUTSB */ @@ -902,7 +902,7 @@ static int emulate_privileged_op(struct outl_user((u32)data, (u16)regs->edx, v, regs); break; } - regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; break; } diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/arch/x86/x86_emulate.c Tue May 30 14:30:34 2006 -0500 @@ -363,12 +363,13 @@ do{ __asm__ __volatile__ ( #endif /* __i386__ */ /* Fetch next part of the instruction being emulated. 
*/ -#define insn_fetch(_type, _size, _eip) \ -({ unsigned long _x; \ - if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \ - goto done; \ - (_eip) += (_size); \ - (_type)_x; \ +#define insn_fetch(_type, _size, _eip) \ +({ unsigned long _x; \ + rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt); \ + if ( rc != 0 ) \ + goto done; \ + (_eip) += (_size); \ + (_type)_x; \ }) /* Access/update address held in a register, based on addressing mode. */ @@ -426,12 +427,10 @@ decode_register( return p; } -int +int x86_emulate_memop( - struct cpu_user_regs *regs, - unsigned long cr2, - struct x86_mem_emulator *ops, - int mode) + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { uint8_t b, d, sib, twobyte = 0, rex_prefix = 0; uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; @@ -439,9 +438,11 @@ x86_emulate_memop( unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; int rc = 0; struct operand src, dst; + unsigned long cr2 = ctxt->cr2; + int mode = ctxt->mode; /* Shadow copy of register state. Committed on successful emulation. */ - struct cpu_user_regs _regs = *regs; + struct cpu_user_regs _regs = *ctxt->regs; switch ( mode ) { @@ -628,7 +629,7 @@ x86_emulate_memop( dst.bytes = (d & ByteOp) ? 
1 : op_bytes; if ( !(d & Mov) && /* optimisation - avoid slow emulated read */ ((rc = ops->read_emulated((unsigned long)dst.ptr, - &dst.val, dst.bytes)) != 0) ) + &dst.val, dst.bytes, ctxt)) != 0) ) goto done; break; } @@ -670,7 +671,7 @@ x86_emulate_memop( src.type = OP_MEM; src.ptr = (unsigned long *)cr2; if ( (rc = ops->read_emulated((unsigned long)src.ptr, - &src.val, src.bytes)) != 0 ) + &src.val, src.bytes, ctxt)) != 0 ) goto done; src.orig_val = src.val; break; @@ -776,7 +777,7 @@ x86_emulate_memop( if ( mode == X86EMUL_MODE_PROT64 ) dst.bytes = 8; if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp), - &dst.val, dst.bytes)) != 0 ) + &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment(_regs.esp, dst.bytes); break; @@ -854,12 +855,12 @@ x86_emulate_memop( { dst.bytes = 8; if ( (rc = ops->read_std((unsigned long)dst.ptr, - &dst.val, 8)) != 0 ) + &dst.val, 8, ctxt)) != 0 ) goto done; } - register_address_increment(_regs.esp, -dst.bytes); + register_address_increment(_regs.esp, -(int)dst.bytes); if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp), - dst.val, dst.bytes)) != 0 ) + dst.val, dst.bytes, ctxt)) != 0 ) goto done; dst.val = dst.orig_val; /* skanky: disable writeback */ break; @@ -887,10 +888,11 @@ x86_emulate_memop( case OP_MEM: if ( lock_prefix ) rc = ops->cmpxchg_emulated( - (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes); + (unsigned long)dst.ptr, dst.orig_val, + dst.val, dst.bytes, ctxt); else rc = ops->write_emulated( - (unsigned long)dst.ptr, dst.val, dst.bytes); + (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt); if ( rc != 0 ) goto done; default: @@ -899,7 +901,7 @@ x86_emulate_memop( } /* Commit shadow register state. */ - *regs = _regs; + *ctxt->regs = _regs; done: return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0; @@ -911,11 +913,11 @@ x86_emulate_memop( { if ( _regs.ecx == 0 ) { - regs->eip = _regs.eip; + ctxt->regs->eip = _regs.eip; goto done; } _regs.ecx--; - _regs.eip = regs->eip; + _regs.eip = ctxt->regs->eip; } switch ( b ) { @@ -928,20 +930,21 @@ x86_emulate_memop( dst.ptr = (unsigned long *)cr2; if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds, _regs.esi), - &dst.val, dst.bytes)) != 0 ) + &dst.val, dst.bytes, ctxt)) != 0 ) goto done; } else { /* Read fault: source is special memory. */ dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi); - if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &dst.val, + dst.bytes, ctxt)) != 0 ) goto done; } register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xa6 ... 0xa7: /* cmps */ DPRINTF("Urk! I don't handle CMPS.\n"); @@ -952,16 +955,16 @@ x86_emulate_memop( dst.ptr = (unsigned long *)cr2; dst.val = _regs.eax; register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xac ... 0xad: /* lods */ dst.type = OP_REG; dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.ptr = (unsigned long *)&_regs.eax; - if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! 
I don't handle SCAS.\n"); @@ -1074,8 +1077,8 @@ x86_emulate_memop( #if defined(__i386__) { unsigned long old_lo, old_hi; - if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) || - ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) ) + if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) || + ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) ) goto done; if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) ) { @@ -1090,8 +1093,8 @@ x86_emulate_memop( } else { - if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, - _regs.ebx, _regs.ecx)) != 0 ) + if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx, + _regs.ecx, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } @@ -1100,7 +1103,7 @@ x86_emulate_memop( #elif defined(__x86_64__) { unsigned long old, new; - if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 ) goto done; if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || ((uint32_t)(old>>32) != (uint32_t)_regs.edx) ) @@ -1112,7 +1115,7 @@ x86_emulate_memop( else { new = (_regs.ecx<<32)|(uint32_t)_regs.ebx; - if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 ) + if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } @@ -1136,7 +1139,8 @@ x86_emulate_read_std( x86_emulate_read_std( unsigned long addr, unsigned long *val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { *val = 0; if ( copy_from_user((void *)val, (void *)addr, bytes) ) @@ -1151,7 +1155,8 @@ x86_emulate_write_std( x86_emulate_write_std( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { if ( copy_to_user((void *)addr, (void *)&val, bytes) ) { diff -r e74246451527 -r f54d38cea8ac xen/common/Makefile --- a/xen/common/Makefile Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/Makefile Tue May 30 14:30:34 2006 -0500 @@ -13,6 +13,7 @@ obj-y += page_alloc.o obj-y += 
page_alloc.o obj-y += rangeset.o obj-y += sched_bvt.o +obj-y += sched_credit.o obj-y += sched_sedf.o obj-y += schedule.o obj-y += softirq.o diff -r e74246451527 -r f54d38cea8ac xen/common/acm_ops.c --- a/xen/common/acm_ops.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/acm_ops.c Tue May 30 14:30:34 2006 -0500 @@ -32,100 +32,94 @@ #ifndef ACM_SECURITY -long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op) + +long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) { return -ENOSYS; } + #else -enum acm_operation { - POLICY, /* access to policy interface (early drop) */ - GETPOLICY, /* dump policy cache */ - SETPOLICY, /* set policy cache (controls security) */ - DUMPSTATS, /* dump policy statistics */ - GETSSID, /* retrieve ssidref for domain id (decide inside authorized domains) */ - GETDECISION /* retrieve ACM decision from authorized domains */ -}; - -int acm_authorize_acm_ops(struct domain *d, enum acm_operation pops) + +int acm_authorize_acm_ops(struct domain *d) { /* currently, policy management functions are restricted to privileged domains */ if (!IS_PRIV(d)) return -EPERM; - return 0; } -long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op) -{ - long ret = 0; - struct acm_op curop, *op = &curop; - - if (acm_authorize_acm_ops(current->domain, POLICY)) + +long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) +{ + long rc = -EFAULT; + + if (acm_authorize_acm_ops(current->domain)) return -EPERM; - if (copy_from_guest(op, u_acm_op, 1)) - return -EFAULT; - - if (op->interface_version != ACM_INTERFACE_VERSION) - return -EACCES; - - switch (op->cmd) + switch ( cmd ) { - case ACM_SETPOLICY: - { - ret = acm_authorize_acm_ops(current->domain, SETPOLICY); - if (!ret) - ret = acm_set_policy(op->u.setpolicy.pushcache, - op->u.setpolicy.pushcache_size, 1); - } - break; - - case ACM_GETPOLICY: - { - ret = acm_authorize_acm_ops(current->domain, GETPOLICY); - if (!ret) - ret = acm_get_policy(op->u.getpolicy.pullcache, - op->u.getpolicy.pullcache_size); - if (!ret) - 
copy_to_guest(u_acm_op, op, 1); - } - break; - - case ACM_DUMPSTATS: - { - ret = acm_authorize_acm_ops(current->domain, DUMPSTATS); - if (!ret) - ret = acm_dump_statistics(op->u.dumpstats.pullcache, - op->u.dumpstats.pullcache_size); - if (!ret) - copy_to_guest(u_acm_op, op, 1); - } - break; - - case ACM_GETSSID: - { + + case ACMOP_setpolicy: { + struct acm_setpolicy setpolicy; + if (copy_from_guest(&setpolicy, arg, 1) != 0) + return -EFAULT; + if (setpolicy.interface_version != ACM_INTERFACE_VERSION) + return -EACCES; + + rc = acm_set_policy(setpolicy.pushcache, + setpolicy.pushcache_size, 1); + break; + } + + case ACMOP_getpolicy: { + struct acm_getpolicy getpolicy; + if (copy_from_guest(&getpolicy, arg, 1) != 0) + return -EFAULT; + if (getpolicy.interface_version != ACM_INTERFACE_VERSION) + return -EACCES; + + rc = acm_get_policy(getpolicy.pullcache, + getpolicy.pullcache_size); + break; + } + + case ACMOP_dumpstats: { + struct acm_dumpstats dumpstats; + if (copy_from_guest(&dumpstats, arg, 1) != 0) + return -EFAULT; + if (dumpstats.interface_version != ACM_INTERFACE_VERSION) + return -EACCES; + + rc = acm_dump_statistics(dumpstats.pullcache, + dumpstats.pullcache_size); + break; + } + + case ACMOP_getssid: { + struct acm_getssid getssid; ssidref_t ssidref; - ret = acm_authorize_acm_ops(current->domain, GETSSID); - if (ret) - break; - - if (op->u.getssid.get_ssid_by == SSIDREF) - ssidref = op->u.getssid.id.ssidref; - else if (op->u.getssid.get_ssid_by == DOMAINID) - { - struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid); - if (!subj) - { - ret = -ESRCH; /* domain not found */ - break; - } - if (subj->ssid == NULL) - { - put_domain(subj); - ret = -ESRCH; + if (copy_from_guest(&getssid, arg, 1) != 0) + return -EFAULT; + if (getssid.interface_version != ACM_INTERFACE_VERSION) + return -EACCES; + + if (getssid.get_ssid_by == SSIDREF) + ssidref = getssid.id.ssidref; + else if (getssid.get_ssid_by == DOMAINID) + { + struct domain *subj = 
find_domain_by_id(getssid.id.domainid); + if (!subj) + { + rc = -ESRCH; /* domain not found */ + break; + } + if (subj->ssid == NULL) + { + put_domain(subj); + rc = -ESRCH; break; } ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; @@ -133,39 +127,36 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t } else { - ret = -ESRCH; - break; - } - ret = acm_get_ssid(ssidref, - op->u.getssid.ssidbuf, - op->u.getssid.ssidbuf_size); - if (!ret) - copy_to_guest(u_acm_op, op, 1); - } - break; - - case ACM_GETDECISION: - { + rc = -ESRCH; + break; + } + rc = acm_get_ssid(ssidref, getssid.ssidbuf, getssid.ssidbuf_size); + break; + } + + case ACMOP_getdecision: { + struct acm_getdecision getdecision; ssidref_t ssidref1, ssidref2; - ret = acm_authorize_acm_ops(current->domain, GETDECISION); - if (ret) - break; - - if (op->u.getdecision.get_decision_by1 == SSIDREF) - ssidref1 = op->u.getdecision.id1.ssidref; - else if (op->u.getdecision.get_decision_by1 == DOMAINID) - { - struct domain *subj = find_domain_by_id(op->u.getdecision.id1.domainid); - if (!subj) - { - ret = -ESRCH; /* domain not found */ - break; - } - if (subj->ssid == NULL) - { - put_domain(subj); - ret = -ESRCH; + if (copy_from_guest(&getdecision, arg, 1) != 0) + return -EFAULT; + if (getdecision.interface_version != ACM_INTERFACE_VERSION) + return -EACCES; + + if (getdecision.get_decision_by1 == SSIDREF) + ssidref1 = getdecision.id1.ssidref; + else if (getdecision.get_decision_by1 == DOMAINID) + { + struct domain *subj = find_domain_by_id(getdecision.id1.domainid); + if (!subj) + { + rc = -ESRCH; /* domain not found */ + break; + } + if (subj->ssid == NULL) + { + put_domain(subj); + rc = -ESRCH; break; } ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; @@ -173,23 +164,23 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t } else { - ret = -ESRCH; - break; - } - if (op->u.getdecision.get_decision_by2 == SSIDREF) - ssidref2 = op->u.getdecision.id2.ssidref; - else if (op->u.getdecision.get_decision_by2 == 
DOMAINID) - { - struct domain *subj = find_domain_by_id(op->u.getdecision.id2.domainid); - if (!subj) - { - ret = -ESRCH; /* domain not found */ + rc = -ESRCH; + break; + } + if (getdecision.get_decision_by2 == SSIDREF) + ssidref2 = getdecision.id2.ssidref; + else if (getdecision.get_decision_by2 == DOMAINID) + { + struct domain *subj = find_domain_by_id(getdecision.id2.domainid); + if (!subj) + { + rc = -ESRCH; /* domain not found */ break;; } if (subj->ssid == NULL) { put_domain(subj); - ret = -ESRCH; + rc = -ESRCH; break; } ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; @@ -197,34 +188,35 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t } else { - ret = -ESRCH; - break; - } - ret = acm_get_decision(ssidref1, ssidref2, op->u.getdecision.hook); - - if (ret == ACM_ACCESS_PERMITTED) - { - op->u.getdecision.acm_decision = ACM_ACCESS_PERMITTED; - ret = 0; - } - else if (ret == ACM_ACCESS_DENIED) - { - op->u.getdecision.acm_decision = ACM_ACCESS_DENIED; - ret = 0; - } - else - ret = -ESRCH; - - if (!ret) - copy_to_guest(u_acm_op, op, 1); - } - break; + rc = -ESRCH; + break; + } + rc = acm_get_decision(ssidref1, ssidref2, getdecision.hook); + + if (rc == ACM_ACCESS_PERMITTED) + { + getdecision.acm_decision = ACM_ACCESS_PERMITTED; + rc = 0; + } + else if (rc == ACM_ACCESS_DENIED) + { + getdecision.acm_decision = ACM_ACCESS_DENIED; + rc = 0; + } + else + rc = -ESRCH; + + if ( (rc == 0) && (copy_to_guest(arg, &getdecision, 1) != 0) ) + rc = -EFAULT; + break; + } default: - ret = -ESRCH; - } - - return ret; + rc = -ENOSYS; + break; + } + + return rc; } #endif diff -r e74246451527 -r f54d38cea8ac xen/common/elf.c --- a/xen/common/elf.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/elf.c Tue May 30 14:30:34 2006 -0500 @@ -23,10 +23,10 @@ int parseelfimage(struct domain_setup_in Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr; Elf_Phdr *phdr; Elf_Shdr *shdr; - unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base; + unsigned long kernstart = ~0UL, kernend=0UL, 
vaddr, virt_base, elf_pa_off; char *shstrtab, *guestinfo=NULL, *p; char *elfbase = (char *)dsi->image_addr; - int h; + int h, virt_base_defined, elf_pa_off_defined; if ( !elf_sanity_check(ehdr) ) return -EINVAL; @@ -84,29 +84,40 @@ int parseelfimage(struct domain_setup_in if ( guestinfo == NULL ) guestinfo = ""; - virt_base = 0; - if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL ) - virt_base = simple_strtoul(p+10, &p, 0); - dsi->elf_paddr_offset = virt_base; - if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL ) - dsi->elf_paddr_offset = simple_strtoul(p+17, &p, 0); + /* Initial guess for virt_base is 0 if it is not explicitly defined. */ + p = strstr(guestinfo, "VIRT_BASE="); + virt_base_defined = (p != NULL); + virt_base = virt_base_defined ? simple_strtoul(p+10, &p, 0) : 0; + + /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */ + p = strstr(guestinfo, "ELF_PADDR_OFFSET="); + elf_pa_off_defined = (p != NULL); + elf_pa_off = elf_pa_off_defined ? simple_strtoul(p+17, &p, 0) : virt_base; + + if ( elf_pa_off_defined && !virt_base_defined ) + goto bad_image; for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; - vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base; + vaddr = phdr->p_paddr - elf_pa_off + virt_base; + if ( (vaddr + phdr->p_memsz) < vaddr ) + goto bad_image; if ( vaddr < kernstart ) kernstart = vaddr; if ( (vaddr + phdr->p_memsz) > kernend ) kernend = vaddr + phdr->p_memsz; } - if ( virt_base ) - dsi->v_start = virt_base; - else - dsi->v_start = kernstart; + /* + * Legacy compatibility and images with no __xen_guest section: assume + * header addresses are virtual addresses, and that guest memory should be + * mapped starting at kernel load address. + */ + dsi->v_start = virt_base_defined ? virt_base : kernstart; + dsi->elf_paddr_offset = elf_pa_off_defined ? 
elf_pa_off : dsi->v_start; dsi->v_kernentry = ehdr->e_entry; if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL ) @@ -114,11 +125,9 @@ int parseelfimage(struct domain_setup_in if ( (kernstart > kernend) || (dsi->v_kernentry < kernstart) || - (dsi->v_kernentry > kernend) ) - { - printk("Malformed ELF image.\n"); - return -EINVAL; - } + (dsi->v_kernentry > kernend) || + (dsi->v_start > kernstart) ) + goto bad_image; if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL ) dsi->load_symtab = 1; @@ -130,6 +139,10 @@ int parseelfimage(struct domain_setup_in loadelfsymtab(dsi, 0); return 0; + + bad_image: + printk("Malformed ELF image.\n"); + return -EINVAL; } int loadelfimage(struct domain_setup_info *dsi) diff -r e74246451527 -r f54d38cea8ac xen/common/grant_table.c --- a/xen/common/grant_table.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/grant_table.c Tue May 30 14:30:34 2006 -0500 @@ -505,15 +505,12 @@ gnttab_setup_table( goto out; } - if ( op.nr_frames <= NR_GRANT_FRAMES ) - { - ASSERT(d->grant_table != NULL); - op.status = GNTST_okay; - for ( i = 0; i < op.nr_frames; i++ ) - { - gmfn = gnttab_shared_gmfn(d, d->grant_table, i); - (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1); - } + ASSERT(d->grant_table != NULL); + op.status = GNTST_okay; + for ( i = 0; i < op.nr_frames; i++ ) + { + gmfn = gnttab_shared_gmfn(d, d->grant_table, i); + (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1); } put_domain(d); diff -r e74246451527 -r f54d38cea8ac xen/common/kernel.c --- a/xen/common/kernel.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/kernel.c Tue May 30 14:30:34 2006 -0500 @@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H switch ( fi.submap_idx ) { case 0: - fi.submap = 0; + fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb); if ( shadow_mode_translate(current->domain) ) fi.submap |= (1U << XENFEAT_writable_page_tables) | - (1U << XENFEAT_auto_translated_physmap) | - (1U << XENFEAT_pae_pgdir_above_4gb); + (1U << XENFEAT_auto_translated_physmap); 
if ( supervisor_mode_kernel ) fi.submap |= 1U << XENFEAT_supervisor_mode_kernel; break; diff -r e74246451527 -r f54d38cea8ac xen/common/schedule.c --- a/xen/common/schedule.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/schedule.c Tue May 30 14:30:34 2006 -0500 @@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP extern struct scheduler sched_bvt_def; extern struct scheduler sched_sedf_def; +extern struct scheduler sched_credit_def; static struct scheduler *schedulers[] = { &sched_bvt_def, &sched_sedf_def, + &sched_credit_def, NULL }; @@ -639,6 +641,8 @@ static void t_timer_fn(void *unused) page_scrub_schedule_work(); + SCHED_OP(tick, cpu); + set_timer(&t_timer[cpu], NOW() + MILLISECS(10)); } @@ -681,6 +685,7 @@ void __init scheduler_init(void) printk("Could not find scheduler: %s\n", opt_sched); printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); + SCHED_OP(init); if ( idle_vcpu[0] != NULL ) { diff -r e74246451527 -r f54d38cea8ac xen/common/trace.c --- a/xen/common/trace.c Tue May 30 12:52:02 2006 -0500 +++ b/xen/common/trace.c Tue May 30 14:30:34 2006 -0500 @@ -91,6 +91,7 @@ static int alloc_trace_bufs(void) if ( (rawbuf = alloc_xenheap_pages(order)) == NULL ) { printk("Xen trace buffers: memory allocation failed\n"); + opt_tbuf_size = 0; return -EINVAL; } @@ -135,10 +136,7 @@ static int tb_set_size(int size) opt_tbuf_size = size; if ( alloc_trace_bufs() != 0 ) - { - opt_tbuf_size = 0; - return -EINVAL; - } + return -EINVAL; printk("Xen trace buffers: initialized\n"); return 0; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/config.h --- a/xen/include/asm-ia64/config.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/config.h Tue May 30 14:30:34 2006 -0500 @@ -97,6 +97,13 @@ extern char _end[]; /* standard ELF symb //#define HZ 1000 // FIXME SMP: leave SMP for a later time +/* A power-of-two value greater than or equal to number of hypercalls. 
*/ +#define NR_hypercalls 64 + +#if NR_hypercalls & (NR_hypercalls - 1) +#error "NR_hypercalls must be a power-of-two value" +#endif + /////////////////////////////////////////////////////////////// // xen/include/asm/config.h // Natural boundary upon TR size to define xenheap space @@ -239,6 +246,10 @@ void dummy_called(char *function); // these declarations got moved at some point, find a better place for them extern int ht_per_core; +#ifdef CONFIG_XEN_IA64_DOM0_VP +#define CONFIG_SHADOW 1 +#endif + // xen/include/asm/config.h /****************************************************************************** * config.h diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/dom_fw.h --- a/xen/include/asm-ia64/dom_fw.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/dom_fw.h Tue May 30 14:30:34 2006 -0500 @@ -5,7 +5,7 @@ * Dan Magenheimer (dan.magenheimer@xxxxxx) */ -extern unsigned long dom_fw_setup(struct domain *, const char *, int); +#include <linux/efi.h> #ifndef MB #define MB (1024*1024) @@ -55,7 +55,7 @@ extern unsigned long dom_fw_setup(struct #define FW_HYPERCALL_SAL_CALL_INDEX 0x82UL #define FW_HYPERCALL_SAL_CALL_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_SAL_CALL_INDEX) -#define FW_HYPERCALL_SAL_CALL 0x1001UL +#define FW_HYPERCALL_SAL_CALL 0x1100UL /* * EFI is accessed via the EFI system table, which contains: @@ -94,6 +94,7 @@ extern unsigned long dom_fw_setup(struct #define FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX 9UL /* these are hypercall numbers */ +#define FW_HYPERCALL_EFI_CALL 0x300UL #define FW_HYPERCALL_EFI_GET_TIME 0x300UL #define FW_HYPERCALL_EFI_SET_TIME 0x301UL #define FW_HYPERCALL_EFI_GET_WAKEUP_TIME 0x302UL @@ -125,7 +126,7 @@ extern unsigned long dom_fw_setup(struct */ #define FW_HYPERCALL_FIRST_ARCH 0x300UL -#define FW_HYPERCALL_IPI 0x380UL +#define FW_HYPERCALL_IPI 0x400UL /* Xen/ia64 user hypercalls. Only used for debugging. 
*/ #define FW_HYPERCALL_FIRST_USER 0xff00UL @@ -133,9 +134,16 @@ extern unsigned long dom_fw_setup(struct /* Interrupt vector used for os boot rendez vous. */ #define XEN_SAL_BOOT_RENDEZ_VEC 0xF3 +#define FW_HYPERCALL_NUM_MASK_HIGH ~0xffUL +#define FW_HYPERCALL_NUM_MASK_LOW 0xffUL + +#define EFI_MEMDESC_VERSION 1 + extern struct ia64_pal_retval xen_pal_emulator(UINT64, u64, u64, u64); extern struct sal_ret_values sal_emulator (long index, unsigned long in1, unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, unsigned long in6, unsigned long in7); extern struct ia64_pal_retval pal_emulator_static (unsigned long); +extern unsigned long dom_fw_setup (struct domain *, const char *, int); +extern efi_status_t efi_emulator (struct pt_regs *regs, unsigned long *fault); extern void build_pal_hypercall_bundles(unsigned long *imva, unsigned long brkimm, unsigned long hypnum); extern void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret); diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/domain.h Tue May 30 14:30:34 2006 -0500 @@ -22,8 +22,13 @@ extern void panic_domain(struct pt_regs extern void panic_domain(struct pt_regs *, const char *, ...) __attribute__ ((noreturn, format (printf, 2, 3))); +struct mm_struct { + pgd_t * pgd; + // atomic_t mm_users; /* How many users with user space? 
*/ +}; + struct arch_domain { - struct mm_struct *mm; + struct mm_struct mm; unsigned long metaphysical_rr0; unsigned long metaphysical_rr4; @@ -54,10 +59,11 @@ struct arch_domain { unsigned long initrd_start; unsigned long initrd_len; char *cmdline; + int efi_virt_mode; /* phys : 0 , virt : 1 */ + void *efi_runtime; }; #define xen_vastart arch.xen_vastart #define xen_vaend arch.xen_vaend -#define shared_info_va arch.shared_info_va #define INT_ENABLE_OFFSET(v) \ (sizeof(vcpu_info_t) * (v)->vcpu_id + \ offsetof(vcpu_info_t, evtchn_upcall_mask)) @@ -69,8 +75,6 @@ struct arch_vcpu { TR_ENTRY dtlb; unsigned int itr_regions; unsigned int dtr_regions; - unsigned long itlb_pte; - unsigned long dtlb_pte; unsigned long irr[4]; unsigned long insvc[4]; unsigned long tc_regions; @@ -106,27 +110,15 @@ struct arch_vcpu { struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ }; -//#define thread arch._thread - -// FOLLOWING FROM linux-2.6.7/include/sched.h - -struct mm_struct { - pgd_t * pgd; - // atomic_t mm_users; /* How many users with user space? 
*/ - struct list_head pt_list; /* List of pagetable */ -}; - -extern struct mm_struct init_mm; - struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr); void assign_new_domain0_page(struct domain *d, unsigned long mpaddr); +void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr); void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr); void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags); #ifdef CONFIG_XEN_IA64_DOM0_VP unsigned long assign_domain_mmio_page(struct domain *d, unsigned long mpaddr, unsigned long size); unsigned long assign_domain_mach_page(struct domain *d, unsigned long mpaddr, unsigned long size); unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3); -unsigned long dom0vp_populate_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order, unsigned int address_bits); unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order); unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn, unsigned int flags, domid_t domid); #endif diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/event.h --- a/xen/include/asm-ia64/event.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/event.h Tue May 30 14:30:34 2006 -0500 @@ -29,7 +29,7 @@ static inline void evtchn_notify(struct smp_send_event_check_cpu(v->processor); if(!VMX_DOMAIN(v)) - vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector); + vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector); } /* Note: Bitwise operations result in fast code with no branches. 
*/ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/grant_table.h --- a/xen/include/asm-ia64/grant_table.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/grant_table.h Tue May 30 14:30:34 2006 -0500 @@ -7,12 +7,33 @@ #define ORDER_GRANT_FRAMES 0 +#ifndef CONFIG_XEN_IA64_DOM0_VP +// for grant map/unmap #define create_grant_host_mapping(a, f, fl) 0 #define destroy_grant_host_mapping(a, f, fl) 0 +// for grant transfer #define steal_page_for_grant_transfer(d, p) 0 -#define gnttab_create_shared_page(d, t, i) ((void)0) +#else +// for grant map/unmap +int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags); +int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags); + +// for grant transfer +int steal_page_for_grant_transfer(struct domain *d, struct page_info *page); +void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn); + +#endif + +// for grant table shared page +#define gnttab_create_shared_page(d, t, i) \ + do { \ + share_xen_page_with_guest( \ + virt_to_page((char *)(t)->shared + ((i) << PAGE_SHIFT)), \ + (d), XENSHARE_writable); \ + } while (0) + /* Guest physical address of the grant table. */ #define IA64_GRANT_TABLE_PADDR (1UL << 40) @@ -20,13 +41,21 @@ #define gnttab_shared_maddr(d, t, i) \ virt_to_maddr((char*)(t)->shared + ((i) << PAGE_SHIFT)) -#define gnttab_shared_gmfn(d, t, i) \ +#ifndef CONFIG_XEN_IA64_DOM0_VP +# define gnttab_shared_gmfn(d, t, i) \ ({ ((d) == dom0) ? 
\ (virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i): \ assign_domain_page((d), \ IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \ gnttab_shared_maddr(d, t, i)), \ (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);}) +#else +# define gnttab_shared_gmfn(d, t, i) \ + ({ assign_domain_page((d), \ + IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \ + gnttab_shared_maddr((d), (t), (i))); \ + (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);}) +#endif #define gnttab_log_dirty(d, f) ((void)0) diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/pgalloc.h --- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Tue May 30 14:30:34 2006 -0500 @@ -139,12 +139,14 @@ static inline void pte_free(struct page { pgtable_quicklist_free(page_address(pte)); } +#endif static inline void pte_free_kernel(pte_t * pte) { pgtable_quicklist_free(pte); } +#ifndef XEN #define __pte_free_tlb(tlb, pte) pte_free(pte) #endif diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/pgtable.h --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue May 30 14:30:34 2006 -0500 @@ -383,6 +383,7 @@ ptep_test_and_clear_dirty (struct vm_are return 1; #endif } +#endif static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -396,6 +397,19 @@ ptep_get_and_clear(struct mm_struct *mm, #endif } +static inline pte_t +ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte) +{ +#ifdef CONFIG_SMP + return __pte(xchg((long *) ptep, pte_val(npte))); +#else + pte_t pte = *ptep; + set_pte (ptep, npte); + return pte; +#endif +} + +#ifndef XEN static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Tue May 30 12:52:02 2006 -0500 +++ 
b/xen/include/asm-ia64/mm.h Tue May 30 14:30:34 2006 -0500 @@ -12,7 +12,7 @@ #include <asm/processor.h> #include <asm/atomic.h> -#include <asm/flushtlb.h> +#include <asm/tlbflush.h> #include <asm/io.h> #include <public/xen.h> @@ -128,8 +128,10 @@ static inline u32 pickle_domptr(struct d #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d)) -/* Dummy now */ -#define share_xen_page_with_guest(p, d, r) do { } while (0) +#define XENSHARE_writable 0 +#define XENSHARE_readonly 1 +void share_xen_page_with_guest(struct page_info *page, + struct domain *d, int readonly); #define share_xen_page_with_privileged_guests(p, r) do { } while (0) extern struct page_info *frame_table; @@ -471,6 +473,4 @@ extern unsigned long ____lookup_domain_m /* Arch-specific portion of memory_op hypercall. */ #define arch_memory_op(op, arg) (-ENOSYS) -extern void assign_domain_page(struct domain *d, unsigned long mpaddr, - unsigned long physaddr); #endif /* __ASM_IA64_MM_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/shadow.h --- a/xen/include/asm-ia64/shadow.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/shadow.h Tue May 30 14:30:34 2006 -0500 @@ -1,2 +1,57 @@ -/* empty */ +/****************************************************************************** + * include/asm-ia64/shadow.h + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _XEN_SHADOW_H +#define _XEN_SHADOW_H + +#include <xen/config.h> + +#ifdef CONFIG_XEN_IA64_DOM0_VP +#ifndef CONFIG_SHADOW +# error "CONFIG_SHADOW must be defined" +#endif + +#define shadow_drop_references(d, p) ((void)0) + +// this is used only x86-specific code +//#define shadow_sync_and_drop_references(d, p) ((void)0) + +#define shadow_mode_translate(d) (1) + +// for granttab transfer. XENMEM_populate_physmap +void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn); +// for balloon driver. XENMEM_decrease_reservation +void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn); +#endif + +#endif // _XEN_SHADOW_H + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vcpu.h --- a/xen/include/asm-ia64/vcpu.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/vcpu.h Tue May 30 14:30:34 2006 -0500 @@ -135,7 +135,10 @@ extern IA64FAULT vcpu_set_pkr(VCPU *vcpu extern IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val); extern IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key); /* TLB */ -extern void vcpu_purge_tr_entry(TR_ENTRY *trp); +static inline void vcpu_purge_tr_entry(TR_ENTRY *trp) +{ + trp->pte.val = 0; +} extern IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 padr, UINT64 itir, UINT64 ifa); extern IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 padr, @@ -148,8 +151,7 @@ extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range); extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 
addr_range); extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range); -extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, - BOOLEAN is_data, BOOLEAN in_tpa, +extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha); extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr); extern IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa); diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vhpt.h --- a/xen/include/asm-ia64/vhpt.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/vhpt.h Tue May 30 14:30:34 2006 -0500 @@ -4,18 +4,17 @@ #define VHPT_ENABLED 1 /* Size of the VHPT. */ -#define VHPT_SIZE_LOG2 24 +#ifdef CONFIG_XEN_IA64_DOM0_VP +// XXX work around to avoid trigerring xenLinux software lock up detection. +# define VHPT_SIZE_LOG2 16 // 64KB +#else +# define VHPT_SIZE_LOG2 24 // 16MB default +#endif /* Number of entries in the VHPT. The size of an entry is 4*8B == 32B */ #define VHPT_NUM_ENTRIES (1 << (VHPT_SIZE_LOG2 - 5)) -#ifdef CONFIG_SMP -# define vhpt_flush_all() smp_vhpt_flush_all() -#else -# define vhpt_flush_all() vhpt_flush() -#endif // FIXME: These should be automatically generated - #define VLE_PGFLAGS_OFFSET 0 #define VLE_ITIR_OFFSET 8 #define VLE_TITAG_OFFSET 16 @@ -37,15 +36,10 @@ extern void vhpt_init (void); extern void vhpt_init (void); extern void zero_vhpt_stats(void); extern int dump_vhpt_stats(char *buf); -extern void vhpt_flush_address(unsigned long vadr, unsigned long addr_range); -extern void vhpt_flush_address_remote(int cpu, unsigned long vadr, - unsigned long addr_range); extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps); extern void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps); -extern void vhpt_flush(void); -extern void smp_vhpt_flush_all(void); /* Currently the VHPT is allocated per CPU. 
*/ DECLARE_PER_CPU (unsigned long, vhpt_paddr); diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vmx_vcpu.h --- a/xen/include/asm-ia64/vmx_vcpu.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-ia64/vmx_vcpu.h Tue May 30 14:30:34 2006 -0500 @@ -359,7 +359,7 @@ IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu, // TODO: unimplemented DBRs return a reserved register fault // TODO: Should set Logical CPU state, not just physical if(reg > 4){ - panic("there are only five cpuid registers"); + panic_domain(vcpu_regs(vcpu),"there are only five cpuid registers"); } *pval=VCPU(vcpu,vcpuid[reg]); return (IA64_NO_FAULT); diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/domain.h Tue May 30 14:30:34 2006 -0500 @@ -114,11 +114,32 @@ struct arch_domain unsigned long first_reserved_pfn; } __cacheline_aligned; +#ifdef CONFIG_X86_PAE +struct pae_l3_cache { + /* + * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest + * supplies a >=4GB PAE L3 table. We need two because we cannot set up + * an L3 table while we are currently running on it (without using + * expensive atomic 64-bit operations). + */ + l3_pgentry_t table[2][4] __attribute__((__aligned__(32))); + unsigned long high_mfn; /* The >=4GB MFN being shadowed. */ + unsigned int inuse_idx; /* Which of the two cache slots is in use? */ + spinlock_t lock; +}; +#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock) +#else /* !CONFIG_X86_PAE */ +struct pae_l3_cache { }; +#define pae_l3_cache_init(c) ((void)0) +#endif + struct arch_vcpu { /* Needs 16-byte aligment for FXSAVE/FXRSTOR. 
*/ struct vcpu_guest_context guest_context __attribute__((__aligned__(16))); + + struct pae_l3_cache pae_l3_cache; unsigned long flags; /* TF_ */ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/fixmap.h --- a/xen/include/asm-x86/fixmap.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/fixmap.h Tue May 30 14:30:34 2006 -0500 @@ -25,6 +25,10 @@ * from the end of virtual memory backwards. */ enum fixed_addresses { +#ifdef CONFIG_X86_PAE + FIX_PAE_HIGHMEM_0, + FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1, +#endif FIX_APIC_BASE, FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, @@ -40,13 +44,13 @@ enum fixed_addresses { #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern void __set_fixmap( - enum fixed_addresses idx, unsigned long p, unsigned long flags); + enum fixed_addresses idx, unsigned long mfn, unsigned long flags); #define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_HYPERVISOR) + __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR) #define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE) + __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE) #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/hvm/domain.h Tue May 30 14:30:34 2006 -0500 @@ -35,9 +35,9 @@ struct hvm_domain { unsigned int nr_vcpus; unsigned int apic_enabled; unsigned int pae_enabled; - - struct hvm_virpit vpit; - u64 guest_time; + s64 tsc_frequency; + struct pl_time pl_time; + struct hvm_virpic vpic; struct hvm_vioapic vioapic; struct hvm_io_handler io_handler; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/intr.h --- a/xen/include/asm-x86/hvm/svm/intr.h Tue May 30 12:52:02 2006 -0500 +++ 
b/xen/include/asm-x86/hvm/svm/intr.h Tue May 30 14:30:34 2006 -0500 @@ -21,7 +21,6 @@ #ifndef __ASM_X86_HVM_SVM_INTR_H__ #define __ASM_X86_HVM_SVM_INTR_H__ -extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit); extern void svm_intr_assist(void); extern void svm_intr_assist_update(struct vcpu *v, int highest_vector); extern void svm_intr_assist_test_valid(struct vcpu *v, diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/svm.h --- a/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 14:30:34 2006 -0500 @@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v extern void svm_do_launch(struct vcpu *v); extern void svm_do_resume(struct vcpu *v); extern void svm_set_guest_time(struct vcpu *v, u64 gtime); -extern u64 svm_get_guest_time(struct vcpu *v); extern void arch_svm_do_resume(struct vcpu *v); extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa); /* For debugging. Remove when no longer needed. 
*/ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/hvm/vcpu.h Tue May 30 14:30:34 2006 -0500 @@ -32,6 +32,9 @@ struct hvm_vcpu { unsigned long ioflags; struct mmio_op mmio_op; struct vlapic *vlapic; + s64 cache_tsc_offset; + u64 guest_time; + /* For AP startup */ unsigned long init_sipi_sipi_state; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 14:30:34 2006 -0500 @@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc extern void arch_vmx_do_launch(struct vcpu *); extern void arch_vmx_do_resume(struct vcpu *); extern void set_guest_time(struct vcpu *v, u64 gtime); -extern u64 get_guest_time(struct vcpu *v); extern unsigned int cpu_rev; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/hvm/vpit.h Tue May 30 14:30:34 2006 -0500 @@ -29,9 +29,7 @@ #include <asm/hvm/vpic.h> #define PIT_FREQ 1193181 - -#define PIT_BASE 0x40 -#define HVM_PIT_ACCEL_MODE 2 +#define PIT_BASE 0x40 typedef struct PITChannelState { int count; /* can be 65536 */ @@ -48,47 +46,56 @@ typedef struct PITChannelState { u8 gate; /* timer start */ s64 count_load_time; /* irq handling */ - s64 next_transition_time; - int irq; - struct hvm_time_info *hvm_time; - u32 period; /* period(ns) based on count */ + struct vcpu *vcpu; + struct periodic_time *pt; } PITChannelState; - -struct hvm_time_info { - /* extra info for the mode 2 channel */ - struct timer pit_timer; - struct vcpu *vcpu; /* which vcpu the ac_timer bound to */ - u64 period_cycles; /* pit frequency in cpu cycles */ - s_time_t count_advance; /* accumulated count advance since last fire */ - s_time_t count_point; /* last point accumulating count advance */ - unsigned int 
pending_intr_nr; /* the couner for pending timer interrupts */ - int first_injected; /* flag to prevent shadow window */ - s64 cache_tsc_offset; /* cache of VMCS TSC_OFFSET offset */ - u64 last_pit_gtime; /* guest time when last pit is injected */ + +/* + * Abstract layer of periodic time, one short time. + */ +struct periodic_time { + char enabled; /* enabled */ + char one_shot; /* one shot time */ + char irq; + char first_injected; /* flag to prevent shadow window */ + u32 pending_intr_nr; /* the couner for pending timer interrupts */ + u32 period; /* frequency in ns */ + u64 period_cycles; /* frequency in cpu cycles */ + s_time_t scheduled; /* scheduled timer interrupt */ + u64 last_plt_gtime; /* platform time when last IRQ is injected */ + struct timer timer; /* ac_timer */ }; -typedef struct hvm_virpit { +typedef struct PITState { PITChannelState channels[3]; - struct hvm_time_info time_info; int speaker_data_on; int dummy_refresh_clock; -}hvm_virpit; +} PITState; +struct pl_time { /* platform time */ + struct periodic_time periodic_tm; + struct PITState vpit; + /* TODO: RTC/ACPI time */ +}; -static __inline__ s_time_t get_pit_scheduled( - struct vcpu *v, - struct hvm_virpit *vpit) +static __inline__ s_time_t get_scheduled( + struct vcpu *v, int irq, + struct periodic_time *pt) { - struct PITChannelState *s = &(vpit->channels[0]); - if ( is_irq_enabled(v, 0) ) { - return s->next_transition_time; + if ( is_irq_enabled(v, irq) ) { + return pt->scheduled; } else return -1; } /* to hook the ioreq packet to get the PIT initialization info */ -extern void pit_init(struct hvm_virpit *pit, struct vcpu *v); -extern void pickup_deactive_ticks(struct hvm_virpit *vpit); +extern void hvm_hooks_assist(struct vcpu *v); +extern void pickup_deactive_ticks(struct periodic_time *vpit); +extern u64 hvm_get_guest_time(struct vcpu *v); +extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, char irq, char one_shot); +extern void destroy_periodic_time(struct 
periodic_time *pt); +void pit_init(struct vcpu *v, unsigned long cpu_khz); +void pt_timer_fn(void *data); #endif /* __ASM_X86_HVM_VPIT_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/string.h --- a/xen/include/asm-x86/string.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/string.h Tue May 30 14:30:34 2006 -0500 @@ -2,152 +2,6 @@ #define __X86_STRING_H__ #include <xen/config.h> - -#define __HAVE_ARCH_STRCPY -static inline char *strcpy(char *dest, const char *src) -{ - long d0, d1, d2; - __asm__ __volatile__ ( - "1: lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - : "=&S" (d0), "=&D" (d1), "=&a" (d2) - : "0" (src), "1" (dest) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *dest, const char *src, size_t count) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - "1: dec %2 \n" - " js 2f \n" - " lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - " rep ; stosb \n" - "2: \n" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - : "0" (src), "1" (dest), "2" (count) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRCAT -static inline char *strcat(char *dest, const char *src) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - " repne ; scasb \n" - " dec %1 \n" - "1: lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRNCAT -static inline char *strncat(char *dest, const char *src, size_t count) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - " repne ; scasb \n" - " dec %1 \n" - " mov %8,%3 \n" - "1: dec %3 \n" - " js 2f \n" - " lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - "2: xor %%eax,%%eax\n" - " stosb" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count) - : "memory" ); - return dest; -} - -#define 
__HAVE_ARCH_STRCMP -static inline int strcmp(const char *cs, const char *ct) -{ - long d0, d1; - register int __res; - __asm__ __volatile__ ( - "1: lodsb \n" - " scasb \n" - " jne 2f \n" - " test %%al,%%al \n" - " jne 1b \n" - " xor %%eax,%%eax\n" - " jmp 3f \n" - "2: sbb %%eax,%%eax\n" - " or $1,%%al \n" - "3: \n" - : "=a" (__res), "=&S" (d0), "=&D" (d1) - : "1" (cs), "2" (ct) ); - return __res; -} - -#define __HAVE_ARCH_STRNCMP -static inline int strncmp(const char *cs, const char *ct, size_t count) -{ - long d0, d1, d2; - register int __res; - __asm__ __volatile__ ( - "1: dec %3 \n" - " js 2f \n" - " lodsb \n" - " scasb \n" - " jne 3f \n" - " test %%al,%%al \n" - " jne 1b \n" - "2: xor %%eax,%%eax\n" - " jmp 4f \n" - "3: sbb %%eax,%%eax\n" - " or $1,%%al \n" - "4: \n" - : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - : "1" (cs), "2" (ct), "3" (count) ); - return __res; -} - -#define __HAVE_ARCH_STRCHR -static inline char *strchr(const char *s, int c) -{ - long d0; - register char *__res; - __asm__ __volatile__ ( - " mov %%al,%%ah \n" - "1: lodsb \n" - " cmp %%ah,%%al \n" - " je 2f \n" - " test %%al,%%al \n" - " jne 1b \n" - " mov $1,%1 \n" - "2: mov %1,%0 \n" - " dec %0 \n" - : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) ); - return __res; -} - -#define __HAVE_ARCH_STRLEN -static inline size_t strlen(const char *s) -{ - long d0; - register int __res; - __asm__ __volatile__ ( - " repne ; scasb \n" - " notl %0 \n" - " decl %0 \n" - : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) ); - return __res; -} static inline void *__variable_memcpy(void *to, const void *from, size_t n) { @@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v #define __HAVE_ARCH_MEMCMP #define memcmp __builtin_memcmp -#define __HAVE_ARCH_MEMCHR -static inline void *memchr(const void *cs, int c, size_t count) -{ - long d0; - register void *__res; - if ( count == 0 ) - return NULL; - __asm__ __volatile__ ( - " repne ; scasb\n" - " je 1f \n" - " mov $1,%0 \n" - "1: 
dec %0 \n" - : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) ); - return __res; -} - static inline void *__memset_generic(void *s, char c, size_t count) { long d0, d1; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/x86_emulate.h --- a/xen/include/asm-x86/x86_emulate.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/asm-x86/x86_emulate.h Tue May 30 14:30:34 2006 -0500 @@ -9,8 +9,10 @@ #ifndef __X86_EMULATE_H__ #define __X86_EMULATE_H__ -/* - * x86_mem_emulator: +struct x86_emulate_ctxt; + +/* + * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. * There are two categories of operation: those that act on ordinary memory @@ -47,7 +49,7 @@ #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ -struct x86_mem_emulator +struct x86_emulate_ops { /* * read_std: Read bytes of standard (non-emulated/special) memory. @@ -59,7 +61,8 @@ struct x86_mem_emulator int (*read_std)( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * write_std: Write bytes of standard (non-emulated/special) memory. @@ -71,7 +74,8 @@ struct x86_mem_emulator int (*write_std)( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * read_emulated: Read bytes from emulated/special memory area. @@ -82,7 +86,8 @@ struct x86_mem_emulator int (*read_emulated)( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * write_emulated: Read bytes from emulated/special memory area. 
@@ -93,7 +98,8 @@ struct x86_mem_emulator int (*write_emulated)( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -107,11 +113,12 @@ struct x86_mem_emulator unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an - * emulated/special memory area. + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); + + /* + * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an + * emulated/special memory area. * @addr: [IN ] Linear address to access. * @old: [IN ] Value expected to be current at @addr. * @new: [IN ] Value to write to @addr. @@ -126,7 +133,8 @@ struct x86_mem_emulator unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, - unsigned long new_hi); + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt); }; /* Standard reader/writer functions that callers may wish to use. */ @@ -134,14 +142,28 @@ x86_emulate_read_std( x86_emulate_read_std( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); extern int x86_emulate_write_std( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); struct cpu_user_regs; + +struct x86_emulate_ctxt +{ + /* Register state before/after emulation. */ + struct cpu_user_regs *regs; + + /* Linear faulting address (if emulating a page-faulting instruction). */ + unsigned long cr2; + + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; +}; /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -159,25 +181,19 @@ struct cpu_user_regs; /* * x86_emulate_memop: Emulate an instruction that faulted attempting to * read/write a 'special' memory area. 
- * @regs: Register state at time of fault. - * @cr2: Linear faulting address within an emulated/special memory area. - * @ops: Interface to access special memory. - * @mode: Emulated execution mode, represented by an X86EMUL_MODE value. * Returns -1 on failure, 0 on success. */ -extern int +int x86_emulate_memop( - struct cpu_user_regs *regs, - unsigned long cr2, - struct x86_mem_emulator *ops, - int mode); + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); /* * Given the 'reg' portion of a ModRM byte, and a register block, return a * pointer into the block that addresses the relevant register. * @highbyte_regs specifies whether to decode AH,CH,DH,BH. */ -extern void * +void * decode_register( uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs); diff -r e74246451527 -r f54d38cea8ac xen/include/public/acm_ops.h --- a/xen/include/public/acm_ops.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/acm_ops.h Tue May 30 14:30:34 2006 -0500 @@ -2,7 +2,7 @@ * acm_ops.h: Xen access control module hypervisor commands * * Reiner Sailer <sailer@xxxxxxxxxxxxxx> - * Copyright (c) 2005, International Business Machines Corporation. + * Copyright (c) 2005,2006 International Business Machines Corporation. */ #ifndef __XEN_PUBLIC_ACM_OPS_H__ @@ -17,36 +17,50 @@ * This makes sure that old versions of acm tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define ACM_INTERFACE_VERSION 0xAAAA0006 +#define ACM_INTERFACE_VERSION 0xAAAA0007 /************************************************************************/ -#define ACM_SETPOLICY 4 +/* + * Prototype for this hypercall is: + * int acm_op(int cmd, void *args) + * @cmd == ACMOP_??? (access control module operation). + * @args == Operation-specific extra arguments (NULL if none). 
+ */ + + +#define ACMOP_setpolicy 1 struct acm_setpolicy { - /* OUT variables */ + /* IN */ + uint32_t interface_version; void *pushcache; uint32_t pushcache_size; }; -#define ACM_GETPOLICY 5 +#define ACMOP_getpolicy 2 struct acm_getpolicy { - /* OUT variables */ + /* IN */ + uint32_t interface_version; void *pullcache; uint32_t pullcache_size; }; -#define ACM_DUMPSTATS 6 +#define ACMOP_dumpstats 3 struct acm_dumpstats { + /* IN */ + uint32_t interface_version; void *pullcache; uint32_t pullcache_size; }; -#define ACM_GETSSID 7 +#define ACMOP_getssid 4 enum get_type {UNSET=0, SSIDREF, DOMAINID}; struct acm_getssid { + /* IN */ + uint32_t interface_version; enum get_type get_ssid_by; union { domaintype_t domainid; @@ -56,9 +70,11 @@ struct acm_getssid { uint32_t ssidbuf_size; }; -#define ACM_GETDECISION 8 +#define ACMOP_getdecision 5 struct acm_getdecision { - enum get_type get_decision_by1; /* in */ + /* IN */ + uint32_t interface_version; + enum get_type get_decision_by1; enum get_type get_decision_by2; union { domaintype_t domainid; @@ -69,23 +85,11 @@ struct acm_getdecision { ssidref_t ssidref; } id2; enum acm_hook_type hook; - int acm_decision; /* out */ + /* OUT */ + int acm_decision; }; -typedef struct acm_op { - uint32_t cmd; - uint32_t interface_version; /* ACM_INTERFACE_VERSION */ - union { - struct acm_setpolicy setpolicy; - struct acm_getpolicy getpolicy; - struct acm_dumpstats dumpstats; - struct acm_getssid getssid; - struct acm_getdecision getdecision; - } u; -} acm_op_t; -DEFINE_XEN_GUEST_HANDLE(acm_op_t); - -#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ +#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ /* * Local variables: diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/arch-ia64.h Tue May 30 14:30:34 2006 -0500 @@ -38,15 +38,17 @@ DEFINE_XEN_GUEST_HANDLE(void); #ifndef __ASSEMBLY__ #define MAX_NR_SECTION 32 /* at most 32 memory holes */ -typedef struct { 
+struct mm_section { unsigned long start; /* start of memory hole */ unsigned long end; /* end of memory hole */ -} mm_section_t; - -typedef struct { +}; +typedef struct mm_section mm_section_t; + +struct pmt_entry { unsigned long mfn : 56; unsigned long type: 8; -} pmt_entry_t; +}; +typedef struct pmt_entry pmt_entry_t; #define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */ #define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */ @@ -93,10 +95,11 @@ typedef struct { * NB. This may become a 64-bit count with no shift. If this happens then the * structure size will still be 8 bytes, so no other alignments will change. */ -typedef struct { +struct tsc_timestamp { unsigned int tsc_bits; /* 0: 32 bits read from the CPU's TSC. */ unsigned int tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */ -} tsc_timestamp_t; /* 8 bytes */ +}; /* 8 bytes */ +typedef struct tsc_timestamp tsc_timestamp_t; struct pt_fpreg { union { @@ -105,7 +108,7 @@ struct pt_fpreg { } u; }; -typedef struct cpu_user_regs{ +struct cpu_user_regs { /* The following registers are saved by SAVE_MIN: */ unsigned long b6; /* scratch */ unsigned long b7; /* scratch */ @@ -179,9 +182,10 @@ typedef struct cpu_user_regs{ unsigned long eml_unat; /* used for emulating instruction */ unsigned long rfi_pfs; /* used for elulating rfi */ -}cpu_user_regs_t; - -typedef union { +}; +typedef struct cpu_user_regs cpu_user_regs_t; + +union vac { unsigned long value; struct { int a_int:1; @@ -193,9 +197,10 @@ typedef union { int a_bsw:1; long reserved:57; }; -} vac_t; - -typedef union { +}; +typedef union vac vac_t; + +union vdc { unsigned long value; struct { int d_vmsw:1; @@ -206,11 +211,12 @@ typedef union { int d_itm:1; long reserved:58; }; -} vdc_t; - -typedef struct { - vac_t vac; - vdc_t vdc; +}; +typedef union vdc vdc_t; + +struct mapped_regs { + union vac vac; + union vdc vdc; unsigned long virt_env_vaddr; unsigned long reserved1[29]; unsigned long vhpi; @@ -290,27 +296,32 @@ typedef struct { unsigned long 
reserved6[3456]; unsigned long vmm_avail[128]; unsigned long reserved7[4096]; -} mapped_regs_t; - -typedef struct { - mapped_regs_t *privregs; - int evtchn_vector; -} arch_vcpu_info_t; +}; +typedef struct mapped_regs mapped_regs_t; + +struct arch_vcpu_info { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; typedef mapped_regs_t vpd_t; -typedef struct { +struct arch_shared_info { unsigned int flags; unsigned long start_info_pfn; -} arch_shared_info_t; - -typedef struct { + + /* Interrupt vector for event channel. */ + int evtchn_vector; +}; +typedef struct arch_shared_info arch_shared_info_t; + +struct arch_initrd_info { unsigned long start; unsigned long size; -} arch_initrd_info_t; +}; +typedef struct arch_initrd_info arch_initrd_info_t; #define IA64_COMMAND_LINE_SIZE 512 -typedef struct vcpu_guest_context { +struct vcpu_guest_context { #define VGCF_FPU_VALID (1<<0) #define VGCF_VMX_GUEST (1<<1) #define VGCF_IN_KERNEL (1<<2) @@ -320,19 +331,17 @@ typedef struct vcpu_guest_context { unsigned long sys_pgnr; /* System pages out of domain memory */ unsigned long vm_assist; /* VMASST_TYPE_* bitmap, now none on IPF */ - cpu_user_regs_t regs; - arch_vcpu_info_t vcpu; - arch_shared_info_t shared; - arch_initrd_info_t initrd; + struct cpu_user_regs regs; + struct mapped_regs *privregs; + struct arch_shared_info shared; + struct arch_initrd_info initrd; char cmdline[IA64_COMMAND_LINE_SIZE]; -} vcpu_guest_context_t; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); // dom0 vp op -#define __HYPERVISOR_ia64_dom0vp_op 256 // XXX sufficient large - // TODO - // arch specific hypercall - // number conversion +#define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 #define IA64_DOM0VP_ioremap 0 // map io space in machine // address to dom0 physical // address space. 
@@ -352,10 +361,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte // to the corresponding // pseudo physical page frame // number of the caller domain -#define IA64_DOM0VP_populate_physmap 16 // allocate machine-contigusous - // memory region and - // map it to pseudo physical - // address #define IA64_DOM0VP_zap_physmap 17 // unmap and free pages // contained in the specified // pseudo physical region @@ -364,6 +369,32 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte // address space. #endif /* !__ASSEMBLY__ */ + +/* Hyperprivops. */ +#define HYPERPRIVOP_RFI 0x1 +#define HYPERPRIVOP_RSM_DT 0x2 +#define HYPERPRIVOP_SSM_DT 0x3 +#define HYPERPRIVOP_COVER 0x4 +#define HYPERPRIVOP_ITC_D 0x5 +#define HYPERPRIVOP_ITC_I 0x6 +#define HYPERPRIVOP_SSM_I 0x7 +#define HYPERPRIVOP_GET_IVR 0x8 +#define HYPERPRIVOP_GET_TPR 0x9 +#define HYPERPRIVOP_SET_TPR 0xa +#define HYPERPRIVOP_EOI 0xb +#define HYPERPRIVOP_SET_ITM 0xc +#define HYPERPRIVOP_THASH 0xd +#define HYPERPRIVOP_PTC_GA 0xe +#define HYPERPRIVOP_ITR_D 0xf +#define HYPERPRIVOP_GET_RR 0x10 +#define HYPERPRIVOP_SET_RR 0x11 +#define HYPERPRIVOP_SET_KR 0x12 +#define HYPERPRIVOP_FC 0x13 +#define HYPERPRIVOP_GET_CPUID 0x14 +#define HYPERPRIVOP_GET_PMD 0x15 +#define HYPERPRIVOP_GET_EFLAG 0x16 +#define HYPERPRIVOP_SET_EFLAG 0x17 +#define HYPERPRIVOP_MAX 0x17 #endif /* __HYPERVISOR_IF_IA64_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/arch-x86_32.h Tue May 30 14:30:34 2006 -0500 @@ -95,15 +95,16 @@ DEFINE_XEN_GUEST_HANDLE(void); #define TI_GET_IF(_ti) ((_ti)->flags & 4) #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -typedef struct trap_info { +struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? 
*/ uint16_t cs; /* code selector */ unsigned long address; /* code offset */ -} trap_info_t; +}; +typedef struct trap_info trap_info_t; DEFINE_XEN_GUEST_HANDLE(trap_info_t); -typedef struct cpu_user_regs { +struct cpu_user_regs { uint32_t ebx; uint32_t ecx; uint32_t edx; @@ -124,7 +125,8 @@ typedef struct cpu_user_regs { uint16_t ds, _pad3; uint16_t fs, _pad4; uint16_t gs, _pad5; -} cpu_user_regs_t; +}; +typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ @@ -133,14 +135,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. */ -typedef struct vcpu_guest_context { +struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ #define VGCF_I387_VALID (1<<0) #define VGCF_HVM_GUEST (1<<1) #define VGCF_IN_KERNEL (1<<2) unsigned long flags; /* VGCF_* flags */ - cpu_user_regs_t user_regs; /* User-level CPU registers */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ @@ -152,25 +154,29 @@ typedef struct vcpu_guest_context { unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ unsigned long failsafe_callback_eip; unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ -} vcpu_guest_context_t; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -typedef struct arch_shared_info { +struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. 
*/ unsigned long pfn_to_mfn_frame_list_list; unsigned long nmi_reason; -} arch_shared_info_t; - -typedef struct { +}; +typedef struct arch_shared_info arch_shared_info_t; + +struct arch_vcpu_info { unsigned long cr2; unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ -} arch_vcpu_info_t; - -typedef struct { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { unsigned long cs; unsigned long eip; -} xen_callback_t; +}; +typedef struct xen_callback xen_callback_t; #endif /* !__ASSEMBLY__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/arch-x86_64.h Tue May 30 14:30:34 2006 -0500 @@ -150,12 +150,13 @@ struct iret_context { #define TI_GET_IF(_ti) ((_ti)->flags & 4) #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -typedef struct trap_info { +struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ uint16_t cs; /* code selector */ unsigned long address; /* code offset */ -} trap_info_t; +}; +typedef struct trap_info trap_info_t; DEFINE_XEN_GUEST_HANDLE(trap_info_t); #ifdef __GNUC__ @@ -166,7 +167,7 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t); #define __DECL_REG(name) uint64_t r ## name #endif -typedef struct cpu_user_regs { +struct cpu_user_regs { uint64_t r15; uint64_t r14; uint64_t r13; @@ -195,7 +196,8 @@ typedef struct cpu_user_regs { uint16_t ds, _pad4[3]; uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ -} cpu_user_regs_t; +}; +typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); #undef __DECL_REG @@ -206,14 +208,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT * The following is all CPU context. 
Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. */ -typedef struct vcpu_guest_context { +struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ #define VGCF_I387_VALID (1<<0) #define VGCF_HVM_GUEST (1<<1) #define VGCF_IN_KERNEL (1<<2) unsigned long flags; /* VGCF_* flags */ - cpu_user_regs_t user_regs; /* User-level CPU registers */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ @@ -228,20 +230,23 @@ typedef struct vcpu_guest_context { uint64_t fs_base; uint64_t gs_base_kernel; uint64_t gs_base_user; -} vcpu_guest_context_t; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -typedef struct arch_shared_info { +struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. */ unsigned long pfn_to_mfn_frame_list_list; unsigned long nmi_reason; -} arch_shared_info_t; - -typedef struct { +}; +typedef struct arch_shared_info arch_shared_info_t; + +struct arch_vcpu_info { unsigned long cr2; unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ -} arch_vcpu_info_t; +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; typedef unsigned long xen_callback_t; diff -r e74246451527 -r f54d38cea8ac xen/include/public/callback.h --- a/xen/include/public/callback.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/callback.h Tue May 30 14:30:34 2006 -0500 @@ -32,10 +32,11 @@ * Register a callback. 
*/ #define CALLBACKOP_register 0 -typedef struct callback_register { +struct callback_register { int type; xen_callback_t address; -} callback_register_t; +}; +typedef struct callback_register callback_register_t; DEFINE_XEN_GUEST_HANDLE(callback_register_t); /* @@ -45,9 +46,10 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe * you attempt to unregister such a callback. */ #define CALLBACKOP_unregister 1 -typedef struct callback_unregister { +struct callback_unregister { int type; -} callback_unregister_t; +}; +typedef struct callback_unregister callback_unregister_t; DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); #endif /* __XEN_PUBLIC_CALLBACK_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/dom0_ops.h Tue May 30 14:30:34 2006 -0500 @@ -24,14 +24,15 @@ /************************************************************************/ #define DOM0_GETMEMLIST 2 -typedef struct dom0_getmemlist { +struct dom0_getmemlist { /* IN variables. */ domid_t domain; unsigned long max_pfns; XEN_GUEST_HANDLE(ulong) buffer; /* OUT variables. */ unsigned long num_pfns; -} dom0_getmemlist_t; +}; +typedef struct dom0_getmemlist dom0_getmemlist_t; DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t); #define DOM0_SCHEDCTL 6 @@ -45,39 +46,43 @@ DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t); #define DOM0_CREATEDOMAIN 8 -typedef struct dom0_createdomain { +struct dom0_createdomain { /* IN parameters */ uint32_t ssidref; xen_domain_handle_t handle; /* IN/OUT parameters. */ /* Identifier for new domain (auto-allocate if zero is specified). */ domid_t domain; -} dom0_createdomain_t; +}; +typedef struct dom0_createdomain dom0_createdomain_t; DEFINE_XEN_GUEST_HANDLE(dom0_createdomain_t); #define DOM0_DESTROYDOMAIN 9 -typedef struct dom0_destroydomain { - /* IN variables. 
*/ - domid_t domain; -} dom0_destroydomain_t; +struct dom0_destroydomain { + /* IN variables. */ + domid_t domain; +}; +typedef struct dom0_destroydomain dom0_destroydomain_t; DEFINE_XEN_GUEST_HANDLE(dom0_destroydomain_t); #define DOM0_PAUSEDOMAIN 10 -typedef struct dom0_pausedomain { +struct dom0_pausedomain { /* IN parameters. */ domid_t domain; -} dom0_pausedomain_t; +}; +typedef struct dom0_pausedomain dom0_pausedomain_t; DEFINE_XEN_GUEST_HANDLE(dom0_pausedomain_t); #define DOM0_UNPAUSEDOMAIN 11 -typedef struct dom0_unpausedomain { +struct dom0_unpausedomain { /* IN parameters. */ domid_t domain; -} dom0_unpausedomain_t; +}; +typedef struct dom0_unpausedomain dom0_unpausedomain_t; DEFINE_XEN_GUEST_HANDLE(dom0_unpausedomain_t); #define DOM0_GETDOMAININFO 12 -typedef struct dom0_getdomaininfo { +struct dom0_getdomaininfo { /* IN variables. */ domid_t domain; /* NB. IN/OUT variable. */ /* OUT variables. */ @@ -99,21 +104,23 @@ typedef struct dom0_getdomaininfo { uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ uint32_t ssidref; xen_domain_handle_t handle; -} dom0_getdomaininfo_t; +}; +typedef struct dom0_getdomaininfo dom0_getdomaininfo_t; DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfo_t); #define DOM0_SETVCPUCONTEXT 13 -typedef struct dom0_setvcpucontext { +struct dom0_setvcpucontext { /* IN variables. */ domid_t domain; uint32_t vcpu; /* IN/OUT parameters */ XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; -} dom0_setvcpucontext_t; +}; +typedef struct dom0_setvcpucontext dom0_setvcpucontext_t; DEFINE_XEN_GUEST_HANDLE(dom0_setvcpucontext_t); #define DOM0_MSR 15 -typedef struct dom0_msr { +struct dom0_msr { /* IN variables. */ uint32_t write; cpumap_t cpu_mask; @@ -123,7 +130,8 @@ typedef struct dom0_msr { /* OUT variables. 
*/ uint32_t out1; uint32_t out2; -} dom0_msr_t; +}; +typedef struct dom0_msr dom0_msr_t; DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); /* @@ -131,12 +139,13 @@ DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); * 1 January, 1970 if the current system time was <system_time>. */ #define DOM0_SETTIME 17 -typedef struct dom0_settime { +struct dom0_settime { /* IN variables. */ uint32_t secs; uint32_t nsecs; uint64_t system_time; -} dom0_settime_t; +}; +typedef struct dom0_settime dom0_settime_t; DEFINE_XEN_GUEST_HANDLE(dom0_settime_t); #define DOM0_GETPAGEFRAMEINFO 18 @@ -151,44 +160,47 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t); #define LTAB_MASK XTAB #define LTABTYPE_MASK (0x7<<LTAB_SHIFT) -typedef struct dom0_getpageframeinfo { +struct dom0_getpageframeinfo { /* IN variables. */ unsigned long mfn; /* Machine page frame number to query. */ domid_t domain; /* To which domain does the frame belong? */ /* OUT variables. */ /* Is the page PINNED to a type? */ uint32_t type; /* see above type defs */ -} dom0_getpageframeinfo_t; +}; +typedef struct dom0_getpageframeinfo dom0_getpageframeinfo_t; DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo_t); /* * Read console content from Xen buffer ring. */ #define DOM0_READCONSOLE 19 -typedef struct dom0_readconsole { +struct dom0_readconsole { /* IN variables. */ uint32_t clear; /* Non-zero -> clear after reading. */ /* IN/OUT variables. */ XEN_GUEST_HANDLE(char) buffer; /* In: Buffer start; Out: Used buffer start */ uint32_t count; /* In: Buffer size; Out: Used buffer size */ -} dom0_readconsole_t; +}; +typedef struct dom0_readconsole dom0_readconsole_t; DEFINE_XEN_GUEST_HANDLE(dom0_readconsole_t); /* * Set which physical cpus a vcpu can execute on. */ #define DOM0_SETVCPUAFFINITY 20 -typedef struct dom0_setvcpuaffinity { +struct dom0_setvcpuaffinity { /* IN variables. 
*/ domid_t domain; uint32_t vcpu; cpumap_t cpumap; -} dom0_setvcpuaffinity_t; +}; +typedef struct dom0_setvcpuaffinity dom0_setvcpuaffinity_t; DEFINE_XEN_GUEST_HANDLE(dom0_setvcpuaffinity_t); /* Get trace buffers machine base address */ #define DOM0_TBUFCONTROL 21 -typedef struct dom0_tbufcontrol { +struct dom0_tbufcontrol { /* IN variables */ #define DOM0_TBUF_GET_INFO 0 #define DOM0_TBUF_SET_CPU_MASK 1 @@ -203,14 +215,15 @@ typedef struct dom0_tbufcontrol { /* OUT variables */ unsigned long buffer_mfn; uint32_t size; -} dom0_tbufcontrol_t; +}; +typedef struct dom0_tbufcontrol dom0_tbufcontrol_t; DEFINE_XEN_GUEST_HANDLE(dom0_tbufcontrol_t); /* * Get physical information about the host machine */ #define DOM0_PHYSINFO 22 -typedef struct dom0_physinfo { +struct dom0_physinfo { uint32_t threads_per_core; uint32_t cores_per_socket; uint32_t sockets_per_node; @@ -219,17 +232,19 @@ typedef struct dom0_physinfo { unsigned long total_pages; unsigned long free_pages; uint32_t hw_cap[8]; -} dom0_physinfo_t; +}; +typedef struct dom0_physinfo dom0_physinfo_t; DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t); /* * Get the ID of the current scheduler. */ #define DOM0_SCHED_ID 24 -typedef struct dom0_sched_id { +struct dom0_sched_id { /* OUT variable */ uint32_t sched_id; -} dom0_sched_id_t; +}; +typedef struct dom0_sched_id dom0_sched_id_t; DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t); /* @@ -246,15 +261,16 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t) #define DOM0_SHADOW_CONTROL_OP_CLEAN 11 #define DOM0_SHADOW_CONTROL_OP_PEEK 12 -typedef struct dom0_shadow_control_stats { +struct dom0_shadow_control_stats { uint32_t fault_count; uint32_t dirty_count; uint32_t dirty_net_count; uint32_t dirty_block_count; -} dom0_shadow_control_stats_t; +}; +typedef struct dom0_shadow_control_stats dom0_shadow_control_stats_t; DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_stats_t); -typedef struct dom0_shadow_control { +struct dom0_shadow_control { /* IN variables. 
*/ domid_t domain; uint32_t op; @@ -262,26 +278,29 @@ typedef struct dom0_shadow_control { /* IN/OUT variables. */ unsigned long pages; /* size of buffer, updated with actual size */ /* OUT variables. */ - dom0_shadow_control_stats_t stats; -} dom0_shadow_control_t; + struct dom0_shadow_control_stats stats; +}; +typedef struct dom0_shadow_control dom0_shadow_control_t; DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_t); #define DOM0_SETDOMAINMAXMEM 28 -typedef struct dom0_setdomainmaxmem { +struct dom0_setdomainmaxmem { /* IN variables. */ domid_t domain; unsigned long max_memkb; -} dom0_setdomainmaxmem_t; +}; +typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t; DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t); #define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */ -typedef struct dom0_getpageframeinfo2 { +struct dom0_getpageframeinfo2 { /* IN variables. */ domid_t domain; unsigned long num; /* IN/OUT variables. */ XEN_GUEST_HANDLE(ulong) array; -} dom0_getpageframeinfo2_t; +}; +typedef struct dom0_getpageframeinfo2 dom0_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo2_t); /* @@ -292,7 +311,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram * (x86-specific). */ #define DOM0_ADD_MEMTYPE 31 -typedef struct dom0_add_memtype { +struct dom0_add_memtype { /* IN variables. */ unsigned long mfn; unsigned long nr_mfns; @@ -300,7 +319,8 @@ typedef struct dom0_add_memtype { /* OUT variables. */ uint32_t handle; uint32_t reg; -} dom0_add_memtype_t; +}; +typedef struct dom0_add_memtype dom0_add_memtype_t; DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t); /* @@ -311,23 +331,25 @@ DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype * (x86-specific). */ #define DOM0_DEL_MEMTYPE 32 -typedef struct dom0_del_memtype { +struct dom0_del_memtype { /* IN variables. */ uint32_t handle; uint32_t reg; -} dom0_del_memtype_t; +}; +typedef struct dom0_del_memtype dom0_del_memtype_t; DEFINE_XEN_GUEST_HANDLE(dom0_del_memtype_t); /* Read current type of an MTRR (x86-specific). 
*/ #define DOM0_READ_MEMTYPE 33 -typedef struct dom0_read_memtype { +struct dom0_read_memtype { /* IN variables. */ uint32_t reg; /* OUT variables. */ unsigned long mfn; unsigned long nr_mfns; uint32_t type; -} dom0_read_memtype_t; +}; +typedef struct dom0_read_memtype dom0_read_memtype_t; DEFINE_XEN_GUEST_HANDLE(dom0_read_memtype_t); /* Interface for controlling Xen software performance counters. */ @@ -335,50 +357,56 @@ DEFINE_XEN_GUEST_HANDLE(dom0_read_memtyp /* Sub-operations: */ #define DOM0_PERFCCONTROL_OP_RESET 1 /* Reset all counters to zero. */ #define DOM0_PERFCCONTROL_OP_QUERY 2 /* Get perfctr information. */ -typedef struct dom0_perfc_desc { +struct dom0_perfc_desc { char name[80]; /* name of perf counter */ uint32_t nr_vals; /* number of values for this counter */ uint32_t vals[64]; /* array of values */ -} dom0_perfc_desc_t; +}; +typedef struct dom0_perfc_desc dom0_perfc_desc_t; DEFINE_XEN_GUEST_HANDLE(dom0_perfc_desc_t); -typedef struct dom0_perfccontrol { + +struct dom0_perfccontrol { /* IN variables. */ uint32_t op; /* DOM0_PERFCCONTROL_OP_??? */ /* OUT variables. */ uint32_t nr_counters; /* number of counters */ XEN_GUEST_HANDLE(dom0_perfc_desc_t) desc; /* counter information (or NULL) */ -} dom0_perfccontrol_t; +}; +typedef struct dom0_perfccontrol dom0_perfccontrol_t; DEFINE_XEN_GUEST_HANDLE(dom0_perfccontrol_t); #define DOM0_MICROCODE 35 -typedef struct dom0_microcode { +struct dom0_microcode { /* IN variables. */ XEN_GUEST_HANDLE(void) data; /* Pointer to microcode data */ uint32_t length; /* Length of microcode data. */ -} dom0_microcode_t; +}; +typedef struct dom0_microcode dom0_microcode_t; DEFINE_XEN_GUEST_HANDLE(dom0_microcode_t); #define DOM0_IOPORT_PERMISSION 36 -typedef struct dom0_ioport_permission { +struct dom0_ioport_permission { domid_t domain; /* domain to be affected */ uint32_t first_port; /* first port int range */ uint32_t nr_ports; /* size of port range */ uint8_t allow_access; /* allow or deny access to range? 
*/ -} dom0_ioport_permission_t; +}; +typedef struct dom0_ioport_permission dom0_ioport_permission_t; DEFINE_XEN_GUEST_HANDLE(dom0_ioport_permission_t); #define DOM0_GETVCPUCONTEXT 37 -typedef struct dom0_getvcpucontext { +struct dom0_getvcpucontext { /* IN variables. */ domid_t domain; /* domain to be affected */ uint32_t vcpu; /* vcpu # */ /* OUT variables. */ XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; -} dom0_getvcpucontext_t; +}; +typedef struct dom0_getvcpucontext dom0_getvcpucontext_t; DEFINE_XEN_GUEST_HANDLE(dom0_getvcpucontext_t); #define DOM0_GETVCPUINFO 43 -typedef struct dom0_getvcpuinfo { +struct dom0_getvcpuinfo { /* IN variables. */ domid_t domain; /* domain to be affected */ uint32_t vcpu; /* vcpu # */ @@ -389,92 +417,104 @@ typedef struct dom0_getvcpuinfo { uint64_t cpu_time; /* total cpu time consumed (ns) */ uint32_t cpu; /* current mapping */ cpumap_t cpumap; /* allowable mapping */ -} dom0_getvcpuinfo_t; +}; +typedef struct dom0_getvcpuinfo dom0_getvcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(dom0_getvcpuinfo_t); #define DOM0_GETDOMAININFOLIST 38 -typedef struct dom0_getdomaininfolist { +struct dom0_getdomaininfolist { /* IN variables. */ domid_t first_domain; uint32_t max_domains; XEN_GUEST_HANDLE(dom0_getdomaininfo_t) buffer; /* OUT variables. */ uint32_t num_domains; -} dom0_getdomaininfolist_t; +}; +typedef struct dom0_getdomaininfolist dom0_getdomaininfolist_t; DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfolist_t); #define DOM0_PLATFORM_QUIRK 39 #define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ #define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ #define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ -typedef struct dom0_platform_quirk { +struct dom0_platform_quirk { /* IN variables. 
*/ uint32_t quirk_id; -} dom0_platform_quirk_t; +}; +typedef struct dom0_platform_quirk dom0_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(dom0_platform_quirk_t); -#define DOM0_PHYSICAL_MEMORY_MAP 40 -typedef struct dom0_memory_map_entry { +#define DOM0_PHYSICAL_MEMORY_MAP 40 /* Unimplemented from 3.0.3 onwards */ +struct dom0_memory_map_entry { uint64_t start, end; uint32_t flags; /* reserved */ uint8_t is_ram; -} dom0_memory_map_entry_t; +}; +typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); -typedef struct dom0_physical_memory_map { + +struct dom0_physical_memory_map { /* IN variables. */ uint32_t max_map_entries; /* OUT variables. */ uint32_t nr_map_entries; XEN_GUEST_HANDLE(dom0_memory_map_entry_t) memory_map; -} dom0_physical_memory_map_t; +}; +typedef struct dom0_physical_memory_map dom0_physical_memory_map_t; DEFINE_XEN_GUEST_HANDLE(dom0_physical_memory_map_t); #define DOM0_MAX_VCPUS 41 -typedef struct dom0_max_vcpus { +struct dom0_max_vcpus { domid_t domain; /* domain to be affected */ uint32_t max; /* maximum number of vcpus */ -} dom0_max_vcpus_t; +}; +typedef struct dom0_max_vcpus dom0_max_vcpus_t; DEFINE_XEN_GUEST_HANDLE(dom0_max_vcpus_t); #define DOM0_SETDOMAINHANDLE 44 -typedef struct dom0_setdomainhandle { +struct dom0_setdomainhandle { domid_t domain; xen_domain_handle_t handle; -} dom0_setdomainhandle_t; +}; +typedef struct dom0_setdomainhandle dom0_setdomainhandle_t; DEFINE_XEN_GUEST_HANDLE(dom0_setdomainhandle_t); #define DOM0_SETDEBUGGING 45 -typedef struct dom0_setdebugging { +struct dom0_setdebugging { domid_t domain; uint8_t enable; -} dom0_setdebugging_t; +}; +typedef struct dom0_setdebugging dom0_setdebugging_t; DEFINE_XEN_GUEST_HANDLE(dom0_setdebugging_t); #define DOM0_IRQ_PERMISSION 46 -typedef struct dom0_irq_permission { +struct dom0_irq_permission { domid_t domain; /* domain to be affected */ uint8_t pirq; uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ 
-} dom0_irq_permission_t; +}; +typedef struct dom0_irq_permission dom0_irq_permission_t; DEFINE_XEN_GUEST_HANDLE(dom0_irq_permission_t); #define DOM0_IOMEM_PERMISSION 47 -typedef struct dom0_iomem_permission { +struct dom0_iomem_permission { domid_t domain; /* domain to be affected */ unsigned long first_mfn; /* first page (physical page number) in range */ unsigned long nr_mfns; /* number of pages in range (>0) */ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ -} dom0_iomem_permission_t; +}; +typedef struct dom0_iomem_permission dom0_iomem_permission_t; DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permission_t); #define DOM0_HYPERCALL_INIT 48 -typedef struct dom0_hypercall_init { +struct dom0_hypercall_init { domid_t domain; /* domain to be affected */ unsigned long mfn; /* machine frame to be initialised */ -} dom0_hypercall_init_t; +}; +typedef struct dom0_hypercall_init dom0_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t); -typedef struct dom0_op { +struct dom0_op { uint32_t cmd; uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ union { @@ -517,7 +557,8 @@ typedef struct dom0_op { struct dom0_hypercall_init hypercall_init; uint8_t pad[128]; } u; -} dom0_op_t; +}; +typedef struct dom0_op dom0_op_t; DEFINE_XEN_GUEST_HANDLE(dom0_op_t); #endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/event_channel.h --- a/xen/include/public/event_channel.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/event_channel.h Tue May 30 14:30:34 2006 -0500 @@ -28,12 +28,13 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); * 2. <rdom> may be DOMID_SELF, allowing loopback connections. 
*/ #define EVTCHNOP_alloc_unbound 6 -typedef struct evtchn_alloc_unbound { +struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; /* OUT parameters */ evtchn_port_t port; -} evtchn_alloc_unbound_t; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between @@ -45,13 +46,14 @@ typedef struct evtchn_alloc_unbound { * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. */ #define EVTCHNOP_bind_interdomain 0 -typedef struct evtchn_bind_interdomain { +struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; evtchn_port_t remote_port; /* OUT parameters. */ evtchn_port_t local_port; -} evtchn_bind_interdomain_t; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified @@ -66,13 +68,14 @@ typedef struct evtchn_bind_interdomain { * binding cannot be changed. */ #define EVTCHNOP_bind_virq 1 -typedef struct evtchn_bind_virq { +struct evtchn_bind_virq { /* IN parameters. */ uint32_t virq; uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; -} evtchn_bind_virq_t; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. @@ -81,14 +84,15 @@ typedef struct evtchn_bind_virq { * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ #define EVTCHNOP_bind_pirq 2 -typedef struct evtchn_bind_pirq { +struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 uint32_t flags; /* BIND_PIRQ__* */ /* OUT parameters. */ evtchn_port_t port; -} evtchn_bind_pirq_t; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. @@ -97,11 +101,12 @@ typedef struct evtchn_bind_pirq { * may not be changed. 
*/ #define EVTCHNOP_bind_ipi 7 -typedef struct evtchn_bind_ipi { - uint32_t vcpu; - /* OUT parameters. */ - evtchn_port_t port; -} evtchn_bind_ipi_t; +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel <port>. If the channel is @@ -109,20 +114,22 @@ typedef struct evtchn_bind_ipi { * (EVTCHNSTAT_unbound), awaiting a new connection. */ #define EVTCHNOP_close 3 -typedef struct evtchn_close { - /* IN parameters. */ - evtchn_port_t port; -} evtchn_close_t; +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is <port>. */ #define EVTCHNOP_send 4 -typedef struct evtchn_send { - /* IN parameters. */ - evtchn_port_t port; -} evtchn_send_t; +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which @@ -133,7 +140,7 @@ typedef struct evtchn_send { * channel for which <dom> is not DOMID_SELF. */ #define EVTCHNOP_status 5 -typedef struct evtchn_status { +struct evtchn_status { /* IN parameters */ domid_t dom; evtchn_port_t port; @@ -157,7 +164,8 @@ typedef struct evtchn_status { uint32_t pirq; /* EVTCHNSTAT_pirq */ uint32_t virq; /* EVTCHNSTAT_virq */ } u; -} evtchn_status_t; +}; +typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an @@ -172,41 +180,44 @@ typedef struct evtchn_status { * has its binding reset to vcpu0). */ #define EVTCHNOP_bind_vcpu 8 -typedef struct evtchn_bind_vcpu { - /* IN parameters. */ - evtchn_port_t port; - uint32_t vcpu; -} evtchn_bind_vcpu_t; +struct evtchn_bind_vcpu { + /* IN parameters. 
*/ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ #define EVTCHNOP_unmask 9 -typedef struct evtchn_unmask { - /* IN parameters. */ - evtchn_port_t port; -} evtchn_unmask_t; +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; /* * Argument to event_channel_op_compat() hypercall. Superceded by new * event_channel_op() hypercall since 0x00030202. */ -typedef struct evtchn_op { +struct evtchn_op { uint32_t cmd; /* EVTCHNOP_* */ union { - evtchn_alloc_unbound_t alloc_unbound; - evtchn_bind_interdomain_t bind_interdomain; - evtchn_bind_virq_t bind_virq; - evtchn_bind_pirq_t bind_pirq; - evtchn_bind_ipi_t bind_ipi; - evtchn_close_t close; - evtchn_send_t send; - evtchn_status_t status; - evtchn_bind_vcpu_t bind_vcpu; - evtchn_unmask_t unmask; + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; } u; -} evtchn_op_t; +}; +typedef struct evtchn_op evtchn_op_t; DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/grant_table.h Tue May 30 14:30:34 2006 -0500 @@ -71,7 +71,7 @@ * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -typedef struct grant_entry { +struct grant_entry { /* GTF_xxx: various type and flag information. 
[XEN,GST] */ #if defined(__powerpc__) ulong flags; @@ -85,7 +85,8 @@ typedef struct grant_entry { * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] */ uint32_t frame; -} grant_entry_t; +}; +typedef struct grant_entry grant_entry_t; /* * Type of grant entry. @@ -160,7 +161,7 @@ typedef uint32_t grant_handle_t; * to be accounted to the correct grant reference! */ #define GNTTABOP_map_grant_ref 0 -typedef struct gnttab_map_grant_ref { +struct gnttab_map_grant_ref { /* IN parameters. */ uint64_t host_addr; uint32_t flags; /* GNTMAP_* */ @@ -170,7 +171,8 @@ typedef struct gnttab_map_grant_ref { int16_t status; /* GNTST_* */ grant_handle_t handle; uint64_t dev_bus_addr; -} gnttab_map_grant_ref_t; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); /* @@ -185,14 +187,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant * mappings will remain in the device or host TLBs. */ #define GNTTABOP_unmap_grant_ref 1 -typedef struct gnttab_unmap_grant_ref { +struct gnttab_unmap_grant_ref { /* IN parameters. */ uint64_t host_addr; uint64_t dev_bus_addr; grant_handle_t handle; /* OUT parameters. */ int16_t status; /* GNTST_* */ -} gnttab_unmap_grant_ref_t; +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); /* @@ -205,14 +208,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_gra * 3. Xen may not support more than a single grant-table page per domain. */ #define GNTTABOP_setup_table 2 -typedef struct gnttab_setup_table { +struct gnttab_setup_table { /* IN parameters. */ domid_t dom; uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ XEN_GUEST_HANDLE(ulong) frame_list; -} gnttab_setup_table_t; +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); /* @@ -220,12 +224,13 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_setup_tab * xen console. Debugging use only. 
*/ #define GNTTABOP_dump_table 3 -typedef struct gnttab_dump_table { +struct gnttab_dump_table { /* IN parameters. */ domid_t dom; /* OUT parameters. */ int16_t status; /* GNTST_* */ -} gnttab_dump_table_t; +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); /* @@ -237,14 +242,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl * to the calling domain *unless* the error is GNTST_bad_page. */ #define GNTTABOP_transfer 4 -typedef struct gnttab_transfer { +struct gnttab_transfer { /* IN parameters. */ unsigned long mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ int16_t status; -} gnttab_transfer_t; +}; +typedef struct gnttab_transfer gnttab_transfer_t; DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); /* diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/hvm/ioreq.h Tue May 30 14:30:34 2006 -0500 @@ -41,7 +41,7 @@ * prepare this structure and notify service OS and DM by sending * virq */ -typedef struct { +struct ioreq { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ uint64_t count; /* for rep prefixes */ @@ -55,31 +55,35 @@ typedef struct { uint8_t df:1; uint8_t type; /* I/O type */ uint64_t io_count; /* How many IO done on a vcpu */ -} ioreq_t; +}; +typedef struct ioreq ioreq_t; #define MAX_VECTOR 256 #define BITS_PER_BYTE 8 #define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t))) #define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t))) -typedef struct { +struct global_iodata { uint16_t pic_elcr; uint16_t pic_irr; uint16_t pic_last_irr; uint16_t pic_clear_irr; -} global_iodata_t; +}; +typedef struct global_iodata global_iodata_t; -typedef struct { - ioreq_t vp_ioreq; +struct vcpu_iodata { + struct ioreq vp_ioreq; /* Event channel port */ unsigned int vp_eport; /* VMX vcpu uses this to notify DM */ unsigned int dm_eport; /* DM uses this to notify VMX vcpu */ -} 
vcpu_iodata_t; +}; +typedef struct vcpu_iodata vcpu_iodata_t; -typedef struct { - global_iodata_t sp_global; - vcpu_iodata_t vcpu_iodata[1]; -} shared_iopage_t; +struct shared_iopage { + struct global_iodata sp_global; + struct vcpu_iodata vcpu_iodata[1]; +}; +typedef struct shared_iopage shared_iopage_t; #endif /* _IOREQ_H_ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/vmx_assist.h --- a/xen/include/public/hvm/vmx_assist.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/hvm/vmx_assist.h Tue May 30 14:30:34 2006 -0500 @@ -37,7 +37,7 @@ union vmcs_arbytes { /* * World switch state */ -typedef struct vmx_assist_context { +struct vmx_assist_context { uint32_t eip; /* execution pointer */ uint32_t esp; /* stack pointer */ uint32_t eflags; /* flags register */ @@ -80,7 +80,8 @@ typedef struct vmx_assist_context { uint32_t ldtr_limit; uint32_t ldtr_base; union vmcs_arbytes ldtr_arbytes; -} vmx_assist_context_t; +}; +typedef struct vmx_assist_context vmx_assist_context_t; #endif /* __ASSEMBLY__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/blkif.h --- a/xen/include/public/io/blkif.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/io/blkif.h Tue May 30 14:30:34 2006 -0500 @@ -39,7 +39,7 @@ */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 -typedef struct blkif_request { +struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ @@ -51,13 +51,15 @@ typedef struct blkif_request { /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -} blkif_request_t; +}; +typedef struct blkif_request blkif_request_t; -typedef struct blkif_response { +struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? 
*/ -} blkif_response_t; +}; +typedef struct blkif_response blkif_response_t; #define BLKIF_RSP_ERROR -1 /* non-specific 'error' */ #define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */ @@ -66,7 +68,7 @@ typedef struct blkif_response { * Generate blkif ring structures and types. */ -DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t); +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/io/netif.h Tue May 30 14:30:34 2006 -0500 @@ -13,10 +13,10 @@ #include "../grant_table.h" /* - * Note that there is *never* any need to notify the backend when enqueuing - * receive requests (netif_rx_request_t). Notifications after enqueuing any - * other type of message should be conditional on the appropriate req_event - * or rsp_event field in the shared ring. + * Note that there is *never* any need to notify the backend when + * enqueuing receive requests (struct netif_rx_request). Notifications + * after enqueuing any other type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. */ /* Protocol checksum field is blank in the packet (hardware offload)? */ @@ -27,23 +27,26 @@ #define _NETTXF_data_validated (1) #define NETTXF_data_validated (1U<<_NETTXF_data_validated) -typedef struct netif_tx_request { +struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ uint16_t flags; /* NETTXF_* */ uint16_t id; /* Echoed in response message. */ uint16_t size; /* Packet size in bytes. 
*/ -} netif_tx_request_t; +}; +typedef struct netif_tx_request netif_tx_request_t; -typedef struct netif_tx_response { +struct netif_tx_response { uint16_t id; int16_t status; /* NETIF_RSP_* */ -} netif_tx_response_t; +}; +typedef struct netif_tx_response netif_tx_response_t; -typedef struct { +struct netif_rx_request { uint16_t id; /* Echoed in response message. */ grant_ref_t gref; /* Reference to incoming granted frame */ -} netif_rx_request_t; +}; +typedef struct netif_rx_request netif_rx_request_t; /* Packet data has been validated against protocol checksum. */ #define _NETRXF_data_validated (0) @@ -53,19 +56,20 @@ typedef struct { #define _NETRXF_csum_blank (1) #define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) -typedef struct { +struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ uint16_t flags; /* NETRXF_* */ int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ -} netif_rx_response_t; +}; +typedef struct netif_rx_response netif_rx_response_t; /* * Generate netif ring structures and types. */ -DEFINE_RING_TYPES(netif_tx, netif_tx_request_t, netif_tx_response_t); -DEFINE_RING_TYPES(netif_rx, netif_rx_request_t, netif_rx_response_t); +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/tpmif.h --- a/xen/include/public/io/tpmif.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/io/tpmif.h Tue May 30 14:30:34 2006 -0500 @@ -18,12 +18,13 @@ #include "../grant_table.h" -typedef struct { +struct tpmif_tx_request { unsigned long addr; /* Machine address of packet. */ grant_ref_t ref; /* grant table access reference */ uint16_t unused; uint16_t size; /* Packet size in bytes. 
*/ -} tpmif_tx_request_t; +}; +typedef struct tpmif_tx_request tpmif_tx_request_t; /* * The TPMIF_TX_RING_SIZE defines the number of pages the @@ -35,13 +36,15 @@ typedef uint32_t TPMIF_RING_IDX; /* This structure must fit in a memory page. */ -typedef struct { - tpmif_tx_request_t req; -} tpmif_ring_t; +struct tpmif_ring { + struct tpmif_tx_request req; +}; +typedef struct tpmif_ring tpmif_ring_t; -typedef struct { - tpmif_ring_t ring[TPMIF_TX_RING_SIZE]; -} tpmif_tx_interface_t; +struct tpmif_tx_interface { + struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; +}; +typedef struct tpmif_tx_interface tpmif_tx_interface_t; #endif diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/xenbus.h --- a/xen/include/public/io/xenbus.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/io/xenbus.h Tue May 30 14:30:34 2006 -0500 @@ -9,34 +9,37 @@ #ifndef _XEN_PUBLIC_IO_XENBUS_H #define _XEN_PUBLIC_IO_XENBUS_H -/* The state of either end of the Xenbus, i.e. the current communication - status of initialisation across the bus. States here imply nothing about - the state of the connection between the driver and the kernel's device - layers. */ -typedef enum -{ - XenbusStateUnknown = 0, - XenbusStateInitialising = 1, - XenbusStateInitWait = 2, /* Finished early initialisation, but waiting - for information from the peer or hotplug - scripts. */ - XenbusStateInitialised = 3, /* Initialised and waiting for a connection - from the peer. */ - XenbusStateConnected = 4, - XenbusStateClosing = 5, /* The device is being closed due to an error - or an unplug event. */ - XenbusStateClosed = 6 +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. 
+ */ +enum xenbus_state { + XenbusStateUnknown = 0, -} XenbusState; + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6 +}; +typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/memory.h --- a/xen/include/public/memory.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/memory.h Tue May 30 14:30:34 2006 -0500 @@ -17,7 +17,7 @@ #define XENMEM_increase_reservation 0 #define XENMEM_decrease_reservation 1 #define XENMEM_populate_physmap 6 -typedef struct xen_memory_reservation { +struct xen_memory_reservation { /* * XENMEM_increase_reservation: @@ -49,7 +49,8 @@ typedef struct xen_memory_reservation { */ domid_t domid; -} xen_memory_reservation_t; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); /* @@ -74,7 +75,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser * arg == addr of xen_machphys_mfn_list_t. */ #define XENMEM_machphys_mfn_list 5 -typedef struct xen_machphys_mfn_list { +struct xen_machphys_mfn_list { /* * Size of the 'extent_start' array. Fewer entries will be filled if the * machphys table is smaller than max_extents * 2MB. @@ -93,7 +94,8 @@ typedef struct xen_machphys_mfn_list { * than 'max_extents' if the machphys table is smaller than max_e * 2MB. 
*/ unsigned int nr_extents; -} xen_machphys_mfn_list_t; +}; +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); /* @@ -102,7 +104,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn * arg == addr of xen_add_to_physmap_t. */ #define XENMEM_add_to_physmap 7 -typedef struct xen_add_to_physmap { +struct xen_add_to_physmap { /* Which domain to change the mapping for. */ domid_t domid; @@ -116,7 +118,8 @@ typedef struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; -} xen_add_to_physmap_t; +}; +typedef struct xen_add_to_physmap xen_add_to_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); /* @@ -124,7 +127,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physm * code on failure. This call only works for auto-translated guests. */ #define XENMEM_translate_gpfn_list 8 -typedef struct xen_translate_gpfn_list { +struct xen_translate_gpfn_list { /* Which domain to translate for? */ domid_t domid; @@ -139,8 +142,37 @@ typedef struct xen_translate_gpfn_list { * list (in which case each input GPFN is overwritten with the output MFN). */ XEN_GUEST_HANDLE(ulong) mfn_list; -} xen_translate_gpfn_list_t; +}; +typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. 
Passes the same structure as + * XENMEM_memory_map. + */ +#define XENMEM_machine_memory_map 10 #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/nmi.h --- a/xen/include/public/nmi.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/nmi.h Tue May 30 14:30:34 2006 -0500 @@ -34,10 +34,11 @@ * arg == pointer to xennmi_callback structure. */ #define XENNMI_register_callback 0 -typedef struct xennmi_callback { +struct xennmi_callback { unsigned long handler_address; unsigned long pad; -} xennmi_callback_t; +}; +typedef struct xennmi_callback xennmi_callback_t; DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); /* diff -r e74246451527 -r f54d38cea8ac xen/include/public/physdev.h --- a/xen/include/public/physdev.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/physdev.h Tue May 30 14:30:34 2006 -0500 @@ -14,10 +14,11 @@ * @arg == pointer to physdev_eoi structure. */ #define PHYSDEVOP_eoi 12 -typedef struct physdev_eoi { +struct physdev_eoi { /* IN */ uint32_t irq; -} physdev_eoi_t; +}; +typedef struct physdev_eoi physdev_eoi_t; DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); /* @@ -25,12 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); * @arg == pointer to physdev_irq_status_query structure. */ #define PHYSDEVOP_irq_status_query 5 -typedef struct physdev_irq_status_query { +struct physdev_irq_status_query { /* IN */ uint32_t irq; /* OUT */ uint32_t flags; /* XENIRQSTAT_* */ -} physdev_irq_status_query_t; +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ @@ -42,10 +44,11 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_stat * @arg == pointer to physdev_set_iopl structure. 
*/ #define PHYSDEVOP_set_iopl 6 -typedef struct physdev_set_iopl { +struct physdev_set_iopl { /* IN */ uint32_t iopl; -} physdev_set_iopl_t; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); /* @@ -53,11 +56,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl * @arg == pointer to physdev_set_iobitmap structure. */ #define PHYSDEVOP_set_iobitmap 7 -typedef struct physdev_set_iobitmap { +struct physdev_set_iobitmap { /* IN */ uint8_t *bitmap; uint32_t nr_ports; -} physdev_set_iobitmap_t; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); /* @@ -66,13 +70,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iobi */ #define PHYSDEVOP_apic_read 8 #define PHYSDEVOP_apic_write 9 -typedef struct physdev_apic { +struct physdev_apic { /* IN */ unsigned long apic_physbase; uint32_t reg; /* IN or OUT */ uint32_t value; -} physdev_apic_t; +}; +typedef struct physdev_apic physdev_apic_t; DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); /* @@ -81,28 +86,30 @@ DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); */ #define PHYSDEVOP_alloc_irq_vector 10 #define PHYSDEVOP_free_irq_vector 11 -typedef struct physdev_irq { +struct physdev_irq { /* IN */ uint32_t irq; /* IN or OUT */ uint32_t vector; -} physdev_irq_t; +}; +typedef struct physdev_irq physdev_irq_t; DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. 
*/ -typedef struct physdev_op { +struct physdev_op { uint32_t cmd; union { - physdev_irq_status_query_t irq_status_query; - physdev_set_iopl_t set_iopl; - physdev_set_iobitmap_t set_iobitmap; - physdev_apic_t apic_op; - physdev_irq_t irq_op; + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; } u; -} physdev_op_t; +}; +typedef struct physdev_op physdev_op_t; DEFINE_XEN_GUEST_HANDLE(physdev_op_t); /* diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched.h --- a/xen/include/public/sched.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/sched.h Tue May 30 14:30:34 2006 -0500 @@ -46,9 +46,10 @@ * @arg == pointer to sched_shutdown structure. */ #define SCHEDOP_shutdown 2 -typedef struct sched_shutdown { +struct sched_shutdown { unsigned int reason; /* SHUTDOWN_* */ -} sched_shutdown_t; +}; +typedef struct sched_shutdown sched_shutdown_t; DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); /* @@ -57,11 +58,12 @@ DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t * @arg == pointer to sched_poll structure. */ #define SCHEDOP_poll 3 -typedef struct sched_poll { +struct sched_poll { XEN_GUEST_HANDLE(evtchn_port_t) ports; unsigned int nr_ports; uint64_t timeout; -} sched_poll_t; +}; +typedef struct sched_poll sched_poll_t; DEFINE_XEN_GUEST_HANDLE(sched_poll_t); /* @@ -71,10 +73,11 @@ DEFINE_XEN_GUEST_HANDLE(sched_poll_t); * @arg == pointer to sched_remote_shutdown structure. 
*/ #define SCHEDOP_remote_shutdown 4 -typedef struct sched_remote_shutdown { +struct sched_remote_shutdown { domid_t domain_id; /* Remote domain ID */ unsigned int reason; /* SHUTDOWN_xxx reason */ -} sched_remote_shutdown_t; +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); /* diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched_ctl.h --- a/xen/include/public/sched_ctl.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/sched_ctl.h Tue May 30 14:30:34 2006 -0500 @@ -10,6 +10,7 @@ /* Scheduler types. */ #define SCHED_BVT 0 #define SCHED_SEDF 4 +#define SCHED_CREDIT 5 /* Set or get info? */ #define SCHED_INFO_PUT 0 @@ -48,6 +49,10 @@ struct sched_adjdom_cmd { uint32_t extratime; uint32_t weight; } sedf; + struct sched_credit_adjdom { + uint16_t weight; + uint16_t cap; + } credit; } u; }; diff -r e74246451527 -r f54d38cea8ac xen/include/public/vcpu.h --- a/xen/include/public/vcpu.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/vcpu.h Tue May 30 14:30:34 2006 -0500 @@ -56,7 +56,7 @@ * @extra_arg == pointer to vcpu_runstate_info structure. */ #define VCPUOP_get_runstate_info 4 -typedef struct vcpu_runstate_info { +struct vcpu_runstate_info { /* VCPU's current state (RUNSTATE_*). */ int state; /* When was current state entered (system time, ns)? */ @@ -66,7 +66,8 @@ typedef struct vcpu_runstate_info { * guaranteed not to drift from system time. */ uint64_t time[4]; -} vcpu_runstate_info_t; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; /* VCPU is currently running on a physical CPU. */ #define RUNSTATE_running 0 @@ -99,12 +100,13 @@ typedef struct vcpu_runstate_info { * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. 
*/ #define VCPUOP_register_runstate_memory_area 5 -typedef struct vcpu_register_runstate_memory_area { +struct vcpu_register_runstate_memory_area { union { struct vcpu_runstate_info *v; uint64_t p; } addr; -} vcpu_register_runstate_memory_area_t; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; #endif /* __XEN_PUBLIC_VCPU_H__ */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/version.h --- a/xen/include/public/version.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/version.h Tue May 30 14:30:34 2006 -0500 @@ -22,12 +22,13 @@ typedef char xen_extraversion_t[16]; /* arg == xen_compile_info_t. */ #define XENVER_compile_info 2 -typedef struct xen_compile_info { +struct xen_compile_info { char compiler[64]; char compile_by[16]; char compile_domain[32]; char compile_date[32]; -} xen_compile_info_t; +}; +typedef struct xen_compile_info xen_compile_info_t; #define XENVER_capabilities 3 typedef char xen_capabilities_info_t[1024]; @@ -38,15 +39,17 @@ typedef char xen_changeset_info_t[64]; #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 -typedef struct xen_platform_parameters { +struct xen_platform_parameters { unsigned long virt_start; -} xen_platform_parameters_t; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; #define XENVER_get_features 6 -typedef struct xen_feature_info { +struct xen_feature_info { unsigned int submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ -} xen_feature_info_t; +}; +typedef struct xen_feature_info xen_feature_info_t; /* Declares the features reported by XENVER_get_features. 
*/ #include "features.h" diff -r e74246451527 -r f54d38cea8ac xen/include/public/xen.h --- a/xen/include/public/xen.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/xen.h Tue May 30 14:30:34 2006 -0500 @@ -195,7 +195,7 @@ #define MMUEXT_NEW_USER_BASEPTR 15 #ifndef __ASSEMBLY__ -typedef struct mmuext_op { +struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ @@ -209,7 +209,8 @@ typedef struct mmuext_op { /* TLB_FLUSH_MULTI, INVLPG_MULTI */ void *vcpumask; } arg2; -} mmuext_op_t; +}; +typedef struct mmuext_op mmuext_op_t; DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #endif @@ -273,20 +274,22 @@ typedef uint16_t domid_t; * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ -typedef struct mmu_update { +struct mmu_update { uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. */ -} mmu_update_t; +}; +typedef struct mmu_update mmu_update_t; DEFINE_XEN_GUEST_HANDLE(mmu_update_t); /* * Send an array of these to HYPERVISOR_multicall(). * NB. The fields are natural register size for this architecture. */ -typedef struct multicall_entry { +struct multicall_entry { unsigned long op, result; unsigned long args[6]; -} multicall_entry_t; +}; +typedef struct multicall_entry multicall_entry_t; DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); /* @@ -295,7 +298,7 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_ */ #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) -typedef struct vcpu_time_info { +struct vcpu_time_info { /* * Updates to the following values are preceded and followed by an * increment of 'version'. 
The guest can therefore detect updates by @@ -319,9 +322,10 @@ typedef struct vcpu_time_info { uint32_t tsc_to_system_mul; int8_t tsc_shift; int8_t pad1[3]; -} vcpu_time_info_t; /* 32 bytes */ - -typedef struct vcpu_info { +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { /* * 'evtchn_upcall_pending' is written non-zero by Xen to indicate * a pending notification for a particular VCPU. It is then cleared @@ -354,16 +358,17 @@ typedef struct vcpu_info { #endif uint8_t evtchn_upcall_mask; unsigned long evtchn_pending_sel; - arch_vcpu_info_t arch; - vcpu_time_info_t time; -} vcpu_info_t; /* 64 bytes (x86) */ + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +typedef struct vcpu_info vcpu_info_t; /* * Xen/kernel shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. */ -typedef struct shared_info { - vcpu_info_t vcpu_info[MAX_VIRT_CPUS]; +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; /* * A domain can create "event channels" on which it can send and receive @@ -407,9 +412,10 @@ typedef struct shared_info { uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ - arch_shared_info_t arch; - -} shared_info_t; + struct arch_shared_info arch; + +}; +typedef struct shared_info shared_info_t; /* * Start-of-day memory layout for the initial domain (DOM0): @@ -437,7 +443,7 @@ typedef struct shared_info { */ #define MAX_GUEST_CMDLINE 1024 -typedef struct start_info { +struct start_info { /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ char magic[32]; /* "xen-<version>-<platform>". */ unsigned long nr_pages; /* Total pages allocated to this domain. */ @@ -454,7 +460,8 @@ typedef struct start_info { unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ unsigned long mod_len; /* Size (bytes) of pre-loaded module. 
*/ int8_t cmd_line[MAX_GUEST_CMDLINE]; -} start_info_t; +}; +typedef struct start_info start_info_t; /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ diff -r e74246451527 -r f54d38cea8ac xen/include/public/xenoprof.h --- a/xen/include/public/xenoprof.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/public/xenoprof.h Tue May 30 14:30:34 2006 -0500 @@ -41,7 +41,7 @@ struct event_log { }; /* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ -typedef struct xenoprof_buf { +struct xenoprof_buf { uint32_t event_head; uint32_t event_tail; uint32_t event_size; @@ -51,10 +51,11 @@ typedef struct xenoprof_buf { uint64_t user_samples; uint64_t lost_samples; struct event_log event_log[1]; -} xenoprof_buf_t; +}; +typedef struct xenoprof_buf xenoprof_buf_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); -typedef struct xenoprof_init { +struct xenoprof_init { int32_t max_samples; int32_t num_events; int32_t is_primary; @@ -62,10 +63,11 @@ typedef struct xenoprof_init { int32_t bufsize; uint64_t buf_maddr; char cpu_type[XENOPROF_CPU_TYPE_SIZE]; -} xenoprof_init_t; +}; +typedef struct xenoprof_init xenoprof_init_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); -typedef struct xenoprof_counter { +struct xenoprof_counter { uint32_t ind; uint64_t count; uint32_t enabled; @@ -74,7 +76,8 @@ typedef struct xenoprof_counter { uint32_t kernel; uint32_t user; uint64_t unit_mask; -} xenoprof_counter_t; +}; +typedef struct xenoprof_counter xenoprof_counter_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); diff -r e74246451527 -r f54d38cea8ac xen/include/xen/hypercall.h --- a/xen/include/xen/hypercall.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/xen/hypercall.h Tue May 30 14:30:34 2006 -0500 @@ -80,7 +80,7 @@ do_vcpu_op( extern long do_acm_op( - XEN_GUEST_HANDLE(acm_op_t) u_acm_op); + int cmd, XEN_GUEST_HANDLE(void) arg); extern long do_nmi_op( diff -r e74246451527 -r f54d38cea8ac xen/include/xen/sched-if.h 
--- a/xen/include/xen/sched-if.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/xen/sched-if.h Tue May 30 14:30:34 2006 -0500 @@ -58,6 +58,8 @@ struct scheduler { char *opt_name; /* option name for this scheduler */ unsigned int sched_id; /* ID for this scheduler */ + void (*init) (void); + void (*tick) (unsigned int cpu); int (*alloc_task) (struct vcpu *); void (*add_task) (struct vcpu *); void (*free_task) (struct domain *); diff -r e74246451527 -r f54d38cea8ac xen/include/xen/softirq.h --- a/xen/include/xen/softirq.h Tue May 30 12:52:02 2006 -0500 +++ b/xen/include/xen/softirq.h Tue May 30 14:30:34 2006 -0500 @@ -26,6 +26,19 @@ asmlinkage void do_softirq(void); asmlinkage void do_softirq(void); extern void open_softirq(int nr, softirq_handler handler); +static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr) +{ + int cpu; + + for_each_cpu_mask(cpu, mask) + { + if ( test_and_set_bit(nr, &softirq_pending(cpu)) ) + cpu_clear(cpu, mask); + } + + smp_send_event_check_mask(mask); +} + static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) { if ( !test_and_set_bit(nr, &softirq_pending(cpu)) ) diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/util.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,115 @@ +/****************************************************************************** + * arch/ia64/xen/util.c + * This file is the ia64 counterpart of drivers/xen/util.c + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <xen/driver_util.h> + +struct vm_struct *alloc_vm_area(unsigned long size) +{ + int order; + unsigned long virt; + unsigned long nr_pages; + struct vm_struct* area; + + order = get_order(size); + virt = __get_free_pages(GFP_KERNEL, order); + if (virt == 0) { + goto err0; + } + nr_pages = 1 << order; + scrub_pages(virt, nr_pages); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (area == NULL) { + goto err1; + } + + area->flags = VM_IOREMAP;//XXX + area->addr = (void*)virt; + area->size = size; + area->pages = NULL; //XXX + area->nr_pages = nr_pages; + area->phys_addr = __pa(virt); + + return area; + +err1: + free_pages(virt, order); +err0: + return NULL; + +} +EXPORT_SYMBOL_GPL(alloc_vm_area); + +void free_vm_area(struct vm_struct *area) +{ + unsigned int order = get_order(area->size); + unsigned long i; + + // This area is used for foreign page mappping. + // So underlying machine page may not be assigned. 
+ for (i = 0; i < (1 << order); i++) { + unsigned long ret; + unsigned long gpfn = (area->phys_addr >> PAGE_SHIFT) + i; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &reservation); + BUG_ON(ret != 1); + } + free_pages((unsigned long)area->addr, order); + kfree(area); +} +EXPORT_SYMBOL_GPL(free_vm_area); + +void lock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(lock_vm_area); + +void unlock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(unlock_vm_area); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,185 @@ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <xen/cpu_hotplug.h> +#include <xen/xenbus.h> + +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; + +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). 
+ */ + return (current->mm != NULL); +} + +static void vcpu_hotplug(unsigned int cpu) +{ + int err; + char dir[32], state[32]; + + if ((cpu >= NR_CPUS) || !cpu_possible(cpu)) + return; + + sprintf(dir, "cpu/%d", cpu); + err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state); + if (err != 1) { + printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); + return; + } + + if (strcmp(state, "online") == 0) { + cpu_set(cpu, xenbus_allowed_cpumask); + (void)cpu_up(cpu); + } else if (strcmp(state, "offline") == 0) { + cpu_clear(cpu, xenbus_allowed_cpumask); + (void)cpu_down(cpu); + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + state, cpu); + } +} + +static void handle_vcpu_hotplug_event( + struct xenbus_watch *watch, const char **vec, unsigned int len) +{ + int cpu; + char *cpustr; + const char *node = vec[XS_WATCH_PATH]; + + if ((cpustr = strstr(node, "cpu/")) != NULL) { + sscanf(cpustr, "cpu/%d", &cpu); + vcpu_hotplug(cpu); + } +} + +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. 
+ */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpu_clear(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + +static int setup_cpu_watcher(struct notifier_block *notifier, + unsigned long event, void *data) +{ + int i; + + static struct xenbus_watch cpu_watch = { + .node = "cpu", + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; + (void)register_xenbus_watch(&cpu_watch); + + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + for_each_cpu(i) + vcpu_hotplug(i); + printk(KERN_INFO "Brought up %ld CPUs\n", + (long)num_online_cpus()); + } + + return NOTIFY_DONE; +} + +static int __init setup_vcpu_hotplug_event(void) +{ + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; + static struct notifier_block xsn_cpu = { + .notifier_call = setup_cpu_watcher }; + + register_cpu_notifier(&hotplug_cpu); + register_xenstore_notifier(&xsn_cpu); + + return 0; +} + +arch_initcall(setup_vcpu_hotplug_event); + +int smp_suspend(void) +{ + int i, err; + + lock_cpu_hotplug(); + + /* + * Take all other CPUs offline. We hold the hotplug mutex to + * avoid other processes bringing up CPUs under our feet. 
+ */ + while (num_online_cpus() > 1) { + unlock_cpu_hotplug(); + for_each_online_cpu(i) { + if (i == 0) + continue; + err = cpu_down(i); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_cpu(i) + vcpu_hotplug(i); + return err; + } + } + lock_cpu_hotplug(); + } + + return 0; +} + +void smp_resume(void) +{ + int cpu; + + for_each_cpu(cpu) + cpu_initialize_context(cpu); + + unlock_cpu_hotplug(); + + for_each_cpu(cpu) + vcpu_hotplug(cpu); +} + +int cpu_up_check(unsigned int cpu) +{ + int rc = 0; + + if (local_cpu_hotplug_request()) { + cpu_set(cpu, local_allowed_cpumask); + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + rc = -EBUSY; + } + } else if (!cpu_isset(cpu, local_allowed_cpumask) || + !cpu_isset(cpu, xenbus_allowed_cpumask)) { + rc = -EBUSY; + } + + return rc; +} + +void init_xenbus_allowed_cpumask(void) +{ + xenbus_allowed_cpumask = cpu_present_map; +} diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,63 @@ +/* + * structures and definitions for the int 15, ax=e820 memory map + * scheme. + * + * In a nutshell, setup.S populates a scratch table in the + * empty_zero_block that contains a list of usable address/size + * duples. setup.c, this information is transferred into the e820map, + * and in init.c/numa.c, that new information is used to mark pages + * reserved or not. 
+ */ +#ifndef __E820_HEADER +#define __E820_HEADER + +#include <linux/mmzone.h> + +#define E820MAP 0x2d0 /* our map */ +#define E820MAX 128 /* number of entries in E820MAP */ +#define E820NR 0x1e8 /* # entries in E820MAP */ + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ +#define E820_NVS 4 + +#define HIGH_MEMORY (1024*1024) + +#define LOWMEMSIZE() (0x9f000) + +#ifndef __ASSEMBLY__ +struct e820entry { + u64 addr; /* start of memory segment */ + u64 size; /* size of memory segment */ + u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + int nr_map; + struct e820entry map[E820MAX]; +}; + +extern unsigned long find_e820_area(unsigned long start, unsigned long end, + unsigned size); +extern void add_memory_region(unsigned long start, unsigned long size, + int type); +extern void setup_memory_region(void); +extern void contig_e820_setup(void); +extern unsigned long e820_end_of_ram(void); +extern void e820_reserve_resources(struct e820entry *e820, int nr_map); +extern void e820_print_map(char *who); +extern int e820_mapped(unsigned long start, unsigned long end, unsigned type); + +extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); +extern void e820_setup_gap(struct e820entry *e820, int nr_map); +extern unsigned long e820_hole_size(unsigned long start_pfn, + unsigned long end_pfn); + +extern void __init parse_memopt(char *p, char **end); +extern void __init parse_memmapopt(char *p, char **end); + +extern struct e820map e820; +#endif/*!__ASSEMBLY__*/ + +#endif/*__E820_HEADER*/ diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/cpu_hotplug.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,42 @@ +#ifndef __XEN_CPU_HOTPLUG_H__ +#define __XEN_CPU_HOTPLUG_H__ + +#include <linux/config.h> +#include <linux/kernel.h> +#include 
<linux/cpumask.h> + +#if defined(CONFIG_HOTPLUG_CPU) + +#if defined(CONFIG_X86) +void cpu_initialize_context(unsigned int cpu); +#else +#define cpu_initialize_context(cpu) ((void)0) +#endif + +int cpu_up_check(unsigned int cpu); +void init_xenbus_allowed_cpumask(void); +int smp_suspend(void); +void smp_resume(void); + +#else /* !defined(CONFIG_HOTPLUG_CPU) */ + +#define cpu_up_check(cpu) (0) +#define init_xenbus_allowed_cpumask() ((void)0) + +static inline int smp_suspend(void) +{ + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Can't suspend SMP guests " + "without CONFIG_HOTPLUG_CPU\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static inline void smp_resume(void) +{ +} + +#endif /* !defined(CONFIG_HOTPLUG_CPU) */ + +#endif /* __XEN_CPU_HOTPLUG_H__ */ diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,18 @@ +diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c +--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 2006-05-02 22:38:44.000000000 +0100 ++++ ./drivers/ide/ide-lib.c 2006-05-24 18:37:05.000000000 +0100 +@@ -410,10 +410,10 @@ + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { +- if (HWIF(drive)->pci_dev) ++ if (on && drive->media == ide_disk) { ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (HWIF(drive)->pci_dev) + addr = HWIF(drive)->pci_dev->dma_mask; + } + diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/xen-hotplug.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/xen-hotplug.patch Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,11 @@ +--- ../pristine-linux-2.6.16.13/fs/proc/proc_misc.c 2006-05-02 22:38:44.000000000 +0100 ++++ ./fs/proc/proc_misc.c 2006-05-22 
15:29:34.000000000 +0100 +@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p, + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal)); +- for_each_online_cpu(i) { ++ for_each_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_csched.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_csched.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,50 @@ +/**************************************************************************** + * (C) 2006 - Emmanuel Ackaouy - XenSource Inc. + **************************************************************************** + * + * File: xc_csched.c + * Author: Emmanuel Ackaouy + * + * Description: XC Interface to the credit scheduler + * + */ +#include "xc_private.h" + + +int +xc_sched_credit_domain_set( + int xc_handle, + uint32_t domid, + struct sched_credit_adjdom *sdom) +{ + DECLARE_DOM0_OP; + + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t) domid; + op.u.adjustdom.sched_id = SCHED_CREDIT; + op.u.adjustdom.direction = SCHED_INFO_PUT; + op.u.adjustdom.u.credit = *sdom; + + return do_dom0_op(xc_handle, &op); +} + +int +xc_sched_credit_domain_get( + int xc_handle, + uint32_t domid, + struct sched_credit_adjdom *sdom) +{ + DECLARE_DOM0_OP; + int err; + + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t) domid; + op.u.adjustdom.sched_id = SCHED_CREDIT; + op.u.adjustdom.direction = SCHED_INFO_GET; + + err = do_dom0_op(xc_handle, &op); + if ( err == 0 ) + *sdom = op.u.adjustdom.u.credit; + + return err; +} diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_linux.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xenstore/xenstored_linux.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,69 @@ +/****************************************************************************** + * + * 
Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "xenstored_core.h" + +#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva" +#define XENSTORED_PROC_PORT "/proc/xen/xsd_port" + +evtchn_port_t xenbus_evtchn(void) +{ + int fd; + int rc; + evtchn_port_t port; + char str[20]; + + fd = open(XENSTORED_PROC_PORT, O_RDONLY); + if (fd == -1) + return -1; + + rc = read(fd, str, sizeof(str)); + if (rc == -1) + { + int err = errno; + close(fd); + errno = err; + return -1; + } + + str[rc] = '\0'; + port = strtoul(str, NULL, 0); + + close(fd); + return port; +} + +void *xenbus_map(void) +{ + int fd; + void *addr; + + fd = open(XENSTORED_PROC_KVA, O_RDWR); + if (fd == -1) + return NULL; + + addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0); + + if (addr == MAP_FAILED) + addr = NULL; + + close(fd); + + return addr; +} diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/01_block_device_read_verify.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,62 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: Harry Butterworth <butterwo@xxxxxxxxxx> + +# This test initialises a ram disk in dom0 with data from /dev/urandom and +# then imports the ram disk device as a physical device into a domU. The md5 +# checksum of the data in the ramdisk is calculated in dom0 and also +# calculated by the domU reading the data through the blk frontend and +# backend drivers. 
The test succeeds if the checksums match indicating that +# the domU successfully read all the correct data from the device. + +import re + +from XmTestLib import * +from XmTestLib.block_utils import * + +if ENABLE_HVM_SUPPORT: + SKIP("Block-attach not supported for HVM domains") + +domain = XmTestDomain() + +try: + console = domain.start() +except DomainError, e: + FAIL(str(e)) + +console.setHistorySaveCmds(value=True) + +traceCommand("cat /dev/urandom > /dev/ram1") + +s, o = traceCommand("md5sum /dev/ram1") + +dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o) + +block_attach(domain, "phy:ram1", "hda1") + +try: + run = console.runCmd("md5sum /dev/hda1") +except ConsoleError, e: + FAIL(str(e)) + +domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"]) + +domain.closeConsole() + +domain.stop() + +if dom0_md5sum_match == None: + FAIL("Failed to get md5sum of test ram disk in dom0.") + +if domU_md5sum_match == None: + FAIL("Failed to get md5sum of test ram disk in domU.") + +if verbose: + print "md5sum dom0:" + print dom0_md5sum_match.group() + print "md5sum domU:" + print domU_md5sum_match.group() + +if dom0_md5sum_match.group() != domU_md5sum_match.group(): + FAIL("MISCOMPARE: data read in domU did not match data provided by domO.") diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/tests/block-integrity/Makefile.am Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,21 @@ + +SUBDIRS = + +TESTS = 01_block_device_read_verify.test + +XFAIL_TESTS = + +EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) + +TESTS_ENVIRONMENT=@TENV@ + +%.test: %.py + cp $< $@ + chmod +x $@ + +clean-local: am_config_clean-local + +am_config_clean-local: + rm -f *test + rm -f *log + rm -f *~ diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/tools/sparse-merge --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/tools/sparse-merge Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,144 @@ +#!/bin/bash +# 
Generate a patch for each of the ia64 files in the linux-2.6-xen-sparse tree + +# Path to mercurial tree of upstream Linux +# WARNING: This will do an 'hg up -C' on the upstream Linux tree, you +# will lose data if there's anything there you care about. +: ${LINUXPATH:=/tmp/linux-2.6} +# Tag of current base upstream image for Xen files +: ${OLDTAG:=v$(awk '/^LINUX_VER/{print $NF}' buildconfigs/mk.linux-2.6-xen)} +# Tag of new upstream base to go to +: ${NEWTAG:=v$(wget -O- -o/dev/null http://kernel.org/kdist/finger_banner \ + | awk '/latest stable/{print $NF}')} +# Restrict merge to specific arch (set to . for all) +: ${ARCH:=ia64} + +SPARSEDIR=linux-2.6-xen-sparse +WD=$PWD + +if [ ! -d $SPARSEDIR ]; then + echo "Can't find $SPARSEDIR directory." + exit +fi + +# Check for modified files in the sparse tree before starting +if hg st $SPARSEDIR | head | grep .; then + echo + echo "$SPARSEDIR contains modifications, please clean it up first" + exit +fi + +# We want the linux upstream tree to be at the OLDTAG to get the OLDTAG-Xen diff. +# Save current revision to restore when done +cd $LINUXPATH || exit 1 +OLDCSET=$(hg parents | awk '/^changeset:/{print($2)}' | cut -f 1 -d :) +for t in $OLDTAG $NEWTAG; do + if ! hg tags | cut -f1 -d' ' | grep -Fx $t; then + echo "Tag $t not found, ketching up" + hg up -C ${t%.*} || exit 1 + ketchup ${t#v} || exit 1 + hg addremove + hg ci -m $t + hg tag -l $t + fi +done +hg up -C $OLDTAG || exit 1 + +cd $WD +for i in $(hg manifest | awk '{print($3)}' | grep $SPARSEDIR | grep "$ARCH"); do + cd $WD + + FILENAME=$(basename $i) + DIRNAME=$(dirname $i) + DIFFPATH=$(echo $i | sed -e "s,^$SPARSEDIR,$LINUXPATH,") + + if [ ! -d $DIRNAME ]; then + echo "Hmm, something bad happened parsing directory name: $i" + continue + fi + + if [ ! -e $DIFFPATH ]; then + continue + fi + + echo -n "$i ... 
" + + cd $DIRNAME + XENDIR=$(pwd) + + ORIGPATH=$(echo $i | sed -e "s/^$SPARSEDIR/./") + APATH=$(echo $i | sed -e "s/^$SPARSEDIR/a/") + BPATH=$(echo $i | sed -e "s/^$SPARSEDIR/b/") + cd $LINUXPATH + hg diff -r $OLDTAG -r $NEWTAG $ORIGPATH | \ + sed -e "s,^--- $APATH,--- $FILENAME," \ + -e "s,^+++ $BPATH,+++ $FILENAME," \ + > $XENDIR/$FILENAME-$OLDTAG-$NEWTAG.diff + cd $XENDIR + + # Do we have a diff file? Did anything change? + if [ ! -s $FILENAME-$OLDTAG-$NEWTAG.diff ]; then + echo "SUCCESS (Upstream unchanged)" + continue + fi + + if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then + # It failed, how badly? + if [ ! -e ${FILENAME}.rej ]; then + echo "ERROR, Hmm, no .rej file, but diff failed, fix manually" + continue + fi + TONEWREJ=$(wc -l ${FILENAME}.rej | \ + awk '{print($1)}') + hg st $FILENAME | grep -q . && hg revert $FILENAME + rm -f ${FILENAME}.rej ${FILENAME}.orig + diff -uN $DIFFPATH $FILENAME | \ + sed -e "s,^--- $DIFFPATH,--- $FILENAME," \ + > $FILENAME-$OLDTAG-Xen.diff + + if [ ! -e $FILENAME-$OLDTAG-Xen.diff ]; then + echo "ERROR, failed to create patch file" + continue + fi + + if ! patch -R -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then + echo "ERROR, reverting Xen changes failed" + hg revert $FILENAME + continue + fi + + if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then + echo "ERROR, new upstream patch failed on reverted file" + hg revert $FILENAME + continue + fi + + if ! patch -f -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then + if [ ! 
-e ${FILENAME}.rej ]; then + echo "ERROR, Hmm, no .rej file, but diff failed, fix manually" + continue + fi + TOXENREJ=$(wc -l ${FILENAME}.rej | \ + awk '{print($1)}') + + if [ $TOXENREJ -gt $TONEWREJ ]; then + hg revert $FILENAME + rm -f ${FILENAME}.rej ${FILENAME}.orig + patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1 + echo "MANUAL MERGE REQUIRED (Upstream reject)" + else + echo "MANUAL MERGE REQUIRED (Xen reject)" + fi + + else + rm -f ${FILENAME}.rej ${FILENAME}.orig + echo "SUCCESS (Re-applied Xen patch)" + fi + else + rm -f ${FILENAME}.rej ${FILENAME}.orig + echo "SUCCESS (Upstream applied)" + fi +done +find $SPARSEDIR -name \*.diff -empty | xargs -r rm -f +cd $LINUXPATH +hg up -C $OLDCSET diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/efi_emul.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/xen/efi_emul.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,180 @@ +/* + * efi_emul.c: + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ + +#include <xen/config.h> +#include <xen/compile.h> +#include <asm/pgalloc.h> +#include <asm/vcpu.h> +#include <asm/dom_fw.h> +#include <public/sched.h> + +extern unsigned long translate_domain_mpaddr(unsigned long); +extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); + +// given a current domain (virtual or metaphysical) address, return the virtual address +static unsigned long +efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault) +{ + struct vcpu *v = current; + unsigned long mpaddr = domain_addr; + *fault = IA64_NO_FAULT; + + if (v->domain->arch.efi_virt_mode) { + *fault = vcpu_tpa(v, domain_addr, &mpaddr); + if (*fault != IA64_NO_FAULT) return 0; + } + + return ((unsigned long) __va(translate_domain_mpaddr(mpaddr))); +} + +static efi_status_t +efi_emulate_get_time( + unsigned long tv_addr, unsigned long tc_addr, + IA64FAULT *fault) +{ + unsigned long tv = 0, tc = 0; + efi_status_t status; + + //printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr); + tv = efi_translate_domain_addr(tv_addr, fault); + if (*fault != IA64_NO_FAULT) return 0; + if (tc_addr) { + tc = efi_translate_domain_addr(tc_addr, fault); + if (*fault != IA64_NO_FAULT) return 0; + } + //printf("efi_get_time(%016lx,%016lx) translated to xen virtual address\n", tv, tc); + status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc); + //printf("efi_get_time returns %lx\n", status); + return status; +} + +static efi_status_t +efi_emulate_set_virtual_address_map( + unsigned long memory_map_size, unsigned long descriptor_size, + u32 descriptor_version, efi_memory_desc_t *virtual_map) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t entry, *md = &entry; + u64 efi_desc_size; + + unsigned long *vfn; + struct domain *d = current->domain; + efi_runtime_services_t *efi_runtime = d->arch.efi_runtime; + + if (descriptor_version != EFI_MEMDESC_VERSION) { + printf ("efi_emulate_set_virtual_address_map: memory descriptor 
version unmatched\n"); + return EFI_INVALID_PARAMETER; + } + + if (descriptor_size != sizeof(efi_memory_desc_t)) { + printf ("efi_emulate_set_virtual_address_map: memory descriptor size unmatched\n"); + return EFI_INVALID_PARAMETER; + } + + if (d->arch.efi_virt_mode) return EFI_UNSUPPORTED; + + efi_map_start = virtual_map; + efi_map_end = efi_map_start + memory_map_size; + efi_desc_size = sizeof(efi_memory_desc_t); + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + if (copy_from_user(&entry, p, sizeof(efi_memory_desc_t))) { + printf ("efi_emulate_set_virtual_address_map: copy_from_user() fault. addr=0x%p\n", p); + return EFI_UNSUPPORTED; + } + + /* skip over non-PAL_CODE memory descriptors; EFI_RUNTIME is included in PAL_CODE. */ + if (md->type != EFI_PAL_CODE) + continue; + +#define EFI_HYPERCALL_PATCH_TO_VIRT(tgt,call) \ + do { \ + vfn = (unsigned long *) domain_mpa_to_imva(d, tgt); \ + *vfn++ = FW_HYPERCALL_##call##_INDEX * 16UL + md->virt_addr; \ + *vfn++ = 0; \ + } while (0) + + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_time,EFI_GET_TIME); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_time,EFI_SET_TIME); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_variable,EFI_GET_VARIABLE); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_variable,EFI_SET_VARIABLE); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT); + EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->reset_system,EFI_RESET_SYSTEM); + } + + /* The virtual address map has been applied. 
*/ + d->arch.efi_virt_mode = 1; + + return EFI_SUCCESS; +} + +efi_status_t +efi_emulator (struct pt_regs *regs, IA64FAULT *fault) +{ + struct vcpu *v = current; + efi_status_t status; + + *fault = IA64_NO_FAULT; + + switch (regs->r2) { + case FW_HYPERCALL_EFI_RESET_SYSTEM: + printf("efi.reset_system called "); + if (current->domain == dom0) { + printf("(by dom0)\n "); + (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); + } + else + domain_shutdown (current->domain, SHUTDOWN_reboot); + status = EFI_UNSUPPORTED; + break; + case FW_HYPERCALL_EFI_GET_TIME: + status = efi_emulate_get_time ( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33), + fault); + break; + case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: + status = efi_emulate_set_virtual_address_map ( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33), + (u32) vcpu_get_gr(v,34), + (efi_memory_desc_t *) vcpu_get_gr(v,35)); + break; + case FW_HYPERCALL_EFI_SET_TIME: + case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: + case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_GET_VARIABLE: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: + case FW_HYPERCALL_EFI_SET_VARIABLE: + case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: + // FIXME: need fixes in efi.h from 2.6.9 + status = EFI_UNSUPPORTED; + break; + default: + printf("unknown ia64 fw hypercall %lx\n", regs->r2); + status = EFI_UNSUPPORTED; + } + + return status; +} diff -r e74246451527 -r f54d38cea8ac xen/common/sched_credit.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/common/sched_credit.c Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,1233 @@ +/**************************************************************************** + * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc. 
+ **************************************************************************** + * + * File: common/csched_credit.c + * Author: Emmanuel Ackaouy + * + * Description: Credit-based SMP CPU scheduler + */ + +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/domain.h> +#include <xen/delay.h> +#include <xen/event.h> +#include <xen/time.h> +#include <xen/perfc.h> +#include <xen/sched-if.h> +#include <xen/softirq.h> +#include <asm/atomic.h> + + +/* + * CSCHED_STATS + * + * Manage very basic counters and stats. + * + * Useful for debugging live systems. The stats are displayed + * with runq dumps ('r' on the Xen console). + */ +#define CSCHED_STATS + + +/* + * Basic constants + */ +#define CSCHED_TICK 10 /* milliseconds */ +#define CSCHED_TSLICE 30 /* milliseconds */ +#define CSCHED_ACCT_NTICKS 3 +#define CSCHED_ACCT_PERIOD (CSCHED_ACCT_NTICKS * CSCHED_TICK) +#define CSCHED_DEFAULT_WEIGHT 256 + + +/* + * Priorities + */ +#define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */ +#define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */ +#define CSCHED_PRI_IDLE -64 /* idle */ +#define CSCHED_PRI_TS_PARKED -65 /* time-share w/ capped credits */ + + +/* + * Useful macros + */ +#define CSCHED_PCPU(_c) ((struct csched_pcpu *)schedule_data[_c].sched_priv) +#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv) +#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv) +#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq)) + + +/* + * Stats + */ +#ifdef CSCHED_STATS + +#define CSCHED_STAT(_X) (csched_priv.stats._X) +#define CSCHED_STAT_DEFINE(_X) uint32_t _X; +#define CSCHED_STAT_PRINTK(_X) \ + do \ + { \ + printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X)); \ + } while ( 0 ); + +#define CSCHED_STATS_EXPAND_SCHED(_MACRO) \ + _MACRO(vcpu_alloc) \ + _MACRO(vcpu_add) \ + _MACRO(vcpu_sleep) \ + _MACRO(vcpu_wake_running) \ + _MACRO(vcpu_wake_onrunq) \ + _MACRO(vcpu_wake_runnable) \ + 
_MACRO(vcpu_wake_not_runnable) \ + _MACRO(dom_free) \ + _MACRO(schedule) \ + _MACRO(tickle_local_idler) \ + _MACRO(tickle_local_over) \ + _MACRO(tickle_local_under) \ + _MACRO(tickle_local_other) \ + _MACRO(acct_run) \ + _MACRO(acct_no_work) \ + _MACRO(acct_balance) \ + _MACRO(acct_reorder) \ + _MACRO(acct_min_credit) \ + _MACRO(acct_vcpu_active) \ + _MACRO(acct_vcpu_idle) \ + _MACRO(acct_vcpu_credit_min) + +#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \ + _MACRO(vcpu_migrate) \ + _MACRO(load_balance_idle) \ + _MACRO(load_balance_over) \ + _MACRO(load_balance_other) \ + _MACRO(steal_trylock_failed) \ + _MACRO(steal_peer_down) \ + _MACRO(steal_peer_idle) \ + _MACRO(steal_peer_running) \ + _MACRO(steal_peer_pinned) \ + _MACRO(tickle_idlers_none) \ + _MACRO(tickle_idlers_some) + +#ifndef NDEBUG +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \ + _MACRO(vcpu_check) +#else +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) +#endif + +#define CSCHED_STATS_EXPAND(_MACRO) \ + CSCHED_STATS_EXPAND_SCHED(_MACRO) \ + CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \ + CSCHED_STATS_EXPAND_CHECKS(_MACRO) + +#define CSCHED_STATS_RESET() \ + do \ + { \ + memset(&csched_priv.stats, 0, sizeof(csched_priv.stats)); \ + } while ( 0 ) + +#define CSCHED_STATS_DEFINE() \ + struct \ + { \ + CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \ + } stats + +#define CSCHED_STATS_PRINTK() \ + do \ + { \ + printk("stats:\n"); \ + CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \ + } while ( 0 ) + +#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++) + +#else /* CSCHED_STATS */ + +#define CSCHED_STATS_RESET() do {} while ( 0 ) +#define CSCHED_STATS_DEFINE() do {} while ( 0 ) +#define CSCHED_STATS_PRINTK() do {} while ( 0 ) +#define CSCHED_STAT_CRANK(_X) do {} while ( 0 ) + +#endif /* CSCHED_STATS */ + + +/* + * Physical CPU + */ +struct csched_pcpu { + struct list_head runq; + uint32_t runq_sort_last; +}; + +/* + * Virtual CPU + */ +struct csched_vcpu { + struct list_head runq_elem; + struct list_head active_vcpu_elem; + 
struct csched_dom *sdom; + struct vcpu *vcpu; + atomic_t credit; + int credit_last; + uint32_t credit_incr; + uint32_t state_active; + uint32_t state_idle; + int16_t pri; +}; + +/* + * Domain + */ +struct csched_dom { + struct list_head active_vcpu; + struct list_head active_sdom_elem; + struct domain *dom; + uint16_t active_vcpu_count; + uint16_t weight; + uint16_t cap; +}; + +/* + * System-wide private data + */ +struct csched_private { + spinlock_t lock; + struct list_head active_sdom; + uint32_t ncpus; + unsigned int master; + cpumask_t idlers; + uint32_t weight; + uint32_t credit; + int credit_balance; + uint32_t runq_sort; + CSCHED_STATS_DEFINE(); +}; + + +/* + * Global variables + */ +static struct csched_private csched_priv; + + + +static inline int +__vcpu_on_runq(struct csched_vcpu *svc) +{ + return !list_empty(&svc->runq_elem); +} + +static inline struct csched_vcpu * +__runq_elem(struct list_head *elem) +{ + return list_entry(elem, struct csched_vcpu, runq_elem); +} + +static inline void +__runq_insert(unsigned int cpu, struct csched_vcpu *svc) +{ + const struct list_head * const runq = RUNQ(cpu); + struct list_head *iter; + + BUG_ON( __vcpu_on_runq(svc) ); + BUG_ON( cpu != svc->vcpu->processor ); + + list_for_each( iter, runq ) + { + const struct csched_vcpu * const iter_svc = __runq_elem(iter); + if ( svc->pri > iter_svc->pri ) + break; + } + + list_add_tail(&svc->runq_elem, iter); +} + +static inline void +__runq_remove(struct csched_vcpu *svc) +{ + BUG_ON( !__vcpu_on_runq(svc) ); + list_del_init(&svc->runq_elem); +} + +static inline void +__runq_tickle(unsigned int cpu, struct csched_vcpu *new) +{ + struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr); + cpumask_t mask; + + ASSERT(cur); + cpus_clear(mask); + + /* If strictly higher priority than current VCPU, signal the CPU */ + if ( new->pri > cur->pri ) + { + if ( cur->pri == CSCHED_PRI_IDLE ) + CSCHED_STAT_CRANK(tickle_local_idler); + else if ( cur->pri == CSCHED_PRI_TS_OVER ) + 
CSCHED_STAT_CRANK(tickle_local_over); + else if ( cur->pri == CSCHED_PRI_TS_UNDER ) + CSCHED_STAT_CRANK(tickle_local_under); + else + CSCHED_STAT_CRANK(tickle_local_other); + + cpu_set(cpu, mask); + } + + /* + * If this CPU has at least two runnable VCPUs, we tickle any idlers to + * let them know there is runnable work in the system... + */ + if ( cur->pri > CSCHED_PRI_IDLE ) + { + if ( cpus_empty(csched_priv.idlers) ) + { + CSCHED_STAT_CRANK(tickle_idlers_none); + } + else + { + CSCHED_STAT_CRANK(tickle_idlers_some); + cpus_or(mask, mask, csched_priv.idlers); + } + } + + /* Send scheduler interrupts to designated CPUs */ + if ( !cpus_empty(mask) ) + cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ); +} + +static void +csched_pcpu_init(int cpu) +{ + struct csched_pcpu *spc; + unsigned long flags; + + spin_lock_irqsave(&csched_priv.lock, flags); + + /* Initialize/update system-wide config */ + csched_priv.credit += CSCHED_ACCT_PERIOD; + if ( csched_priv.ncpus <= cpu ) + csched_priv.ncpus = cpu + 1; + if ( csched_priv.master >= csched_priv.ncpus ) + csched_priv.master = cpu; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct csched_pcpu); + BUG_ON( spc == NULL ); + INIT_LIST_HEAD(&spc->runq); + spc->runq_sort_last = csched_priv.runq_sort; + schedule_data[cpu].sched_priv = spc; + + /* Start off idling... 
*/ + BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) ); + cpu_set(cpu, csched_priv.idlers); + + spin_unlock_irqrestore(&csched_priv.lock, flags); +} + +#ifndef NDEBUG +static inline void +__csched_vcpu_check(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + struct csched_dom * const sdom = svc->sdom; + + BUG_ON( svc->vcpu != vc ); + BUG_ON( sdom != CSCHED_DOM(vc->domain) ); + if ( sdom ) + { + BUG_ON( is_idle_vcpu(vc) ); + BUG_ON( sdom->dom != vc->domain ); + } + else + { + BUG_ON( !is_idle_vcpu(vc) ); + } + + CSCHED_STAT_CRANK(vcpu_check); +} +#define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc)) +#else +#define CSCHED_VCPU_CHECK(_vc) +#endif + +static inline int +__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc) +{ + /* + * Don't pick up work that's in the peer's scheduling tail. Also only pick + * up work that's allowed to run on our CPU. + */ + if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) ) + { + CSCHED_STAT_CRANK(steal_peer_running); + return 0; + } + + if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) ) + { + CSCHED_STAT_CRANK(steal_peer_pinned); + return 0; + } + + return 1; +} + +static void +csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec) +{ + struct csched_dom * const sdom = svc->sdom; + unsigned long flags; + + /* Update credits */ + atomic_sub(credit_dec, &svc->credit); + + /* Put this VCPU and domain back on the active list if it was idling */ + if ( list_empty(&svc->active_vcpu_elem) ) + { + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( list_empty(&svc->active_vcpu_elem) ) + { + CSCHED_STAT_CRANK(acct_vcpu_active); + svc->state_active++; + + sdom->active_vcpu_count++; + list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); + if ( list_empty(&sdom->active_sdom_elem) ) + { + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + csched_priv.weight += sdom->weight; + } + } + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } +} + +static inline void 
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + + BUG_ON( list_empty(&svc->active_vcpu_elem) ); + + CSCHED_STAT_CRANK(acct_vcpu_idle); + svc->state_idle++; + + sdom->active_vcpu_count--; + list_del_init(&svc->active_vcpu_elem); + if ( list_empty(&sdom->active_vcpu) ) + { + BUG_ON( csched_priv.weight < sdom->weight ); + list_del_init(&sdom->active_sdom_elem); + csched_priv.weight -= sdom->weight; + } + + atomic_set(&svc->credit, 0); +} + +static int +csched_vcpu_alloc(struct vcpu *vc) +{ + struct domain * const dom = vc->domain; + struct csched_dom *sdom; + struct csched_vcpu *svc; + int16_t pri; + + CSCHED_STAT_CRANK(vcpu_alloc); + + /* Allocate, if appropriate, per-domain info */ + if ( is_idle_vcpu(vc) ) + { + sdom = NULL; + pri = CSCHED_PRI_IDLE; + } + else if ( CSCHED_DOM(dom) ) + { + sdom = CSCHED_DOM(dom); + pri = CSCHED_PRI_TS_UNDER; + } + else + { + sdom = xmalloc(struct csched_dom); + if ( !sdom ) + return -1; + + /* Initialize credit and weight */ + INIT_LIST_HEAD(&sdom->active_vcpu); + sdom->active_vcpu_count = 0; + INIT_LIST_HEAD(&sdom->active_sdom_elem); + sdom->dom = dom; + sdom->weight = CSCHED_DEFAULT_WEIGHT; + sdom->cap = 0U; + dom->sched_priv = sdom; + pri = CSCHED_PRI_TS_UNDER; + } + + /* Allocate per-VCPU info */ + svc = xmalloc(struct csched_vcpu); + if ( !svc ) + return -1; + + INIT_LIST_HEAD(&svc->runq_elem); + INIT_LIST_HEAD(&svc->active_vcpu_elem); + svc->sdom = sdom; + svc->vcpu = vc; + atomic_set(&svc->credit, 0); + svc->credit_last = 0; + svc->credit_incr = 0U; + svc->state_active = 0U; + svc->state_idle = 0U; + svc->pri = pri; + vc->sched_priv = svc; + + CSCHED_VCPU_CHECK(vc); + + /* Attach fair-share VCPUs to the accounting list */ + if ( likely(sdom != NULL) ) + csched_vcpu_acct(svc, 0); + + return 0; +} + +static void +csched_vcpu_add(struct vcpu *vc) +{ + CSCHED_STAT_CRANK(vcpu_add); + + /* Allocate per-PCPU info */ + if ( unlikely(!CSCHED_PCPU(vc->processor)) ) + 
csched_pcpu_init(vc->processor); + + CSCHED_VCPU_CHECK(vc); +} + +static void +csched_vcpu_free(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + struct csched_dom * const sdom = svc->sdom; + unsigned long flags; + + BUG_ON( sdom == NULL ); + BUG_ON( !list_empty(&svc->runq_elem) ); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( !list_empty(&svc->active_vcpu_elem) ) + __csched_vcpu_acct_idle_locked(svc); + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + xfree(svc); +} + +static void +csched_vcpu_sleep(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + + CSCHED_STAT_CRANK(vcpu_sleep); + + BUG_ON( is_idle_vcpu(vc) ); + + if ( schedule_data[vc->processor].curr == vc ) + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + else if ( __vcpu_on_runq(svc) ) + __runq_remove(svc); +} + +static void +csched_vcpu_wake(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON( is_idle_vcpu(vc) ); + + if ( unlikely(schedule_data[cpu].curr == vc) ) + { + CSCHED_STAT_CRANK(vcpu_wake_running); + return; + } + if ( unlikely(__vcpu_on_runq(svc)) ) + { + CSCHED_STAT_CRANK(vcpu_wake_onrunq); + return; + } + + if ( likely(vcpu_runnable(vc)) ) + CSCHED_STAT_CRANK(vcpu_wake_runnable); + else + CSCHED_STAT_CRANK(vcpu_wake_not_runnable); + + /* Put the VCPU on the runq and tickle CPUs */ + __runq_insert(cpu, svc); + __runq_tickle(cpu, svc); +} + +static int +csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity) +{ + unsigned long flags; + int lcpu; + + if ( vc == current ) + { + /* No locking needed but also can't move on the spot... */ + if ( !cpu_isset(vc->processor, *affinity) ) + return -EBUSY; + + vc->cpu_affinity = *affinity; + } + else + { + /* Pause, modify, and unpause. 
*/ + vcpu_pause(vc); + + vc->cpu_affinity = *affinity; + if ( !cpu_isset(vc->processor, vc->cpu_affinity) ) + { + /* + * We must grab the scheduler lock for the CPU currently owning + * this VCPU before changing its ownership. + */ + vcpu_schedule_lock_irqsave(vc, flags); + lcpu = vc->processor; + + vc->processor = first_cpu(vc->cpu_affinity); + + spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags); + } + + vcpu_unpause(vc); + } + + return 0; +} + +static int +csched_dom_cntl( + struct domain *d, + struct sched_adjdom_cmd *cmd) +{ + struct csched_dom * const sdom = CSCHED_DOM(d); + unsigned long flags; + + if ( cmd->direction == SCHED_INFO_GET ) + { + cmd->u.credit.weight = sdom->weight; + cmd->u.credit.cap = sdom->cap; + } + else + { + ASSERT( cmd->direction == SCHED_INFO_PUT ); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( cmd->u.credit.weight != 0 ) + { + csched_priv.weight -= sdom->weight; + sdom->weight = cmd->u.credit.weight; + csched_priv.weight += sdom->weight; + } + + if ( cmd->u.credit.cap != (uint16_t)~0U ) + sdom->cap = cmd->u.credit.cap; + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } + + return 0; +} + +static void +csched_dom_free(struct domain *dom) +{ + struct csched_dom * const sdom = CSCHED_DOM(dom); + int i; + + CSCHED_STAT_CRANK(dom_free); + + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + { + if ( dom->vcpu[i] ) + csched_vcpu_free(dom->vcpu[i]); + } + + xfree(sdom); +} + +/* + * This is a O(n) optimized sort of the runq. + * + * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk + * through the runq and move up any UNDERs that are preceded by OVERS. We + * remember the last UNDER to make the move up operation O(1). 
+ */ +static void +csched_runq_sort(unsigned int cpu) +{ + struct csched_pcpu * const spc = CSCHED_PCPU(cpu); + struct list_head *runq, *elem, *next, *last_under; + struct csched_vcpu *svc_elem; + unsigned long flags; + int sort_epoch; + + sort_epoch = csched_priv.runq_sort; + if ( sort_epoch == spc->runq_sort_last ) + return; + + spc->runq_sort_last = sort_epoch; + + spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + + runq = &spc->runq; + elem = runq->next; + last_under = runq; + + while ( elem != runq ) + { + next = elem->next; + svc_elem = __runq_elem(elem); + + if ( svc_elem->pri == CSCHED_PRI_TS_UNDER ) + { + /* does elem need to move up the runq? */ + if ( elem->prev != last_under ) + { + list_del(elem); + list_add(elem, last_under); + } + last_under = elem; + } + + elem = next; + } + + spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); +} + +static void +csched_acct(void) +{ + unsigned long flags; + struct list_head *iter_vcpu, *next_vcpu; + struct list_head *iter_sdom, *next_sdom; + struct csched_vcpu *svc; + struct csched_dom *sdom; + uint32_t credit_total; + uint32_t weight_total; + uint32_t weight_left; + uint32_t credit_fair; + uint32_t credit_peak; + int credit_balance; + int credit_xtra; + int credit; + + + spin_lock_irqsave(&csched_priv.lock, flags); + + weight_total = csched_priv.weight; + credit_total = csched_priv.credit; + + /* Converge balance towards 0 when it drops negative */ + if ( csched_priv.credit_balance < 0 ) + { + credit_total -= csched_priv.credit_balance; + CSCHED_STAT_CRANK(acct_balance); + } + + if ( unlikely(weight_total == 0) ) + { + csched_priv.credit_balance = 0; + spin_unlock_irqrestore(&csched_priv.lock, flags); + CSCHED_STAT_CRANK(acct_no_work); + return; + } + + CSCHED_STAT_CRANK(acct_run); + + weight_left = weight_total; + credit_balance = 0; + credit_xtra = 0; + + list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom ) + { + sdom = list_entry(iter_sdom, struct csched_dom, 
active_sdom_elem); + + BUG_ON( is_idle_domain(sdom->dom) ); + BUG_ON( sdom->active_vcpu_count == 0 ); + BUG_ON( sdom->weight == 0 ); + BUG_ON( sdom->weight > weight_left ); + + weight_left -= sdom->weight; + + /* + * A domain's fair share is computed using its weight in competition + * with that of all other active domains. + * + * At most, a domain can use credits to run all its active VCPUs + * for one full accounting period. We allow a domain to earn more + * only when the system-wide credit balance is negative. + */ + credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD; + if ( csched_priv.credit_balance < 0 ) + { + credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) + + (weight_total - 1) + ) / weight_total; + } + if ( sdom->cap != 0U ) + { + uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100; + if ( credit_cap < credit_peak ) + credit_peak = credit_cap; + } + + credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1) + ) / weight_total; + + if ( credit_fair < credit_peak ) + { + credit_xtra = 1; + } + else + { + if ( weight_left != 0U ) + { + /* Give other domains a chance at unused credits */ + credit_total += ( ( ( credit_fair - credit_peak + ) * weight_total + ) + ( weight_left - 1 ) + ) / weight_left; + } + + if ( credit_xtra ) + { + /* + * Lazily keep domains with extra credits at the head of + * the queue to give others a chance at them in future + * accounting periods. 
+ */ + CSCHED_STAT_CRANK(acct_reorder); + list_del(&sdom->active_sdom_elem); + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + } + + credit_fair = credit_peak; + } + + /* Compute fair share per VCPU */ + credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + + + list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu ) + { + svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem); + BUG_ON( sdom != svc->sdom ); + + /* Increment credit */ + atomic_add(credit_fair, &svc->credit); + credit = atomic_read(&svc->credit); + + /* + * Recompute priority or, if VCPU is idling, remove it from + * the active list. + */ + if ( credit < 0 ) + { + if ( sdom->cap == 0U ) + svc->pri = CSCHED_PRI_TS_OVER; + else + svc->pri = CSCHED_PRI_TS_PARKED; + + if ( credit < -CSCHED_TSLICE ) + { + CSCHED_STAT_CRANK(acct_min_credit); + credit = -CSCHED_TSLICE; + atomic_set(&svc->credit, credit); + } + } + else + { + svc->pri = CSCHED_PRI_TS_UNDER; + + if ( credit > CSCHED_TSLICE ) + __csched_vcpu_acct_idle_locked(svc); + } + + svc->credit_last = credit; + svc->credit_incr = credit_fair; + credit_balance += credit; + } + } + + csched_priv.credit_balance = credit_balance; + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + /* Inform each CPU that its runq needs to be sorted */ + csched_priv.runq_sort++; +} + +static void +csched_tick(unsigned int cpu) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(current); + struct csched_dom * const sdom = svc->sdom; + + /* + * Accounting for running VCPU + * + * Note: Some VCPUs, such as the idle tasks, are not credit scheduled. + */ + if ( likely(sdom != NULL) ) + { + csched_vcpu_acct(svc, CSCHED_TICK); + } + + /* + * Accounting duty + * + * Note: Currently, this is always done by the master boot CPU. Eventually, + * we could distribute or at the very least cycle the duty. 
+ */ + if ( (csched_priv.master == cpu) && + (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 ) + { + csched_acct(); + } + + /* + * Check if runq needs to be sorted + * + * Every physical CPU re-sorts the runq after the accounting master has + * modified priorities. This is a special O(n) sort and runs at most + * once per accounting period (currently 30 milliseconds). + */ + csched_runq_sort(cpu); +} + +static struct csched_vcpu * +csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri) +{ + struct list_head *iter; + struct csched_vcpu *speer; + struct vcpu *vc; + + list_for_each( iter, &spc->runq ) + { + speer = __runq_elem(iter); + + /* + * If next available VCPU here is not of higher priority than ours, + * this PCPU is useless to us. + */ + if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri ) + { + CSCHED_STAT_CRANK(steal_peer_idle); + break; + } + + /* Is this VCPU runnable on our PCPU? */ + vc = speer->vcpu; + BUG_ON( is_idle_vcpu(vc) ); + + if ( __csched_vcpu_is_stealable(cpu, vc) ) + { + /* We got a candidate. Grab it! */ + __runq_remove(speer); + vc->processor = cpu; + + return speer; + } + } + + return NULL; +} + +static struct csched_vcpu * +csched_load_balance(int cpu, struct csched_vcpu *snext) +{ + struct csched_pcpu *spc; + struct csched_vcpu *speer; + int peer_cpu; + + if ( snext->pri == CSCHED_PRI_IDLE ) + CSCHED_STAT_CRANK(load_balance_idle); + else if ( snext->pri == CSCHED_PRI_TS_OVER ) + CSCHED_STAT_CRANK(load_balance_over); + else + CSCHED_STAT_CRANK(load_balance_other); + + peer_cpu = cpu; + BUG_ON( peer_cpu != snext->vcpu->processor ); + + while ( 1 ) + { + /* For each PCPU in the system starting with our neighbour... */ + peer_cpu = (peer_cpu + 1) % csched_priv.ncpus; + if ( peer_cpu == cpu ) + break; + + BUG_ON( peer_cpu >= csched_priv.ncpus ); + BUG_ON( peer_cpu == cpu ); + + /* + * Get ahold of the scheduler lock for this peer CPU. + * + * Note: We don't spin on this lock but simply try it.
Spinning could + * cause a deadlock if the peer CPU is also load balancing and trying + * to lock this CPU. + */ + if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) ) + { + + spc = CSCHED_PCPU(peer_cpu); + if ( unlikely(spc == NULL) ) + { + CSCHED_STAT_CRANK(steal_peer_down); + speer = NULL; + } + else + { + speer = csched_runq_steal(spc, cpu, snext->pri); + } + + spin_unlock(&schedule_data[peer_cpu].schedule_lock); + + /* Got one! */ + if ( speer ) + { + CSCHED_STAT_CRANK(vcpu_migrate); + return speer; + } + } + else + { + CSCHED_STAT_CRANK(steal_trylock_failed); + } + } + + + /* Failed to find more important work */ + __runq_remove(snext); + return snext; +} + +/* + * This function is in the critical path. It is designed to be simple and + * fast for the common case. + */ +static struct task_slice +csched_schedule(s_time_t now) +{ + const int cpu = smp_processor_id(); + struct list_head * const runq = RUNQ(cpu); + struct csched_vcpu * const scurr = CSCHED_VCPU(current); + struct csched_vcpu *snext; + struct task_slice ret; + + CSCHED_STAT_CRANK(schedule); + CSCHED_VCPU_CHECK(current); + + /* + * Select next runnable local VCPU (ie top of local runq) + */ + if ( vcpu_runnable(current) ) + __runq_insert(cpu, scurr); + else + BUG_ON( is_idle_vcpu(current) || list_empty(runq) ); + + snext = __runq_elem(runq->next); + + /* + * SMP Load balance: + * + * If the next highest priority local runnable VCPU has already eaten + * through its credits, look on other PCPUs to see if we have more + * urgent work... If not, csched_load_balance() will return snext, but + * already removed from the runq. + */ + if ( snext->pri > CSCHED_PRI_TS_OVER ) + __runq_remove(snext); + else + snext = csched_load_balance(cpu, snext); + + /* + * Update idlers mask if necessary. When we're idling, other CPUs + * will tickle us when they get extra work. 
+ */ + if ( snext->pri == CSCHED_PRI_IDLE ) + { + if ( !cpu_isset(cpu, csched_priv.idlers) ) + cpu_set(cpu, csched_priv.idlers); + } + else if ( cpu_isset(cpu, csched_priv.idlers) ) + { + cpu_clear(cpu, csched_priv.idlers); + } + + /* + * Return task to run next... + */ + ret.time = MILLISECS(CSCHED_TSLICE); + ret.task = snext->vcpu; + + CSCHED_VCPU_CHECK(ret.task); + BUG_ON( !vcpu_runnable(ret.task) ); + + return ret; +} + +static void +csched_dump_vcpu(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + + printk("[%i.%i] pri=%i cpu=%i", + svc->vcpu->domain->domain_id, + svc->vcpu->vcpu_id, + svc->pri, + svc->vcpu->processor); + + if ( sdom ) + { + printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}", + atomic_read(&svc->credit), + svc->credit_last, + svc->credit_incr, + svc->state_active, + svc->state_idle, + sdom->weight); + } + + printk("\n"); +} + +static void +csched_dump_pcpu(int cpu) +{ + struct list_head *runq, *iter; + struct csched_pcpu *spc; + struct csched_vcpu *svc; + int loop; + + spc = CSCHED_PCPU(cpu); + runq = &spc->runq; + + printk(" tick=%lu, sort=%d\n", + schedule_data[cpu].tick, + spc->runq_sort_last); + + /* current VCPU */ + svc = CSCHED_VCPU(schedule_data[cpu].curr); + if ( svc ) + { + printk("\trun: "); + csched_dump_vcpu(svc); + } + + loop = 0; + list_for_each( iter, runq ) + { + svc = __runq_elem(iter); + if ( svc ) + { + printk("\t%3d: ", ++loop); + csched_dump_vcpu(svc); + } + } +} + +static void +csched_dump(void) +{ + struct list_head *iter_sdom, *iter_svc; + int loop; + + printk("info:\n" + "\tncpus = %u\n" + "\tmaster = %u\n" + "\tcredit = %u\n" + "\tcredit balance = %d\n" + "\tweight = %u\n" + "\trunq_sort = %u\n" + "\ttick = %dms\n" + "\ttslice = %dms\n" + "\taccounting period = %dms\n" + "\tdefault-weight = %d\n", + csched_priv.ncpus, + csched_priv.master, + csched_priv.credit, + csched_priv.credit_balance, + csched_priv.weight, + csched_priv.runq_sort, + CSCHED_TICK, + CSCHED_TSLICE, + CSCHED_ACCT_PERIOD, + 
CSCHED_DEFAULT_WEIGHT); + + printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]); + + CSCHED_STATS_PRINTK(); + + printk("active vcpus:\n"); + loop = 0; + list_for_each( iter_sdom, &csched_priv.active_sdom ) + { + struct csched_dom *sdom; + sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem); + + list_for_each( iter_svc, &sdom->active_vcpu ) + { + struct csched_vcpu *svc; + svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem); + + printk("\t%3d: ", ++loop); + csched_dump_vcpu(svc); + } + } +} + +static void +csched_init(void) +{ + spin_lock_init(&csched_priv.lock); + INIT_LIST_HEAD(&csched_priv.active_sdom); + csched_priv.ncpus = 0; + csched_priv.master = UINT_MAX; + cpus_clear(csched_priv.idlers); + csched_priv.weight = 0U; + csched_priv.credit = 0U; + csched_priv.credit_balance = 0; + csched_priv.runq_sort = 0U; + CSCHED_STATS_RESET(); +} + + +struct scheduler sched_credit_def = { + .name = "SMP Credit Scheduler", + .opt_name = "credit", + .sched_id = SCHED_CREDIT, + + .alloc_task = csched_vcpu_alloc, + .add_task = csched_vcpu_add, + .sleep = csched_vcpu_sleep, + .wake = csched_vcpu_wake, + .set_affinity = csched_vcpu_set_affinity, + + .adjdom = csched_dom_cntl, + .free_task = csched_dom_free, + + .tick = csched_tick, + .do_schedule = csched_schedule, + + .dump_cpu_state = csched_dump_pcpu, + .dump_settings = csched_dump, + .init = csched_init, +}; diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/tlbflush.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-ia64/tlbflush.h Tue May 30 14:30:34 2006 -0500 @@ -0,0 +1,37 @@ +#ifndef __FLUSHTLB_H__ +#define __FLUSHTLB_H__ + +#include <xen/sched.h> + +/* TLB flushes can be either local (current vcpu only) or domain wide (on + all vcpus). + TLB flushes can be either all-flush or range only. + + vTLB flushing means flushing VCPU virtual TLB + machine TLB + machine VHPT. +*/ + +/* Local all flush of vTLB. 
*/ +void vcpu_flush_vtlb_all (void); + +/* Local range flush of machine TLB only (not full VCPU virtual TLB!!!) */ +void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range); + +/* Global all flush of vTLB */ +void domain_flush_vtlb_all (void); + +/* Global range-flush of vTLB. */ +void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range); + +/* Final vTLB flush on every dirty cpus. */ +void domain_flush_destroy (struct domain *d); + +/* Flush v-tlb on cpus set in mask for current domain. */ +void flush_tlb_mask(cpumask_t mask); + +/* Flush local machine TLB. */ +void local_flush_tlb_all (void); + +#define tlbflush_current_time() 0 +#define tlbflush_filter(x,y) ((void)0) + +#endif diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -/* - * Architecture-specific kernel symbols - * - * Don't put any exports here unless it's defined in an assembler file. - * All other exports should be put directly after the definition. - */ - -#include <linux/config.h> -#include <linux/module.h> - -extern int is_running_on_xen(void); -EXPORT_SYMBOL(is_running_on_xen); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/net_driver_util.c --- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -/***************************************************************************** - * - * Utility functions for Xen network devices. - * - * Copyright (c) 2005 XenSource Ltd. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include <linux/if_ether.h> -#include <linux/err.h> -#include <linux/module.h> -#include <xen/net_driver_util.h> - - -int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) -{ - char *s; - int i; - char *e; - char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL); - if (IS_ERR(macstr)) - return PTR_ERR(macstr); - s = macstr; - for (i = 0; i < ETH_ALEN; i++) { - mac[i] = simple_strtoul(s, &e, 16); - if (s == e || (e[0] != ':' && e[0] != 0)) { - kfree(macstr); - return -ENOENT; - } - s = &e[1]; - } - kfree(macstr); - return 0; -} -EXPORT_SYMBOL_GPL(xen_net_read_mac); diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/net_driver_util.h --- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -/***************************************************************************** - * - * Utility functions for Xen network devices. - * - * Copyright (c) 2005 XenSource Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _ASM_XEN_NET_DRIVER_UTIL_H -#define _ASM_XEN_NET_DRIVER_UTIL_H - - -#include <xen/xenbus.h> - - -/** - * Read the 'mac' node at the given device's node in the store, and parse that - * as colon-separated octets, placing result the given mac array. mac must be - * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). - * Return 0 on success, or -errno on error. - */ -int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]); - - -#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */ diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_proc.h --- a/tools/xenstore/xenstored_proc.h Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -/* - Copyright (C) 2005 XenSource Ltd - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ - -#ifndef _XENSTORED_PROC_H -#define _XENSTORED_PROC_H - -#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva" -#define XENSTORED_PROC_PORT "/proc/xen/xsd_port" - - -#endif /* _XENSTORED_PROC_H */ diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/flushtlb.h --- a/xen/include/asm-ia64/flushtlb.h Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -#ifndef __FLUSHTLB_H__ -#define __FLUSHTLB_H__ - -#include <asm/tlbflush.h> - -#define tlbflush_current_time() 0 -#define tlbflush_filter(x,y) ((void)0) - -#endif diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/tlbflush.h --- a/xen/include/asm-ia64/linux-xen/asm/tlbflush.h Tue May 30 12:52:02 2006 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,119 +0,0 @@ -#ifndef _ASM_IA64_TLBFLUSH_H -#define _ASM_IA64_TLBFLUSH_H - -/* - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - */ - -#include <linux/config.h> - -#include <linux/mm.h> - -#include <asm/intrinsics.h> -#include <asm/mmu_context.h> -#include <asm/page.h> - -/* - * Now for some TLB flushing routines. This is the kind of stuff that - * can be very expensive, so try to avoid them whenever possible. - */ - -/* - * Flush everything (kernel mapping may also have changed due to - * vmalloc/vfree). - */ -extern void local_flush_tlb_all (void); - -#ifdef CONFIG_SMP - extern void smp_flush_tlb_all (void); - extern void smp_flush_tlb_mm (struct mm_struct *mm); -# define flush_tlb_all() smp_flush_tlb_all() -#else -# define flush_tlb_all() local_flush_tlb_all() -#endif - -#ifndef XEN -static inline void -local_finish_flush_tlb_mm (struct mm_struct *mm) -{ -#ifndef XEN -// FIXME SMP? 
- if (mm == current->active_mm) - activate_context(mm); -#endif -} - -/* - * Flush a specified user mapping. This is called, e.g., as a result of fork() and - * exit(). fork() ends up here because the copy-on-write mechanism needs to write-protect - * the PTEs of the parent task. - */ -static inline void -flush_tlb_mm (struct mm_struct *mm) -{ - if (!mm) - return; - -#ifndef XEN -// FIXME SMP? - mm->context = 0; -#endif - - if (atomic_read(&mm->mm_users) == 0) - return; /* happens as a result of exit_mmap() */ - -#ifdef CONFIG_SMP - smp_flush_tlb_mm(mm); -#else - local_finish_flush_tlb_mm(mm); -#endif -} - -extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end); - -/* - * Page-granular tlb flush. - */ -static inline void -flush_tlb_page (struct vm_area_struct *vma, unsigned long addr) -{ -#ifdef CONFIG_SMP - flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE); -#else -#ifdef XEN - if (vma->vm_mm == current->domain->arch.mm) -#else - if (vma->vm_mm == current->active_mm) -#endif - ia64_ptcl(addr, (PAGE_SHIFT << 2)); -#ifndef XEN -// FIXME SMP? - else - vma->vm_mm->context = 0; -#endif -#endif -} - -/* - * Flush the TLB entries mapping the virtually mapped linear page - * table corresponding to address range [START-END). - */ -static inline void -flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end) -{ - /* - * Deprecated. The virtual page table is now flushed via the normal gather/flush - * interface (see tlb.h). - */ -} - - -#define flush_tlb_kernel_range(start, end) flush_tlb_all() /* XXX fix me */ -#endif /* XEN */ - -#ifdef XEN -extern void flush_tlb_mask(cpumask_t mask); -#endif - -#endif /* _ASM_IA64_TLBFLUSH_H */ _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |